• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_RUNTIME_PROFILER_PROFILER_H_
18 #define MINDSPORE_CCSRC_RUNTIME_PROFILER_PROFILER_H_
19 
20 #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
21 #include <sys/syscall.h>
22 #endif
23 #include <list>
24 #include <map>
25 #include <memory>
26 #include <mutex>
27 #include <string>
28 #include <thread>
29 #include <utility>
30 #include <vector>
31 #include "nlohmann/json.hpp"
32 #include "utils/os.h"
33 #include "utils/ms_utils.h"
34 #include "utils/hash_map.h"
35 #include "utils/log_adapter.h"
36 #include "utils/convert_utils_base.h"
37 #include "include/common/visible.h"
38 #include "mindrt/include/async/spinlock.h"
39 
40 namespace mindspore {
41 namespace runtime {
42 static const char kDefaultOpName[] = "Default";
43 static const size_t kPercent = 100;
44 
45 enum class ProfilerStage {
46   kDefault,
47   kPython,
48   kCapture,
49   kRunGraph,
50   kRunGrad,
51   kRunOp,
52   kAsnumpy,
53   kCompileGradGraph,
54   kWaitPipeline,
55   kSyncStream,
56 };
57 
58 enum class ProfilerModule { kDefault, kGraphExecutorPy, kRuntime, kPynative, kKernel, kPython, kCapture, kOther };
59 
60 enum class ProfilerEvent {
61   kDefault,
62   kKernelInfer,
63   kKernelResize,
64   kKernelInferAndResize,
65   kKernelLaunch,
66   kKernelLaunckCallback,
67   kKernelUpdate,
68   kGraphLaunch,
69   kInputProcess,
70   kOutputProcess,
71   kWaitTaskFinish,
72   kPreLaunch,
73   kPostLaunch,
74   kSendOutput,
75   kMemoryAlloc,
76   kMemoryFree,
77   kCopyData,
78   kStreamSync,
79   kProcessMultiStream,
80   kWaitKernelsInferFinish,
81   kWaitKernelsResizeFinish,
82   kWaitKernelsLaunchFinish,
83 
84   // Inner event is not counted in the total time.
85   kKernelInferInner,
86   kKernelInferDataSync,
87   kKernelLaunchInner,
88   kBackendGraphRunInner,
89 
90   // PyNative Pipeline
91   kRunOp,
92   kPyNativeFrontendTask,
93   kPyNativeBackendTask,
94   kPyNativeDeviceTask,
95   kPyNativeLaunchTask,
96   kPyNativeBpropTask,
97   // PyNative inner Event
98   kPyNativeGilAcquire,
99   kPyNativeCast,
100   kPyNativeInfer,
101   kPyNativeOpCompile,
102   kPyNativeGradExpander,
103   kPyNativeGradUpdateSens,
104   kPyNativeGradClearTopCell,
105   kPyNativeGradClearAutoGradCell,
106   // PyBoost
107   kPyBoostInferOutput,
108   kPyBoostInferByOpDef,
109   kPyBoostCreateOutputTensor,
110   kPyBoostDeviceTask,
111   kPyBoostMallocInput,
112   kPyBoostMallocOutput,
113   kPyBoostLaunchAclnn,
114   // pybind api
115   kPyNativeNewGraph,
116   kPyNativeEndGraph,
117   // Python
118   kPythonObserved,
119   // Capture Event
120   kCaptureRunGraph,
121   kCaptureProcess,
122   kCaptureCompile,
123   kCaptureGuard,
124 };
125 
126 static const std::map<ProfilerStage, std::string> kProfilerStageString = {
127   {ProfilerStage::kDefault, "Default"},           {ProfilerStage::kPython, "Python"},
128   {ProfilerStage::kCapture, "Capture"},           {ProfilerStage::kRunGraph, "RunGraph"},
129   {ProfilerStage::kRunGrad, "RunGrad"},           {ProfilerStage::kRunOp, "RunOp"},
130   {ProfilerStage::kAsnumpy, "Asnumpy"},           {ProfilerStage::kCompileGradGraph, "CompileGradGraph"},
131   {ProfilerStage::kWaitPipeline, "WaitPipeline"}, {ProfilerStage::kSyncStream, "SyncStream"},
132 };
133 
134 static const std::map<ProfilerModule, std::string> kProfilerModuleString = {
135   {ProfilerModule::kDefault, "Default"},
136   {ProfilerModule::kGraphExecutorPy, "GraphExecutorPy"},
137   {ProfilerModule::kRuntime, "RuntimeFramework"},
138   {ProfilerModule::kPynative, "PynativeFramework"},
139   {ProfilerModule::kKernel, "Kernel"},
140   {ProfilerModule::kPython, "Python"},
141   {ProfilerModule::kCapture, "Capture"},
142   {ProfilerModule::kOther, "Other"},
143 };
144 
145 static const std::map<ProfilerEvent, std::string> kProfilerEventString = {
146   {ProfilerEvent::kDefault, "Default"},
147   {ProfilerEvent::kKernelInfer, "KernelInfer"},
148   {ProfilerEvent::kKernelResize, "KernelResize"},
149   {ProfilerEvent::kKernelInferAndResize, "KernelInferAndResize"},
150   {ProfilerEvent::kKernelLaunch, "KernelLaunch"},
151   {ProfilerEvent::kKernelLaunckCallback, "KernelLaunchCallback"},
152   {ProfilerEvent::kKernelUpdate, "KernelUpdate"},
153   {ProfilerEvent::kGraphLaunch, "GraphLaunch"},
154   {ProfilerEvent::kInputProcess, "InputProcess"},
155   {ProfilerEvent::kOutputProcess, "OutputProcess"},
156   {ProfilerEvent::kWaitTaskFinish, "WaitTaskFinish"},
157   {ProfilerEvent::kPreLaunch, "PreLaunch"},
158   {ProfilerEvent::kPostLaunch, "PostLaunch"},
159   {ProfilerEvent::kSendOutput, "SendOutput"},
160   {ProfilerEvent::kMemoryAlloc, "MemoryAlloc"},
161   {ProfilerEvent::kMemoryFree, "MemoryFree"},
162   {ProfilerEvent::kCopyData, "CopyData"},
163   {ProfilerEvent::kStreamSync, "StreamSync"},
164   {ProfilerEvent::kProcessMultiStream, "ProcessMultiStream"},
165   {ProfilerEvent::kWaitKernelsInferFinish, "WaitKernelsInferFinish"},
166   {ProfilerEvent::kWaitKernelsResizeFinish, "WaitKernelsResizeFinish"},
167   {ProfilerEvent::kWaitKernelsLaunchFinish, "WaitKernelsLaunchFinish"},
168   // Inner event.
169   {ProfilerEvent::kKernelInferInner, "KernelInferInner"},
170   {ProfilerEvent::kKernelInferDataSync, "KernelInferDataSync"},
171   {ProfilerEvent::kKernelLaunchInner, "KernelLaunchInner"},
172   {ProfilerEvent::kBackendGraphRunInner, "BackendGraphRunInner"},
173   // PyNative events
174   {ProfilerEvent::kRunOp, "RunOp"},
175   {ProfilerEvent::kPyNativeFrontendTask, "FrontendTask"},
176   {ProfilerEvent::kPyNativeBackendTask, "BackendTask"},
177   {ProfilerEvent::kPyNativeDeviceTask, "DeviceTask"},
178   {ProfilerEvent::kPyNativeLaunchTask, "LaunchTask"},
179   {ProfilerEvent::kPyNativeBpropTask, "BpropTask"},
180   {ProfilerEvent::kPyNativeGilAcquire, "AcquireGil"},
181   {ProfilerEvent::kPyNativeCast, "PyNativeCast"},
182   {ProfilerEvent::kPyNativeInfer, "PyNativeInfer"},
183   {ProfilerEvent::kPyNativeOpCompile, "OpCompile"},
184   {ProfilerEvent::kPyNativeGradExpander, "Expander"},
185   {ProfilerEvent::kPyNativeGradUpdateSens, "UpdateSens"},
186   {ProfilerEvent::kPyNativeGradClearTopCell, "ClearTopCell"},
187   {ProfilerEvent::kPyNativeGradClearAutoGradCell, "ClearAutoGradCell"},
188   // PyBoost events
189   {ProfilerEvent::kPyBoostInferOutput, "InferOutput"},
190   {ProfilerEvent::kPyBoostInferByOpDef, "InferByOpDef"},
191   {ProfilerEvent::kPyBoostCreateOutputTensor, "CreateOutputTensor"},
192   {ProfilerEvent::kPyBoostDeviceTask, "DeviceTask"},
193   {ProfilerEvent::kPyBoostMallocInput, "MallocInput"},
194   {ProfilerEvent::kPyBoostMallocOutput, "MallocOutput"},
195   {ProfilerEvent::kPyBoostLaunchAclnn, "LaunchAclnn"},
196   // pybind api
197   {ProfilerEvent::kPyNativeNewGraph, "new_graph"},
198   {ProfilerEvent::kPyNativeEndGraph, "end_graph"},
199   // python events
200   {ProfilerEvent::kPythonObserved, "PythonObserved"},
201   // Capture events
202   {ProfilerEvent::kCaptureRunGraph, "CaptureRunGraph"},
203   {ProfilerEvent::kCaptureProcess, "CaptureProcess"},
204   {ProfilerEvent::kCaptureCompile, "CaptureCompile"},
205   {ProfilerEvent::kCaptureGuard, "CaptureGuard"}};
206 
207 #define PROFILER_START(start_time)                                          \
208   do {                                                                      \
209     if (runtime::ProfilerAnalyzer::GetInstance().profiler_enable()) {       \
210       start_time = runtime::ProfilerAnalyzer::GetInstance().GetTimeStamp(); \
211     }                                                                       \
212   } while (0);
213 
214 // Match PROFILER_START to use.
215 #define PROFILER_END(start_time, module, event, op_name, is_inner_event)                                           \
216   do {                                                                                                             \
217     if (runtime::ProfilerAnalyzer::GetInstance().profiler_enable()) {                                              \
218       auto end_time = runtime::ProfilerAnalyzer::GetInstance().GetTimeStamp();                                     \
219       auto brief_name = runtime::ProfilerAnalyzer::GetInstance().GetBriefName(op_name);                            \
220       runtime::ProfilerAnalyzer::GetInstance().RecordData(                                                         \
221         std::make_shared<runtime::ProfilerData>(module, event, brief_name, is_inner_event, start_time, end_time)); \
222     }                                                                                                              \
223   } while (0);
224 
225 // Match PROFILER_START to use.
226 #define PROFILER_STAGE_END(start_time, stage)                                  \
227   do {                                                                         \
228     if (runtime::ProfilerAnalyzer::GetInstance().profiler_enable()) {          \
229       auto end_time = runtime::ProfilerAnalyzer::GetInstance().GetTimeStamp(); \
230       runtime::ProfilerAnalyzer::GetInstance().RecordData(                     \
231         std::make_shared<runtime::ProfilerData>(stage, start_time, end_time)); \
232     }                                                                          \
233   } while (0);
234 
235 // Record the profiler data by the constructor and destructor of this class.
236 class COMMON_EXPORT ProfilerRecorder {
237  public:
238   ProfilerRecorder(ProfilerModule module, ProfilerEvent event, const std::string &op_name, bool is_inner_event = false,
239                    bool need_py_stack = false, uint64_t flow_id = UINT64_MAX);
240   ~ProfilerRecorder();
241 
242   struct Data {
DataData243     Data(ProfilerModule module, ProfilerEvent event, std::string op_name, std::string py_stack, uint64_t start_time,
244          uint64_t flow_id, bool is_inner_event)
245         : module_(module),
246           event_(event),
247           op_name_(std::move(op_name)),
248           py_stack_(std::move(py_stack)),
249           start_time_(start_time),
250           flow_id_(flow_id),
251           is_inner_event_(is_inner_event) {}
252     ProfilerModule module_;
253     ProfilerEvent event_;
254     std::string op_name_;
255     std::string py_stack_;
256     uint64_t start_time_;
257     uint64_t flow_id_;
258     bool is_inner_event_;
259   };
260 
261   inline static const std::string kNoName{};
262 
263  private:
264   std::unique_ptr<Data> data_{nullptr};
265 };
266 
267 class COMMON_EXPORT PythonProfilerRecorder {
268  public:
269   explicit PythonProfilerRecorder(const std::string &record_name);
270   ~PythonProfilerRecorder() = default;
271 
272   void record_start();
273   void record_end();
274 
275  private:
276   uint64_t start_time_;
277   std::string record_name_;
278   ProfilerModule module_;
279   ProfilerEvent event_;
280 };
281 
282 class COMMON_EXPORT ProfilerStageRecorder {
283  public:
284   explicit ProfilerStageRecorder(ProfilerStage stage);
285   ~ProfilerStageRecorder();
286 
287  private:
288   ProfilerStage stage_{ProfilerStage::kDefault};
289   uint64_t start_time_{0};
290 };
291 
292 struct StepInfo {
StepInfoStepInfo293   StepInfo(size_t step, uint64_t step_time) : step_(step), step_time_(step_time) {}
294   const size_t step_;
295   const uint64_t step_time_;
296 };
297 
298 using StepInfoPtr = std::shared_ptr<StepInfo>;
299 
300 struct ProfilerData {
301   ProfilerData(ProfilerModule module, ProfilerEvent event, const std::string &op_name, bool is_inner_event,
302                uint64_t start_time, uint64_t end_time, uint64_t flow_id = UINT64_MAX, std::string py_stack = "")
is_stage_ProfilerData303       : is_stage_(false),
304         stage_(ProfilerStage::kDefault),
305         module_(module),
306         event_(event),
307         op_name_(op_name),
308         is_inner_event_(is_inner_event),
309         start_time_(start_time),
310         end_time_(end_time),
311         dur_time_(end_time - start_time),
312 #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
313         tid_(LongToUlong(syscall(SYS_gettid))),
314 #else
315         tid_(0),
316 #endif
317         pid_(getpid()),
318         flow_id_(flow_id),
319         py_stack_(std::move(py_stack)) {
320   }
321 
ProfilerDataProfilerData322   ProfilerData(ProfilerStage stage, uint64_t start_time, uint64_t end_time)
323       : is_stage_(true),
324         stage_(stage),
325         module_(ProfilerModule::kDefault),
326         event_(ProfilerEvent::kDefault),
327         op_name_(""),
328         is_inner_event_(false),
329         start_time_(start_time),
330         end_time_(end_time) {
331     dur_time_ = end_time - start_time;
332 #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
333     tid_ = LongToUlong(syscall(SYS_gettid));
334 #else
335     tid_ = 0;
336 #endif
337     pid_ = getpid();
338   }
339 
ProfilerDataProfilerData340   ProfilerData(const ProfilerData &other)
341       : is_stage_(other.is_stage_),
342         stage_(other.stage_),
343         module_(other.module_),
344         event_(other.event_),
345         op_name_(other.op_name_),
346         is_inner_event_(other.is_inner_event_),
347         start_time_(other.start_time_),
348         end_time_(other.end_time_),
349         dur_time_(other.dur_time_),
350         tid_(other.tid_),
351         pid_(other.pid_),
352         flow_id_(other.flow_id_) {}
353 
354   ProfilerData &operator=(const ProfilerData &other) {
355     if (this == &other) {
356       return *this;
357     }
358 
359     is_stage_ = other.is_stage_;
360     stage_ = other.stage_;
361     module_ = other.module_;
362     event_ = other.event_;
363     op_name_ = other.op_name_;
364     is_inner_event_ = other.is_inner_event_;
365     start_time_ = other.start_time_;
366     end_time_ = other.end_time_;
367     dur_time_ = other.dur_time_;
368     tid_ = other.tid_;
369     pid_ = other.pid_;
370     flow_id_ = other.flow_id_;
371     return *this;
372   }
373 
374   bool is_stage_{false};
375   ProfilerStage stage_{ProfilerStage::kDefault};
376   ProfilerModule module_{ProfilerModule::kDefault};
377   ProfilerEvent event_{ProfilerEvent::kDefault};
378   std::string op_name_{};
379   bool is_inner_event_{false};
380   uint64_t start_time_{0L};
381   uint64_t end_time_{0L};
382   uint64_t dur_time_{0L};
383   uint64_t tid_{};
384   int32_t pid_{0};
385   uint64_t flow_id_{UINT64_MAX};
386   std::string py_stack_{};
387 };
388 using ProfilerDataPtr = std::shared_ptr<ProfilerData>;
389 
390 struct ProfilerFlowEventData {};
391 
392 struct ProfilerStatisticsInfo {
393   explicit ProfilerStatisticsInfo(const std::string &name, bool is_inner_info = false)
name_ProfilerStatisticsInfo394       : name_(name), is_inner_info_(is_inner_info), count_(0), total_time_(0), average_time_(0), percent_(0) {}
395   std::string name_;
396   bool is_inner_info_;
397   size_t count_;
398   uint64_t total_time_;
399   double average_time_;
400   double percent_;
401 
AccumulateTimeProfilerStatisticsInfo402   void AccumulateTime(uint64_t time) {
403     total_time_ += time;
404     ++count_;
405   }
AverageProfilerStatisticsInfo406   void Average() {
407     MS_EXCEPTION_IF_ZERO("count", count_);
408     average_time_ = static_cast<double>(total_time_) / count_;
409   }
CalculatePercentProfilerStatisticsInfo410   void CalculatePercent(uint64_t step_total_time) {
411     MS_EXCEPTION_IF_ZERO("step_total_time", step_total_time);
412     percent_ = (static_cast<double>(total_time_) / step_total_time) * kPercent;
413   }
414 };
415 using ProfilerStatisticsInfoPtr = std::shared_ptr<ProfilerStatisticsInfo>;
416 
417 struct ProfilerEventInfo {
418   ProfilerStatisticsInfoPtr event_statistics_info_;
419   mindspore::HashMap<std::string, ProfilerStatisticsInfoPtr> op_infos_;
420 };
421 using ProfilerEventInfoPtr = std::shared_ptr<ProfilerEventInfo>;
422 
423 struct ProfilerModuleInfo {
424   ProfilerStatisticsInfoPtr module_statistics_info_;
425   std::map<ProfilerEvent, ProfilerEventInfoPtr> event_infos_;
426 };
427 using ProfilerModuleInfoPtr = std::shared_ptr<ProfilerModuleInfo>;
428 
429 using ProfilerDataSpan = std::list<ProfilerDataPtr>;
430 class COMMON_EXPORT ProfilerAnalyzer {
431  public:
432   static ProfilerAnalyzer &GetInstance() noexcept;
433 
434   // Increase step at the step begin.
435   void StartStep();
436   // Analyze and output profiler data at the step end.
437   void EndStep();
438 
439   void Clear() noexcept;
440 
441   // Enable MindInsight Profile
442   void EnableMiProfile();
443 
444   // The used by ProfilerRecorder to record data.
445   bool profiler_enable() const;
enable_by_env()446   bool enable_by_env() const { return profiler_enable_; }
447   void RecordData(const ProfilerDataPtr &data) noexcept;
448   void RecordFlowData(uint64_t flow_id);
449   uint64_t GetTimeStamp() const noexcept;
450   std::string GetBriefName(const std::string &scope_name) const;
451 
452   void ProcessModuleSummaryData(const ProfilerDataSpan &span);
453 
step()454   const size_t step() const { return step_; }
set_step_time(uint64_t step_time)455   void set_step_time(uint64_t step_time) { step_time_ = step_time; }
set_profiler_enable(bool profiler_enable)456   void set_profiler_enable(bool profiler_enable) { profiler_enable_ = profiler_enable; }
data()457   const ProfilerDataSpan &data() const { return data_; }
data_line()458   const std::list<std::pair<StepInfoPtr, ProfilerDataSpan>> data_line() const { return data_line_; }
json_infos()459   const nlohmann::json &json_infos() const { return json_infos_; }
module_infos()460   const std::map<ProfilerModule, ProfilerModuleInfoPtr> &module_infos() const { return module_infos_; }
stage_infos()461   const std::map<ProfilerStage, ProfilerStatisticsInfoPtr> &stage_infos() const { return stage_infos_; }
462   void SetThreadIdToName(const std::thread::id &id, const std::string &name);
463 
464  private:
465   ProfilerAnalyzer() = default;
~ProfilerAnalyzer()466   ~ProfilerAnalyzer() { Clear(); }
467   DISABLE_COPY_AND_ASSIGN(ProfilerAnalyzer);
468 
469   void Initialize();
470   void ProcessData();
471 
472   // Process data.
473   void SaveJsonData(const ProfilerDataPtr &data);
474   void AnalyzeSummaryData(const ProfilerDataPtr &data);
475   void AnalyzeStageSummaryData(const ProfilerDataPtr &data);
476   void AnalyzeModuleSummaryData(const ProfilerDataPtr &data);
477   void AnalyzeEventSummaryData(const ProfilerDataPtr &data);
478   void AnalyzeOpSummaryData(mindspore::HashMap<std::string, ProfilerStatisticsInfoPtr> *const op_infos,
479                             const ProfilerDataPtr &data);
480   void AddPythonSummaryData(const uint64_t span_time);
481 
482   // Dump data.
483   void DumpJsonData() const;
484   void DumpDetailData(const size_t step, const ProfilerDataSpan &span) const;
485   void DumpSummaryData(const size_t step) const;
486   void DumpStageSummaryData(std::stringstream &string_stream) const;
487   void DumpModuleSummaryData(std::stringstream &string_stream) const;
488   void DumpEventSummaryData(const std::map<ProfilerEvent, ProfilerEventInfoPtr> &event_infos,
489                             std::stringstream &string_stream) const;
490   void DumpOpSummaryData(const mindspore::HashMap<std::string, ProfilerStatisticsInfoPtr> &op_infos,
491                          std::stringstream &string_stream) const;
492 
493   // The relevant members of step.
494   size_t step_{0};
495   uint64_t step_time_{0};
496   uint64_t step_start_time_{0};
497   uint64_t module_total_time_{0};
498   ProfilerDataSpan data_;
499   // Container list for all data_ points.
500   std::list<std::pair<StepInfoPtr, ProfilerDataSpan>> data_line_;
501   SpinLock data_mutex_;
502   nlohmann::json json_infos_;
503   // The data analyzed level is module-->event-->op, these data would not be cleared in unit test.
504   std::map<ProfilerModule, ProfilerModuleInfoPtr> module_infos_;
505   std::map<ProfilerStage, ProfilerStatisticsInfoPtr> stage_infos_;
506 
507   std::map<std::thread::id, std::string> thread_id_to_name_;
508 
509   // Save file name.
510   std::string json_file_name_;
511   std::string summary_info_file_name_;
512   std::string detail_info_file_name_;
513 
514   // The relevant members of init.
515   int show_top_num_{0};
516   bool profiler_enable_{false};
517   bool mi_profiler_enable_{false};
518   bool init_{false};
519 };
520 }  // namespace runtime
521 }  // namespace mindspore
522 
523 #endif  // MINDSPORE_CCSRC_RUNTIME_PROFILER_PROFILER_H_
524