1 /** 2 * Copyright 2023 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_RUNTIME_PROFILER_PROFILER_H_ 18 #define MINDSPORE_CCSRC_RUNTIME_PROFILER_PROFILER_H_ 19 20 #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) 21 #include <sys/syscall.h> 22 #endif 23 #include <list> 24 #include <map> 25 #include <memory> 26 #include <mutex> 27 #include <string> 28 #include <thread> 29 #include <utility> 30 #include <vector> 31 #include "nlohmann/json.hpp" 32 #include "utils/os.h" 33 #include "utils/ms_utils.h" 34 #include "utils/hash_map.h" 35 #include "utils/log_adapter.h" 36 #include "utils/convert_utils_base.h" 37 #include "include/common/visible.h" 38 #include "mindrt/include/async/spinlock.h" 39 40 namespace mindspore { 41 namespace runtime { 42 static const char kDefaultOpName[] = "Default"; 43 static const size_t kPercent = 100; 44 45 enum class ProfilerStage { 46 kDefault, 47 kPython, 48 kCapture, 49 kRunGraph, 50 kRunGrad, 51 kRunOp, 52 kAsnumpy, 53 kCompileGradGraph, 54 kWaitPipeline, 55 kSyncStream, 56 }; 57 58 enum class ProfilerModule { kDefault, kGraphExecutorPy, kRuntime, kPynative, kKernel, kPython, kCapture, kOther }; 59 60 enum class ProfilerEvent { 61 kDefault, 62 kKernelInfer, 63 kKernelResize, 64 kKernelInferAndResize, 65 kKernelLaunch, 66 kKernelLaunckCallback, 67 kKernelUpdate, 68 kGraphLaunch, 69 kInputProcess, 70 kOutputProcess, 71 kWaitTaskFinish, 72 kPreLaunch, 73 kPostLaunch, 74 kSendOutput, 75 kMemoryAlloc, 76 kMemoryFree, 77 kCopyData, 78 kStreamSync, 79 kProcessMultiStream, 80 kWaitKernelsInferFinish, 81 kWaitKernelsResizeFinish, 82 kWaitKernelsLaunchFinish, 83 84 // Inner event is not counted in the total time. 85 kKernelInferInner, 86 kKernelInferDataSync, 87 kKernelLaunchInner, 88 kBackendGraphRunInner, 89 90 // PyNative Pipeline 91 kRunOp, 92 kPyNativeFrontendTask, 93 kPyNativeBackendTask, 94 kPyNativeDeviceTask, 95 kPyNativeLaunchTask, 96 kPyNativeBpropTask, 97 // PyNative inner Event 98 kPyNativeGilAcquire, 99 kPyNativeCast, 100 kPyNativeInfer, 101 kPyNativeOpCompile, 102 kPyNativeGradExpander, 103 kPyNativeGradUpdateSens, 104 kPyNativeGradClearTopCell, 105 kPyNativeGradClearAutoGradCell, 106 // PyBoost 107 kPyBoostInferOutput, 108 kPyBoostInferByOpDef, 109 kPyBoostCreateOutputTensor, 110 kPyBoostDeviceTask, 111 kPyBoostMallocInput, 112 kPyBoostMallocOutput, 113 kPyBoostLaunchAclnn, 114 // pybind api 115 kPyNativeNewGraph, 116 kPyNativeEndGraph, 117 // Python 118 kPythonObserved, 119 // Capture Event 120 kCaptureRunGraph, 121 kCaptureProcess, 122 kCaptureCompile, 123 kCaptureGuard, 124 }; 125 126 static const std::map<ProfilerStage, std::string> kProfilerStageString = { 127 {ProfilerStage::kDefault, "Default"}, {ProfilerStage::kPython, "Python"}, 128 {ProfilerStage::kCapture, "Capture"}, {ProfilerStage::kRunGraph, "RunGraph"}, 129 {ProfilerStage::kRunGrad, "RunGrad"}, {ProfilerStage::kRunOp, "RunOp"}, 130 {ProfilerStage::kAsnumpy, "Asnumpy"}, {ProfilerStage::kCompileGradGraph, "CompileGradGraph"}, 131 {ProfilerStage::kWaitPipeline, "WaitPipeline"}, {ProfilerStage::kSyncStream, "SyncStream"}, 132 }; 133 134 static const std::map<ProfilerModule, std::string> kProfilerModuleString = { 135 {ProfilerModule::kDefault, "Default"}, 136 {ProfilerModule::kGraphExecutorPy, "GraphExecutorPy"}, 137 {ProfilerModule::kRuntime, "RuntimeFramework"}, 138 {ProfilerModule::kPynative, "PynativeFramework"}, 139 {ProfilerModule::kKernel, "Kernel"}, 140 {ProfilerModule::kPython, "Python"}, 141 {ProfilerModule::kCapture, "Capture"}, 142 {ProfilerModule::kOther, "Other"}, 143 }; 144 145 static const std::map<ProfilerEvent, std::string> kProfilerEventString = { 146 {ProfilerEvent::kDefault, "Default"}, 147 {ProfilerEvent::kKernelInfer, "KernelInfer"}, 148 {ProfilerEvent::kKernelResize, "KernelResize"}, 149 {ProfilerEvent::kKernelInferAndResize, "KernelInferAndResize"}, 150 {ProfilerEvent::kKernelLaunch, "KernelLaunch"}, 151 {ProfilerEvent::kKernelLaunckCallback, "KernelLaunchCallback"}, 152 {ProfilerEvent::kKernelUpdate, "KernelUpdate"}, 153 {ProfilerEvent::kGraphLaunch, "GraphLaunch"}, 154 {ProfilerEvent::kInputProcess, "InputProcess"}, 155 {ProfilerEvent::kOutputProcess, "OutputProcess"}, 156 {ProfilerEvent::kWaitTaskFinish, "WaitTaskFinish"}, 157 {ProfilerEvent::kPreLaunch, "PreLaunch"}, 158 {ProfilerEvent::kPostLaunch, "PostLaunch"}, 159 {ProfilerEvent::kSendOutput, "SendOutput"}, 160 {ProfilerEvent::kMemoryAlloc, "MemoryAlloc"}, 161 {ProfilerEvent::kMemoryFree, "MemoryFree"}, 162 {ProfilerEvent::kCopyData, "CopyData"}, 163 {ProfilerEvent::kStreamSync, "StreamSync"}, 164 {ProfilerEvent::kProcessMultiStream, "ProcessMultiStream"}, 165 {ProfilerEvent::kWaitKernelsInferFinish, "WaitKernelsInferFinish"}, 166 {ProfilerEvent::kWaitKernelsResizeFinish, "WaitKernelsResizeFinish"}, 167 {ProfilerEvent::kWaitKernelsLaunchFinish, "WaitKernelsLaunchFinish"}, 168 // Inner event. 169 {ProfilerEvent::kKernelInferInner, "KernelInferInner"}, 170 {ProfilerEvent::kKernelInferDataSync, "KernelInferDataSync"}, 171 {ProfilerEvent::kKernelLaunchInner, "KernelLaunchInner"}, 172 {ProfilerEvent::kBackendGraphRunInner, "BackendGraphRunInner"}, 173 // PyNative events 174 {ProfilerEvent::kRunOp, "RunOp"}, 175 {ProfilerEvent::kPyNativeFrontendTask, "FrontendTask"}, 176 {ProfilerEvent::kPyNativeBackendTask, "BackendTask"}, 177 {ProfilerEvent::kPyNativeDeviceTask, "DeviceTask"}, 178 {ProfilerEvent::kPyNativeLaunchTask, "LaunchTask"}, 179 {ProfilerEvent::kPyNativeBpropTask, "BpropTask"}, 180 {ProfilerEvent::kPyNativeGilAcquire, "AcquireGil"}, 181 {ProfilerEvent::kPyNativeCast, "PyNativeCast"}, 182 {ProfilerEvent::kPyNativeInfer, "PyNativeInfer"}, 183 {ProfilerEvent::kPyNativeOpCompile, "OpCompile"}, 184 {ProfilerEvent::kPyNativeGradExpander, "Expander"}, 185 {ProfilerEvent::kPyNativeGradUpdateSens, "UpdateSens"}, 186 {ProfilerEvent::kPyNativeGradClearTopCell, "ClearTopCell"}, 187 {ProfilerEvent::kPyNativeGradClearAutoGradCell, "ClearAutoGradCell"}, 188 // PyBoost events 189 {ProfilerEvent::kPyBoostInferOutput, "InferOutput"}, 190 {ProfilerEvent::kPyBoostInferByOpDef, "InferByOpDef"}, 191 {ProfilerEvent::kPyBoostCreateOutputTensor, "CreateOutputTensor"}, 192 {ProfilerEvent::kPyBoostDeviceTask, "DeviceTask"}, 193 {ProfilerEvent::kPyBoostMallocInput, "MallocInput"}, 194 {ProfilerEvent::kPyBoostMallocOutput, "MallocOutput"}, 195 {ProfilerEvent::kPyBoostLaunchAclnn, "LaunchAclnn"}, 196 // pybind api 197 {ProfilerEvent::kPyNativeNewGraph, "new_graph"}, 198 {ProfilerEvent::kPyNativeEndGraph, "end_graph"}, 199 // python events 200 {ProfilerEvent::kPythonObserved, "PythonObserved"}, 201 // Capture events 202 {ProfilerEvent::kCaptureRunGraph, "CaptureRunGraph"}, 203 {ProfilerEvent::kCaptureProcess, "CaptureProcess"}, 204 {ProfilerEvent::kCaptureCompile, "CaptureCompile"}, 205 {ProfilerEvent::kCaptureGuard, "CaptureGuard"}}; 206 207 #define PROFILER_START(start_time) \ 208 do { \ 209 if (runtime::ProfilerAnalyzer::GetInstance().profiler_enable()) { \ 210 start_time = runtime::ProfilerAnalyzer::GetInstance().GetTimeStamp(); \ 211 } \ 212 } while (0); 213 214 // Match PROFILER_START to use. 215 #define PROFILER_END(start_time, module, event, op_name, is_inner_event) \ 216 do { \ 217 if (runtime::ProfilerAnalyzer::GetInstance().profiler_enable()) { \ 218 auto end_time = runtime::ProfilerAnalyzer::GetInstance().GetTimeStamp(); \ 219 auto brief_name = runtime::ProfilerAnalyzer::GetInstance().GetBriefName(op_name); \ 220 runtime::ProfilerAnalyzer::GetInstance().RecordData( \ 221 std::make_shared<runtime::ProfilerData>(module, event, brief_name, is_inner_event, start_time, end_time)); \ 222 } \ 223 } while (0); 224 225 // Match PROFILER_START to use. 226 #define PROFILER_STAGE_END(start_time, stage) \ 227 do { \ 228 if (runtime::ProfilerAnalyzer::GetInstance().profiler_enable()) { \ 229 auto end_time = runtime::ProfilerAnalyzer::GetInstance().GetTimeStamp(); \ 230 runtime::ProfilerAnalyzer::GetInstance().RecordData( \ 231 std::make_shared<runtime::ProfilerData>(stage, start_time, end_time)); \ 232 } \ 233 } while (0); 234 235 // Record the profiler data by the constructor and destructor of this class. 236 class COMMON_EXPORT ProfilerRecorder { 237 public: 238 ProfilerRecorder(ProfilerModule module, ProfilerEvent event, const std::string &op_name, bool is_inner_event = false, 239 bool need_py_stack = false, uint64_t flow_id = UINT64_MAX); 240 ~ProfilerRecorder(); 241 242 struct Data { DataData243 Data(ProfilerModule module, ProfilerEvent event, std::string op_name, std::string py_stack, uint64_t start_time, 244 uint64_t flow_id, bool is_inner_event) 245 : module_(module), 246 event_(event), 247 op_name_(std::move(op_name)), 248 py_stack_(std::move(py_stack)), 249 start_time_(start_time), 250 flow_id_(flow_id), 251 is_inner_event_(is_inner_event) {} 252 ProfilerModule module_; 253 ProfilerEvent event_; 254 std::string op_name_; 255 std::string py_stack_; 256 uint64_t start_time_; 257 uint64_t flow_id_; 258 bool is_inner_event_; 259 }; 260 261 inline static const std::string kNoName{}; 262 263 private: 264 std::unique_ptr<Data> data_{nullptr}; 265 }; 266 267 class COMMON_EXPORT PythonProfilerRecorder { 268 public: 269 explicit PythonProfilerRecorder(const std::string &record_name); 270 ~PythonProfilerRecorder() = default; 271 272 void record_start(); 273 void record_end(); 274 275 private: 276 uint64_t start_time_; 277 std::string record_name_; 278 ProfilerModule module_; 279 ProfilerEvent event_; 280 }; 281 282 class COMMON_EXPORT ProfilerStageRecorder { 283 public: 284 explicit ProfilerStageRecorder(ProfilerStage stage); 285 ~ProfilerStageRecorder(); 286 287 private: 288 ProfilerStage stage_{ProfilerStage::kDefault}; 289 uint64_t start_time_{0}; 290 }; 291 292 struct StepInfo { StepInfoStepInfo293 StepInfo(size_t step, uint64_t step_time) : step_(step), step_time_(step_time) {} 294 const size_t step_; 295 const uint64_t step_time_; 296 }; 297 298 using StepInfoPtr = std::shared_ptr<StepInfo>; 299 300 struct ProfilerData { 301 ProfilerData(ProfilerModule module, ProfilerEvent event, const std::string &op_name, bool is_inner_event, 302 uint64_t start_time, uint64_t end_time, uint64_t flow_id = UINT64_MAX, std::string py_stack = "") is_stage_ProfilerData303 : is_stage_(false), 304 stage_(ProfilerStage::kDefault), 305 module_(module), 306 event_(event), 307 op_name_(op_name), 308 is_inner_event_(is_inner_event), 309 start_time_(start_time), 310 end_time_(end_time), 311 dur_time_(end_time - start_time), 312 #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) 313 tid_(LongToUlong(syscall(SYS_gettid))), 314 #else 315 tid_(0), 316 #endif 317 pid_(getpid()), 318 flow_id_(flow_id), 319 py_stack_(std::move(py_stack)) { 320 } 321 ProfilerDataProfilerData322 ProfilerData(ProfilerStage stage, uint64_t start_time, uint64_t end_time) 323 : is_stage_(true), 324 stage_(stage), 325 module_(ProfilerModule::kDefault), 326 event_(ProfilerEvent::kDefault), 327 op_name_(""), 328 is_inner_event_(false), 329 start_time_(start_time), 330 end_time_(end_time) { 331 dur_time_ = end_time - start_time; 332 #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) 333 tid_ = LongToUlong(syscall(SYS_gettid)); 334 #else 335 tid_ = 0; 336 #endif 337 pid_ = getpid(); 338 } 339 ProfilerDataProfilerData340 ProfilerData(const ProfilerData &other) 341 : is_stage_(other.is_stage_), 342 stage_(other.stage_), 343 module_(other.module_), 344 event_(other.event_), 345 op_name_(other.op_name_), 346 is_inner_event_(other.is_inner_event_), 347 start_time_(other.start_time_), 348 end_time_(other.end_time_), 349 dur_time_(other.dur_time_), 350 tid_(other.tid_), 351 pid_(other.pid_), 352 flow_id_(other.flow_id_) {} 353 354 ProfilerData &operator=(const ProfilerData &other) { 355 if (this == &other) { 356 return *this; 357 } 358 359 is_stage_ = other.is_stage_; 360 stage_ = other.stage_; 361 module_ = other.module_; 362 event_ = other.event_; 363 op_name_ = other.op_name_; 364 is_inner_event_ = other.is_inner_event_; 365 start_time_ = other.start_time_; 366 end_time_ = other.end_time_; 367 dur_time_ = other.dur_time_; 368 tid_ = other.tid_; 369 pid_ = other.pid_; 370 flow_id_ = other.flow_id_; 371 return *this; 372 } 373 374 bool is_stage_{false}; 375 ProfilerStage stage_{ProfilerStage::kDefault}; 376 ProfilerModule module_{ProfilerModule::kDefault}; 377 ProfilerEvent event_{ProfilerEvent::kDefault}; 378 std::string op_name_{}; 379 bool is_inner_event_{false}; 380 uint64_t start_time_{0L}; 381 uint64_t end_time_{0L}; 382 uint64_t dur_time_{0L}; 383 uint64_t tid_{}; 384 int32_t pid_{0}; 385 uint64_t flow_id_{UINT64_MAX}; 386 std::string py_stack_{}; 387 }; 388 using ProfilerDataPtr = std::shared_ptr<ProfilerData>; 389 390 struct ProfilerFlowEventData {}; 391 392 struct ProfilerStatisticsInfo { 393 explicit ProfilerStatisticsInfo(const std::string &name, bool is_inner_info = false) name_ProfilerStatisticsInfo394 : name_(name), is_inner_info_(is_inner_info), count_(0), total_time_(0), average_time_(0), percent_(0) {} 395 std::string name_; 396 bool is_inner_info_; 397 size_t count_; 398 uint64_t total_time_; 399 double average_time_; 400 double percent_; 401 AccumulateTimeProfilerStatisticsInfo402 void AccumulateTime(uint64_t time) { 403 total_time_ += time; 404 ++count_; 405 } AverageProfilerStatisticsInfo406 void Average() { 407 MS_EXCEPTION_IF_ZERO("count", count_); 408 average_time_ = static_cast<double>(total_time_) / count_; 409 } CalculatePercentProfilerStatisticsInfo410 void CalculatePercent(uint64_t step_total_time) { 411 MS_EXCEPTION_IF_ZERO("step_total_time", step_total_time); 412 percent_ = (static_cast<double>(total_time_) / step_total_time) * kPercent; 413 } 414 }; 415 using ProfilerStatisticsInfoPtr = std::shared_ptr<ProfilerStatisticsInfo>; 416 417 struct ProfilerEventInfo { 418 ProfilerStatisticsInfoPtr event_statistics_info_; 419 mindspore::HashMap<std::string, ProfilerStatisticsInfoPtr> op_infos_; 420 }; 421 using ProfilerEventInfoPtr = std::shared_ptr<ProfilerEventInfo>; 422 423 struct ProfilerModuleInfo { 424 ProfilerStatisticsInfoPtr module_statistics_info_; 425 std::map<ProfilerEvent, ProfilerEventInfoPtr> event_infos_; 426 }; 427 using ProfilerModuleInfoPtr = std::shared_ptr<ProfilerModuleInfo>; 428 429 using ProfilerDataSpan = std::list<ProfilerDataPtr>; 430 class COMMON_EXPORT ProfilerAnalyzer { 431 public: 432 static ProfilerAnalyzer &GetInstance() noexcept; 433 434 // Increase step at the step begin. 435 void StartStep(); 436 // Analyze and output profiler data at the step end. 437 void EndStep(); 438 439 void Clear() noexcept; 440 441 // Enable MindInsight Profile 442 void EnableMiProfile(); 443 444 // The used by ProfilerRecorder to record data. 445 bool profiler_enable() const; enable_by_env()446 bool enable_by_env() const { return profiler_enable_; } 447 void RecordData(const ProfilerDataPtr &data) noexcept; 448 void RecordFlowData(uint64_t flow_id); 449 uint64_t GetTimeStamp() const noexcept; 450 std::string GetBriefName(const std::string &scope_name) const; 451 452 void ProcessModuleSummaryData(const ProfilerDataSpan &span); 453 step()454 const size_t step() const { return step_; } set_step_time(uint64_t step_time)455 void set_step_time(uint64_t step_time) { step_time_ = step_time; } set_profiler_enable(bool profiler_enable)456 void set_profiler_enable(bool profiler_enable) { profiler_enable_ = profiler_enable; } data()457 const ProfilerDataSpan &data() const { return data_; } data_line()458 const std::list<std::pair<StepInfoPtr, ProfilerDataSpan>> data_line() const { return data_line_; } json_infos()459 const nlohmann::json &json_infos() const { return json_infos_; } module_infos()460 const std::map<ProfilerModule, ProfilerModuleInfoPtr> &module_infos() const { return module_infos_; } stage_infos()461 const std::map<ProfilerStage, ProfilerStatisticsInfoPtr> &stage_infos() const { return stage_infos_; } 462 void SetThreadIdToName(const std::thread::id &id, const std::string &name); 463 464 private: 465 ProfilerAnalyzer() = default; ~ProfilerAnalyzer()466 ~ProfilerAnalyzer() { Clear(); } 467 DISABLE_COPY_AND_ASSIGN(ProfilerAnalyzer); 468 469 void Initialize(); 470 void ProcessData(); 471 472 // Process data. 473 void SaveJsonData(const ProfilerDataPtr &data); 474 void AnalyzeSummaryData(const ProfilerDataPtr &data); 475 void AnalyzeStageSummaryData(const ProfilerDataPtr &data); 476 void AnalyzeModuleSummaryData(const ProfilerDataPtr &data); 477 void AnalyzeEventSummaryData(const ProfilerDataPtr &data); 478 void AnalyzeOpSummaryData(mindspore::HashMap<std::string, ProfilerStatisticsInfoPtr> *const op_infos, 479 const ProfilerDataPtr &data); 480 void AddPythonSummaryData(const uint64_t span_time); 481 482 // Dump data. 483 void DumpJsonData() const; 484 void DumpDetailData(const size_t step, const ProfilerDataSpan &span) const; 485 void DumpSummaryData(const size_t step) const; 486 void DumpStageSummaryData(std::stringstream &string_stream) const; 487 void DumpModuleSummaryData(std::stringstream &string_stream) const; 488 void DumpEventSummaryData(const std::map<ProfilerEvent, ProfilerEventInfoPtr> &event_infos, 489 std::stringstream &string_stream) const; 490 void DumpOpSummaryData(const mindspore::HashMap<std::string, ProfilerStatisticsInfoPtr> &op_infos, 491 std::stringstream &string_stream) const; 492 493 // The relevant members of step. 494 size_t step_{0}; 495 uint64_t step_time_{0}; 496 uint64_t step_start_time_{0}; 497 uint64_t module_total_time_{0}; 498 ProfilerDataSpan data_; 499 // Container list for all data_ points. 500 std::list<std::pair<StepInfoPtr, ProfilerDataSpan>> data_line_; 501 SpinLock data_mutex_; 502 nlohmann::json json_infos_; 503 // The data analyzed level is module-->event-->op, these data would not be cleared in unit test. 504 std::map<ProfilerModule, ProfilerModuleInfoPtr> module_infos_; 505 std::map<ProfilerStage, ProfilerStatisticsInfoPtr> stage_infos_; 506 507 std::map<std::thread::id, std::string> thread_id_to_name_; 508 509 // Save file name. 510 std::string json_file_name_; 511 std::string summary_info_file_name_; 512 std::string detail_info_file_name_; 513 514 // The relevant members of init. 515 int show_top_num_{0}; 516 bool profiler_enable_{false}; 517 bool mi_profiler_enable_{false}; 518 bool init_{false}; 519 }; 520 } // namespace runtime 521 } // namespace mindspore 522 523 #endif // MINDSPORE_CCSRC_RUNTIME_PROFILER_PROFILER_H_ 524