1 /** 2 * Copyright 2021-2023 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H 18 #define MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H 19 #include <algorithm> 20 #include <cstdio> 21 #include <map> 22 #include <thread> 23 #include <memory> 24 #include <mutex> 25 #include <shared_mutex> 26 #include <string> 27 #include <unordered_map> 28 #include <utility> 29 #include <vector> 30 #include "utils/hash_map.h" 31 #include "include/backend/visible.h" 32 33 namespace mindspore { 34 namespace profiler { 35 struct StartDuration { 36 uint64_t start_timestamp = 0l; 37 float duration = 0l; 38 size_t tid = 0; 39 }; 40 41 struct OneStepStartEndInfo { 42 std::string iter_start_op_name; 43 std::string fp_start_op_name; 44 std::string iter_end_op_name; 45 }; 46 47 struct OpInfo { 48 std::string op_name; 49 float cupti_api_call_time = 0l; 50 float cupti_activity_time = 0l; 51 float op_host_cost_time = 0; 52 int op_kernel_api_count = 0; 53 int op_kernel_count = 0; 54 int op_count = 0; 55 StartDuration tmp_start_duration; 56 std::vector<StartDuration> start_duration; 57 void *stream; 58 uint32_t pid; 59 }; 60 61 struct HostProfileData { 62 std::thread::id tid = std::thread::id(); 63 int pid = 0; 64 int parent_pid = 0; 65 std::string module_name = ""; 66 std::string event = ""; 67 std::string stage = ""; 68 int level = 0; 69 int start_end = 0; 70 std::map<std::string, std::string> custom_info; 71 int64_t memory_usage = 0; 72 uint64_t time_stamp = 0; 73 }; 74 75 struct MemoryPoolInfo { 76 uint64_t time_stamp = 0; 77 size_t total_allocated = 0; 78 size_t total_reserved = 0; 79 size_t total_active = 0; 80 }; 81 82 class BACKEND_EXPORT ProfilerManager { 83 public: 84 static std::shared_ptr<ProfilerManager> &GetInstance(); 85 ProfilerManager() = default; 86 ~ProfilerManager() = default; 87 ProfilerManager(const ProfilerManager &) = delete; 88 ProfilerManager &operator=(const ProfilerManager &) = delete; 89 bool GetProfilingEnableFlag() const; 90 void RecordOneStepStartEndInfo() const; 91 std::string GetProfilingOptions() const; GetNetDynamicShapeStatus()92 bool GetNetDynamicShapeStatus() const { return is_dynamic_shape_net_; } SetNetDynamicShapeStatus()93 void SetNetDynamicShapeStatus() { is_dynamic_shape_net_ = true; } 94 std::string ProfileDataPath() const; 95 void SetProfileFramework(const std::string &profile_framework); 96 bool NeedCollectHostTime() const; 97 bool NeedCollectHostMemory() const; 98 bool EnableCollectHost() const; 99 100 private: 101 inline static std::shared_ptr<ProfilerManager> profiler_manager_inst_ = std::make_shared<ProfilerManager>(); 102 bool is_dynamic_shape_net_ = 0; 103 std::string profile_framework_ = "all"; 104 }; 105 106 class BACKEND_EXPORT Profiler { 107 public: 108 static std::shared_ptr<Profiler> GetInstance(const std::string &name) noexcept; 109 static bool Register(const std::string &name, const std::shared_ptr<Profiler> &instance); 110 static void Clear(); 111 112 Profiler() = default; 113 virtual ~Profiler() = default; 114 115 virtual void Init(const std::string &profiling_path, uint32_t device_id, const std::string &profiling_options) = 0; 116 virtual void Finalize() = 0; IsInitialized()117 bool IsInitialized() const { return init_flag_; } 118 virtual void Start() = 0; 119 virtual void Stop() = 0; StepStart(uint64_t,void *)120 virtual void StepStart(uint64_t /* step_id */, void * /* stream */) {} StepStop()121 virtual void StepStop() {} 122 virtual void StepProfilingEnable(const bool enable_flag) = 0; 123 virtual void OpDataProducerEnd() = 0; 124 void RecordOneStepStartEndInfo(); GetEnableFlag()125 bool GetEnableFlag() const { return enable_flag_; } EnableOpTime()126 void EnableOpTime() { op_time_ = true; } EnableProfileMemory()127 void EnableProfileMemory() { profile_memory_ = true; } GetOpTimeFlag()128 bool GetOpTimeFlag() const { return op_time_; } GetProfileMemoryFlag()129 bool GetProfileMemoryFlag() const { return profile_memory_; } GetProfilingOptions()130 std::string GetProfilingOptions() const { return profiling_options_; } ProfileDataPath()131 std::string ProfileDataPath() const { return profile_data_path_; } 132 void RecordOneStepStartEndInfo(std::string op_name); GetSingleOpLaunchTime()133 std::pair<double, double> GetSingleOpLaunchTime() { return single_op_launch_start_time_end_time_; } SetSingleOpLaunchTime(const std::pair<double,double> & launch_start_end)134 void SetSingleOpLaunchTime(const std::pair<double, double> &launch_start_end) { 135 single_op_launch_start_time_end_time_ = launch_start_end; 136 } GetParallelStrategyEnableFlag()137 bool GetParallelStrategyEnableFlag() const { return is_parallel_strategy; } 138 void SyncEnable(const bool enable_flag); 139 void DataProcessEnable(const bool enable_flag); EnableHostStack()140 bool EnableHostStack() const { return host_stack_ && enable_flag_; } 141 142 protected: 143 void SetRunTimeData(const std::string &op_name, const float time_elapsed); 144 void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration); 145 void FindOneStepFpStartOp(uint32_t vector_size); 146 void FindOneStepIterEndOp(uint32_t vector_size); 147 uint64_t GetHostMonoTimeStamp() const; 148 // Get timestamp in us 149 uint64_t GetRealTimeStamp() const; 150 virtual void SaveProfileData() = 0; 151 virtual void ClearInst() = 0; 152 std::pair<double, double> single_op_launch_start_time_end_time_; 153 bool enable_flag_ = false; 154 bool op_time_ = false; 155 bool profile_memory_ = false; 156 bool has_find_ = false; 157 bool is_parallel_strategy = false; 158 bool init_flag_ = false; 159 std::string profile_data_path_; 160 std::unordered_map<std::string, OpInfo> op_info_map_; 161 std::vector<MemoryPoolInfo> memory_info_list_; 162 OneStepStartEndInfo step_start_end_info_; 163 std::vector<OneStepStartEndInfo> all_step_start_end_info_; 164 std::vector<std::string> step_start_end_info_vector_; 165 std::shared_mutex op_map_mutex_; 166 std::mutex record_mutex_; 167 std::string profiling_options_; 168 uint32_t iter_end_op_index_ = 0; 169 uint32_t fp_start_op_index_ = 1; 170 bool sync_enable_flag_ = true; 171 bool data_process_enable_ = false; 172 std::string op_type_ = "GetNext"; 173 bool host_stack_ = false; 174 175 private: 176 static std::map<std::string, std::shared_ptr<Profiler>> &GetInstanceMap(); 177 }; 178 179 // level: 0, for developer user, 1, for general user; 180 // profile_framework: 0, all host info, 1, host memory, 2, host time; 181 // start_end: 0, start flag, 1, end flag, 2, no distinguish start and end. 182 // Default parameter for host profile meaning: for developer user, collect both time and memory, record timestamp. 183 BACKEND_EXPORT void CollectHostInfo( 184 const std::string &module_name, const std::string &event, const std::string &stage, int level = 0, 185 int profile_framework = 0, int start_end = 2, 186 const std::map<std::string, std::string> &custom_info = std::map<std::string, std::string>()); 187 #ifdef __linux__ 188 BACKEND_EXPORT void WriteHostDataToFile(const HostProfileData &host_profile_data, const std::string &output_path); 189 #endif 190 191 BACKEND_EXPORT uint64_t GetClockTime(); 192 193 BACKEND_EXPORT uint64_t GetClockSyscnt(); 194 195 } // namespace profiler 196 } // namespace mindspore 197 198 #define PROFILER_REG(NAME, CLAZZ) \ 199 static bool g_Profiler_##NAME##_reg_result = mindspore::profiler::Profiler::Register(NAME, std::make_shared<CLAZZ>()) 200 201 #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H 202