• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "profiler/device/gpu/gpu_data_saver.h"
17 #include <fstream>
18 #include <numeric>
19 #include "sys/stat.h"
20 #include "utils/log_adapter.h"
21 #include "utils/ms_utils.h"
22 #include "utils/ms_context.h"
23 
24 namespace mindspore {
25 namespace profiler {
26 namespace gpu {
ActivityData(std::shared_ptr<Event> data)27 ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) {
28   grid_dim_ = basic_info_->activity_type == ActivityType::kKernel
29                 ? "\"" + std::to_string(basic_info_->kernel_info.grid_x) + ',' +
30                     std::to_string(basic_info_->kernel_info.grid_y) + ',' +
31                     std::to_string(basic_info_->kernel_info.grid_z) + "\""
32                 : "";
33   block_dim_ = basic_info_->activity_type == ActivityType::kKernel
34                  ? "\"" + std::to_string(basic_info_->kernel_info.block_x) + ',' +
35                      std::to_string(basic_info_->kernel_info.block_y) + ',' +
36                      std::to_string(basic_info_->kernel_info.block_z) + "\""
37                  : "";
38   count_ = 1;
39   total_duration_ = (basic_info_->end_time_stamp - basic_info_->start_time_stamp) / kTimeUnit;
40   avg_duration_ = total_duration_;
41   max_duration_ = total_duration_;
42   min_duration_ = total_duration_;
43   start_duration.emplace_back(StartDuration({basic_info_->start_time_stamp, total_duration_}));
44 }
45 
operator +=(const ActivityData & other)46 ActivityData &ActivityData::operator+=(const ActivityData &other) {
47   this->count_ += other.count_;
48   this->total_duration_ += other.total_duration_;
49   // update max or min duration
50   if (other.total_duration_ > this->max_duration_) {
51     this->max_duration_ = other.total_duration_;
52   } else if (other.max_duration_ < this->min_duration_) {
53     this->min_duration_ = other.total_duration_;
54   }
55   return *this;
56 }
57 
ParseEvent(const std::vector<Event> & events)58 void GpuDataSaver::ParseEvent(const std::vector<Event> &events) {
59   // Put Kernel activity events into activity_infos_
60   for (const auto &event : events) {
61     if (event.op_name.empty() || event.api_type != CUPTIApiType::kActivity ||
62         event.activity_type != ActivityType::kKernel) {
63       continue;
64     }
65     AddKernelEvent(event);
66   }
67   // update average time of kernel op cost
68   for (auto &device_infos : activity_infos_) {
69     // device_infos: <device_id, DeviceActivityInfos>
70     for (auto &activity_info : device_infos.second) {
71       if (activity_info.second.count_ == 0) {
72         MS_LOG(ERROR) << "The number of activity operations can not be 0.";
73         return;
74       }
75       // activity_info: <kernel_name, Activity>
76       activity_info.second.avg_duration_ = activity_info.second.total_duration_ / activity_info.second.count_;
77     }
78     MS_LOG(DEBUG) << "Get " << device_infos.second.size() << " activity items for device:" << device_infos.first;
79   }
80 }
81 
AddKernelEvent(const Event & event)82 void GpuDataSaver::AddKernelEvent(const Event &event) {
83   // Put kernel event to activity_infos according to device id
84   uint32_t device_id = event.device_id;
85   auto iter = activity_infos_.find(device_id);
86   if (iter == activity_infos_.end()) {
87     auto res_flag = activity_infos_.emplace(device_id, DeviceActivityInfos());
88     AddKernelEventToDevice(event, &res_flag.first->second);
89   } else {
90     AddKernelEventToDevice(event, &iter->second);
91   }
92 }
93 
AddKernelEventToDevice(const Event & event,DeviceActivityInfos * device_activity_infos)94 void GpuDataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) {
95   if (device_activity_infos == nullptr) {
96     MS_LOG(ERROR) << "Device activity information is invalid.";
97     return;
98   }
99   // Combine kernel activity with same kernel name
100   auto event_ptr = std::make_shared<Event>(event);
101   if (event_ptr == nullptr) {
102     MS_LOG(WARNING) << "Create event failed when add event to device.";
103     return;
104   }
105   ActivityData activity_data = ActivityData(event_ptr);
106   std::string kernel_name = event.kernel_name;
107   auto iter = device_activity_infos->find(kernel_name);
108   if (iter == device_activity_infos->end()) {
109     device_activity_infos->emplace(kernel_name, activity_data);
110   } else {
111     iter->second += activity_data;
112     iter->second.start_duration.emplace_back(StartDuration({event.start_time_stamp, activity_data.total_duration_}));
113   }
114 }
115 
CpuProfilingTimeSynchronizedToGpu(const BaseTime & start_time)116 void GpuDataSaver::CpuProfilingTimeSynchronizedToGpu(const BaseTime &start_time) {
117   auto cpu_data_saver_inst = profiler::cpu::CpuDataSaver::GetInstance();
118   MS_EXCEPTION_IF_NULL(cpu_data_saver_inst);
119   auto &cpu_op_timestamps_map = cpu_data_saver_inst->GetOpTimeStampInfo();
120   auto cpu_op_iter = cpu_op_timestamps_map.begin();
121   while (cpu_op_iter != cpu_op_timestamps_map.end()) {
122     for (auto &time_iter : cpu_op_iter->second) {
123       time_iter.start_timestamp =
124         time_iter.start_timestamp - start_time.host_start_monotonic_raw_time + start_time.gpu_start_time;
125       // time unit from ms to us.
126       time_iter.duration *= kTimeUnit;
127     }
128     cpu_op_iter++;
129   }
130 }
131 
WriteFile(std::string out_path_dir,const BaseTime & start_time)132 void GpuDataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) {
133   if (out_path_dir.empty()) {
134     MS_LOG(WARNING) << "Output directory. Ignore the writing data.";
135     return;
136   }
137   if (op_detail_infos_.empty() || op_type_infos_.empty() || activity_infos_.empty()) {
138     MS_LOG(WARNING) << "No operation detail infos to write.";
139     return;
140   }
141   // not support multi-device for operator info per process yet
142   device_id_ = std::to_string(activity_infos_.begin()->first);
143   op_side_ = "gpu";
144   WriteOpDetail(out_path_dir);
145   WriteOpType(out_path_dir);
146   WriteActivity(out_path_dir);
147   WriteOpTimestamp(out_path_dir);
148   WriteStartTime(out_path_dir, start_time);
149   CpuProfilingTimeSynchronizedToGpu(start_time);
150   if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
151     WriteStepTraceAsyncLaunchKernel(out_path_dir);
152   } else {
153     WriteStepTrace(out_path_dir);
154   }
155 }
156 
WriteActivity(const std::string & saver_base_dir)157 void GpuDataSaver::WriteActivity(const std::string &saver_base_dir) {
158   std::string file_path_base = saver_base_dir + "/gpu_activity_data_";
159   std::string timestamp_file_path_base = saver_base_dir + "/activity_execute_timestamp_";
160   for (auto device_info : activity_infos_) {
161     // write activity result csv
162     std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv";
163     std::ofstream ofs(file_path);
164     if (!ofs.is_open()) {
165       MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
166       return;
167     }
168     // write activity timestamp txt
169     std::string timestamp_file_path = timestamp_file_path_base + std::to_string(device_info.first) + ".txt";
170     std::ofstream activity_timestamp_ofs(timestamp_file_path);
171     if (!activity_timestamp_ofs.is_open()) {
172       ofs.close();
173       MS_LOG(WARNING) << "Open file '" << timestamp_file_path << "' failed!";
174       return;
175     }
176     // write activity data into file
177     ofs << ActivityData().GetHeader() << std::endl;
178     for (auto activity_data : device_info.second) {
179       ofs << activity_data.second << std::endl;
180       for (auto start_duration : activity_data.second.start_duration) {
181         activity_timestamp_ofs << activity_data.second.basic_info_->kernel_name << ";";
182         activity_timestamp_ofs << activity_data.second.basic_info_->stream_id << ";";
183         activity_timestamp_ofs << start_duration.start_timestamp << ";";
184         activity_timestamp_ofs << start_duration.duration << std::endl;
185       }
186     }
187     ofs.close();
188     ChangeFileMode(file_path);
189     activity_timestamp_ofs.close();
190     ChangeFileMode(timestamp_file_path);
191     MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path;
192   }
193 }
194 
WriteStepTraceAsyncLaunchKernel(const std::string & saver_base_dir)195 void GpuDataSaver::WriteStepTraceAsyncLaunchKernel(const std::string &saver_base_dir) {
196   std::string file_path = saver_base_dir + "/step_trace_profiling_" + device_id_ + ".txt";
197   std::ofstream ofs(file_path);
198   // check if the file is writable
199   if (!ofs.is_open()) {
200     MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
201     return;
202   }
203 
204   // cpu profiler information.
205   auto cpu_data_saver_inst = profiler::cpu::CpuDataSaver::GetInstance();
206   MS_EXCEPTION_IF_NULL(cpu_data_saver_inst);
207   auto &cpu_op_timestamps_map = cpu_data_saver_inst->GetOpTimeStampInfo();
208 
209   // write step trace time info into file
210   uint32_t step = 0;
211   uint64_t duration;
212   for (auto step_start_end : all_step_start_end_info_) {
213     auto iter_start_op_name = step_start_end.iter_start_op_name;
214     auto fp_op_name = step_start_end.fp_start_op_name;
215     auto iter_end_op_name = step_start_end.iter_end_op_name;
216     auto iter_start_op_timestamp = op_timestamps_map_.find(iter_start_op_name);
217     auto fp_op_timestamp = op_timestamps_map_.find(fp_op_name);
218     auto bp_end_op_timestamp = op_timestamps_map_.find(step_trace_op_name_.trace_bp_end);
219     auto iter_end_op_timestamp = op_timestamps_map_.find(iter_end_op_name);
220 
221     // if iter_start/fp_start/iter_end op is executed on cpu, update it.
222     if (iter_start_op_timestamp == op_timestamps_map_.end()) {
223       iter_start_op_timestamp = cpu_op_timestamps_map.find(iter_start_op_name);
224     }
225     if (fp_op_timestamp == op_timestamps_map_.end()) {
226       fp_op_timestamp = cpu_op_timestamps_map.find(fp_op_name);
227     }
228     if (iter_end_op_timestamp == op_timestamps_map_.end()) {
229       iter_end_op_timestamp = cpu_op_timestamps_map.find(iter_end_op_name);
230     }
231 
232     if (iter_end_op_name == "Default/InitDataSetQueue-op0") {
233       continue;
234     }
235 
236     if (iter_start_op_timestamp == op_timestamps_map_.end() || fp_op_timestamp == op_timestamps_map_.end() ||
237         iter_end_op_timestamp == op_timestamps_map_.end() || bp_end_op_timestamp == op_timestamps_map_.end()) {
238       MS_LOG(ERROR) << "[profiling step trace] failed, do not find \"" << fp_op_name << "\" or \"" << iter_end_op_name
239                     << "\" or \"" << step_trace_op_name_.trace_bp_end << "\"";
240       ofs.close();
241       return;
242     }
243     if (iter_start_op_timestamp->second.size() <= step || fp_op_timestamp->second.size() <= step ||
244         iter_end_op_timestamp->second.size() <= step || bp_end_op_timestamp->second.size() <= step) {
245       MS_LOG(WARNING) << "[profiling step trace] insufficient number of timestamps for fp/bp/iter_end operators.";
246       ofs.close();
247       return;
248     }
249 
250     try {
251       // write fp,bp and iter_end timestamp.
252       duration = iter_end_op_timestamp->second[step].duration * kTimeUnit;
253       uint64_t iter_end_timestamp = iter_end_op_timestamp->second[step].start_timestamp + duration;
254       ofs << iter_start_op_name << "," << iter_start_op_timestamp->second[step].start_timestamp << " " << fp_op_name
255           << "," << fp_op_timestamp->second[step].start_timestamp << " " << step_trace_op_name_.trace_bp_end << ","
256           << bp_end_op_timestamp->second[step].start_timestamp << " " << iter_end_op_name << "," << iter_end_timestamp;
257 
258       // write communication op info
259       for (auto op_name : step_trace_op_name_.trace_custom_node) {
260         // convert the time unit from 1ns to 10ns (keep the same with ascend)
261         auto iter_op_timestamp = op_timestamps_map_.find(op_name);
262         if (iter_op_timestamp == op_timestamps_map_.end()) {
263           MS_LOG(ERROR) << "[profiling step trace] failed, do not find \"" << fp_op_name << "\" or " << iter_end_op_name
264                         << "\" or \"" << step_trace_op_name_.trace_bp_end << "\"";
265           ofs.close();
266           return;
267         }
268 
269         if (iter_op_timestamp->second.size() <= step) {
270           MS_LOG(WARNING) << "[profiling step trace] insufficient number of timestamps for communication operators.";
271           ofs.close();
272           return;
273         }
274 
275         duration = iter_op_timestamp->second[step].duration * kTimeUnit;
276         uint64_t end_timestamp = (duration + iter_op_timestamp->second[step].start_timestamp);
277         uint64_t start_timestamp = iter_op_timestamp->second[step].start_timestamp;
278         ofs << " " << op_name << "," << start_timestamp << "," << end_timestamp;
279       }
280       ofs << std::endl;
281     } catch (const std::exception &e) {
282       MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what();
283       ofs.close();
284       return;
285     }
286     step++;
287   }
288 
289   ofs.close();
290   ChangeFileMode(file_path);
291   MS_LOG(INFO) << "Write step trace infos into file: " << file_path;
292 }
293 
WriteStepTrace(const std::string & saver_base_dir)294 void GpuDataSaver::WriteStepTrace(const std::string &saver_base_dir) {
295   std::string file_path = saver_base_dir + "/step_trace_profiling_" + device_id_ + ".txt";
296   std::ofstream ofs(file_path);
297   // check if the file is writable
298   if (!ofs.is_open()) {
299     MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
300     return;
301   }
302 
303   // write step trace time info into file
304   const uint32_t factor = 10;
305   std::vector<std::string> op_name_arr;
306   op_name_arr.push_back(step_trace_op_name_from_graph_.trace_fp_start);
307   op_name_arr.push_back(step_trace_op_name_from_graph_.trace_bp_end);
308   op_name_arr.push_back(step_trace_op_name_from_graph_.trace_iter_end);
309   if (!step_trace_op_name_from_graph_.trace_custom_node.empty()) {
310     auto start = step_trace_op_name_from_graph_.trace_custom_node.begin();
311     auto end = step_trace_op_name_from_graph_.trace_custom_node.end();
312     std::copy(start, end, std::back_inserter(op_name_arr));
313   }
314   for (auto op_name : op_name_arr) {
315     auto iter_op_timestamp = op_timestamps_map_.find(op_name);
316     if (iter_op_timestamp != op_timestamps_map_.end()) {
317       try {
318         ofs << op_name << " ";
319         for (auto start_end : iter_op_timestamp->second) {
320           // convert the time unit from 1ns to 10ns (keep the same with ascend)
321           uint64_t duration = start_end.duration * kTimeUnit;
322           uint64_t end_timestamp = (duration + start_end.start_timestamp) / factor;
323           uint64_t start_timestamp = start_end.start_timestamp / factor;
324           ofs << start_timestamp << "," << end_timestamp << " ";
325         }
326         ofs << std::endl;
327       } catch (const std::exception &e) {
328         MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what();
329         ofs.close();
330         return;
331       }
332     }
333   }
334 
335   ofs.close();
336   ChangeFileMode(file_path);
337   MS_LOG(INFO) << "Write step trace infos into file: " << file_path;
338 }
339 
WriteStartTime(const std::string & saver_base_dir,const BaseTime & start_time)340 void GpuDataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time) {
341   std::string file_path = saver_base_dir + "/start_time_" + device_id_ + ".txt";
342   std::ofstream ofs(file_path);
343   // check if the file is writable
344   if (!ofs.is_open()) {
345     MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
346     return;
347   }
348 
349   // write start time info into file
350   try {
351     ofs << "host_monotonic_raw_time(ns): " << start_time.host_start_monotonic_raw_time << std::endl;
352     ofs << "gpu_start_time(ns): " << start_time.gpu_start_time << std::endl;
353   } catch (const std::exception &e) {
354     MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what();
355     ofs.close();
356     return;
357   }
358 
359   ofs.close();
360   ChangeFileMode(file_path);
361   MS_LOG(INFO) << "Write profiler start time infos into file: " << file_path;
362 }
363 }  // namespace gpu
364 }  // namespace profiler
365 }  // namespace mindspore
366