1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "profiler/device/gpu/gpu_data_saver.h"
17 #include <fstream>
18 #include <numeric>
19 #include "sys/stat.h"
20 #include "utils/log_adapter.h"
21 #include "utils/ms_utils.h"
22 #include "utils/ms_context.h"
23
24 namespace mindspore {
25 namespace profiler {
26 namespace gpu {
ActivityData(std::shared_ptr<Event> data)27 ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) {
28 grid_dim_ = basic_info_->activity_type == ActivityType::kKernel
29 ? "\"" + std::to_string(basic_info_->kernel_info.grid_x) + ',' +
30 std::to_string(basic_info_->kernel_info.grid_y) + ',' +
31 std::to_string(basic_info_->kernel_info.grid_z) + "\""
32 : "";
33 block_dim_ = basic_info_->activity_type == ActivityType::kKernel
34 ? "\"" + std::to_string(basic_info_->kernel_info.block_x) + ',' +
35 std::to_string(basic_info_->kernel_info.block_y) + ',' +
36 std::to_string(basic_info_->kernel_info.block_z) + "\""
37 : "";
38 count_ = 1;
39 total_duration_ = (basic_info_->end_time_stamp - basic_info_->start_time_stamp) / kTimeUnit;
40 avg_duration_ = total_duration_;
41 max_duration_ = total_duration_;
42 min_duration_ = total_duration_;
43 start_duration.emplace_back(StartDuration({basic_info_->start_time_stamp, total_duration_}));
44 }
45
operator +=(const ActivityData & other)46 ActivityData &ActivityData::operator+=(const ActivityData &other) {
47 this->count_ += other.count_;
48 this->total_duration_ += other.total_duration_;
49 // update max or min duration
50 if (other.total_duration_ > this->max_duration_) {
51 this->max_duration_ = other.total_duration_;
52 } else if (other.max_duration_ < this->min_duration_) {
53 this->min_duration_ = other.total_duration_;
54 }
55 return *this;
56 }
57
ParseEvent(const std::vector<Event> & events)58 void GpuDataSaver::ParseEvent(const std::vector<Event> &events) {
59 // Put Kernel activity events into activity_infos_
60 for (const auto &event : events) {
61 if (event.op_name.empty() || event.api_type != CUPTIApiType::kActivity ||
62 event.activity_type != ActivityType::kKernel) {
63 continue;
64 }
65 AddKernelEvent(event);
66 }
67 // update average time of kernel op cost
68 for (auto &device_infos : activity_infos_) {
69 // device_infos: <device_id, DeviceActivityInfos>
70 for (auto &activity_info : device_infos.second) {
71 if (activity_info.second.count_ == 0) {
72 MS_LOG(ERROR) << "The number of activity operations can not be 0.";
73 return;
74 }
75 // activity_info: <kernel_name, Activity>
76 activity_info.second.avg_duration_ = activity_info.second.total_duration_ / activity_info.second.count_;
77 }
78 MS_LOG(DEBUG) << "Get " << device_infos.second.size() << " activity items for device:" << device_infos.first;
79 }
80 }
81
AddKernelEvent(const Event & event)82 void GpuDataSaver::AddKernelEvent(const Event &event) {
83 // Put kernel event to activity_infos according to device id
84 uint32_t device_id = event.device_id;
85 auto iter = activity_infos_.find(device_id);
86 if (iter == activity_infos_.end()) {
87 auto res_flag = activity_infos_.emplace(device_id, DeviceActivityInfos());
88 AddKernelEventToDevice(event, &res_flag.first->second);
89 } else {
90 AddKernelEventToDevice(event, &iter->second);
91 }
92 }
93
AddKernelEventToDevice(const Event & event,DeviceActivityInfos * device_activity_infos)94 void GpuDataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) {
95 if (device_activity_infos == nullptr) {
96 MS_LOG(ERROR) << "Device activity information is invalid.";
97 return;
98 }
99 // Combine kernel activity with same kernel name
100 auto event_ptr = std::make_shared<Event>(event);
101 if (event_ptr == nullptr) {
102 MS_LOG(WARNING) << "Create event failed when add event to device.";
103 return;
104 }
105 ActivityData activity_data = ActivityData(event_ptr);
106 std::string kernel_name = event.kernel_name;
107 auto iter = device_activity_infos->find(kernel_name);
108 if (iter == device_activity_infos->end()) {
109 device_activity_infos->emplace(kernel_name, activity_data);
110 } else {
111 iter->second += activity_data;
112 iter->second.start_duration.emplace_back(StartDuration({event.start_time_stamp, activity_data.total_duration_}));
113 }
114 }
115
CpuProfilingTimeSynchronizedToGpu(const BaseTime & start_time)116 void GpuDataSaver::CpuProfilingTimeSynchronizedToGpu(const BaseTime &start_time) {
117 auto cpu_data_saver_inst = profiler::cpu::CpuDataSaver::GetInstance();
118 MS_EXCEPTION_IF_NULL(cpu_data_saver_inst);
119 auto &cpu_op_timestamps_map = cpu_data_saver_inst->GetOpTimeStampInfo();
120 auto cpu_op_iter = cpu_op_timestamps_map.begin();
121 while (cpu_op_iter != cpu_op_timestamps_map.end()) {
122 for (auto &time_iter : cpu_op_iter->second) {
123 time_iter.start_timestamp =
124 time_iter.start_timestamp - start_time.host_start_monotonic_raw_time + start_time.gpu_start_time;
125 // time unit from ms to us.
126 time_iter.duration *= kTimeUnit;
127 }
128 cpu_op_iter++;
129 }
130 }
131
WriteFile(std::string out_path_dir,const BaseTime & start_time)132 void GpuDataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) {
133 if (out_path_dir.empty()) {
134 MS_LOG(WARNING) << "Output directory. Ignore the writing data.";
135 return;
136 }
137 if (op_detail_infos_.empty() || op_type_infos_.empty() || activity_infos_.empty()) {
138 MS_LOG(WARNING) << "No operation detail infos to write.";
139 return;
140 }
141 // not support multi-device for operator info per process yet
142 device_id_ = std::to_string(activity_infos_.begin()->first);
143 op_side_ = "gpu";
144 WriteOpDetail(out_path_dir);
145 WriteOpType(out_path_dir);
146 WriteActivity(out_path_dir);
147 WriteOpTimestamp(out_path_dir);
148 WriteStartTime(out_path_dir, start_time);
149 CpuProfilingTimeSynchronizedToGpu(start_time);
150 if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
151 WriteStepTraceAsyncLaunchKernel(out_path_dir);
152 } else {
153 WriteStepTrace(out_path_dir);
154 }
155 }
156
WriteActivity(const std::string & saver_base_dir)157 void GpuDataSaver::WriteActivity(const std::string &saver_base_dir) {
158 std::string file_path_base = saver_base_dir + "/gpu_activity_data_";
159 std::string timestamp_file_path_base = saver_base_dir + "/activity_execute_timestamp_";
160 for (auto device_info : activity_infos_) {
161 // write activity result csv
162 std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv";
163 std::ofstream ofs(file_path);
164 if (!ofs.is_open()) {
165 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
166 return;
167 }
168 // write activity timestamp txt
169 std::string timestamp_file_path = timestamp_file_path_base + std::to_string(device_info.first) + ".txt";
170 std::ofstream activity_timestamp_ofs(timestamp_file_path);
171 if (!activity_timestamp_ofs.is_open()) {
172 ofs.close();
173 MS_LOG(WARNING) << "Open file '" << timestamp_file_path << "' failed!";
174 return;
175 }
176 // write activity data into file
177 ofs << ActivityData().GetHeader() << std::endl;
178 for (auto activity_data : device_info.second) {
179 ofs << activity_data.second << std::endl;
180 for (auto start_duration : activity_data.second.start_duration) {
181 activity_timestamp_ofs << activity_data.second.basic_info_->kernel_name << ";";
182 activity_timestamp_ofs << activity_data.second.basic_info_->stream_id << ";";
183 activity_timestamp_ofs << start_duration.start_timestamp << ";";
184 activity_timestamp_ofs << start_duration.duration << std::endl;
185 }
186 }
187 ofs.close();
188 ChangeFileMode(file_path);
189 activity_timestamp_ofs.close();
190 ChangeFileMode(timestamp_file_path);
191 MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path;
192 }
193 }
194
WriteStepTraceAsyncLaunchKernel(const std::string & saver_base_dir)195 void GpuDataSaver::WriteStepTraceAsyncLaunchKernel(const std::string &saver_base_dir) {
196 std::string file_path = saver_base_dir + "/step_trace_profiling_" + device_id_ + ".txt";
197 std::ofstream ofs(file_path);
198 // check if the file is writable
199 if (!ofs.is_open()) {
200 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
201 return;
202 }
203
204 // cpu profiler information.
205 auto cpu_data_saver_inst = profiler::cpu::CpuDataSaver::GetInstance();
206 MS_EXCEPTION_IF_NULL(cpu_data_saver_inst);
207 auto &cpu_op_timestamps_map = cpu_data_saver_inst->GetOpTimeStampInfo();
208
209 // write step trace time info into file
210 uint32_t step = 0;
211 uint64_t duration;
212 for (auto step_start_end : all_step_start_end_info_) {
213 auto iter_start_op_name = step_start_end.iter_start_op_name;
214 auto fp_op_name = step_start_end.fp_start_op_name;
215 auto iter_end_op_name = step_start_end.iter_end_op_name;
216 auto iter_start_op_timestamp = op_timestamps_map_.find(iter_start_op_name);
217 auto fp_op_timestamp = op_timestamps_map_.find(fp_op_name);
218 auto bp_end_op_timestamp = op_timestamps_map_.find(step_trace_op_name_.trace_bp_end);
219 auto iter_end_op_timestamp = op_timestamps_map_.find(iter_end_op_name);
220
221 // if iter_start/fp_start/iter_end op is executed on cpu, update it.
222 if (iter_start_op_timestamp == op_timestamps_map_.end()) {
223 iter_start_op_timestamp = cpu_op_timestamps_map.find(iter_start_op_name);
224 }
225 if (fp_op_timestamp == op_timestamps_map_.end()) {
226 fp_op_timestamp = cpu_op_timestamps_map.find(fp_op_name);
227 }
228 if (iter_end_op_timestamp == op_timestamps_map_.end()) {
229 iter_end_op_timestamp = cpu_op_timestamps_map.find(iter_end_op_name);
230 }
231
232 if (iter_end_op_name == "Default/InitDataSetQueue-op0") {
233 continue;
234 }
235
236 if (iter_start_op_timestamp == op_timestamps_map_.end() || fp_op_timestamp == op_timestamps_map_.end() ||
237 iter_end_op_timestamp == op_timestamps_map_.end() || bp_end_op_timestamp == op_timestamps_map_.end()) {
238 MS_LOG(ERROR) << "[profiling step trace] failed, do not find \"" << fp_op_name << "\" or \"" << iter_end_op_name
239 << "\" or \"" << step_trace_op_name_.trace_bp_end << "\"";
240 ofs.close();
241 return;
242 }
243 if (iter_start_op_timestamp->second.size() <= step || fp_op_timestamp->second.size() <= step ||
244 iter_end_op_timestamp->second.size() <= step || bp_end_op_timestamp->second.size() <= step) {
245 MS_LOG(WARNING) << "[profiling step trace] insufficient number of timestamps for fp/bp/iter_end operators.";
246 ofs.close();
247 return;
248 }
249
250 try {
251 // write fp,bp and iter_end timestamp.
252 duration = iter_end_op_timestamp->second[step].duration * kTimeUnit;
253 uint64_t iter_end_timestamp = iter_end_op_timestamp->second[step].start_timestamp + duration;
254 ofs << iter_start_op_name << "," << iter_start_op_timestamp->second[step].start_timestamp << " " << fp_op_name
255 << "," << fp_op_timestamp->second[step].start_timestamp << " " << step_trace_op_name_.trace_bp_end << ","
256 << bp_end_op_timestamp->second[step].start_timestamp << " " << iter_end_op_name << "," << iter_end_timestamp;
257
258 // write communication op info
259 for (auto op_name : step_trace_op_name_.trace_custom_node) {
260 // convert the time unit from 1ns to 10ns (keep the same with ascend)
261 auto iter_op_timestamp = op_timestamps_map_.find(op_name);
262 if (iter_op_timestamp == op_timestamps_map_.end()) {
263 MS_LOG(ERROR) << "[profiling step trace] failed, do not find \"" << fp_op_name << "\" or " << iter_end_op_name
264 << "\" or \"" << step_trace_op_name_.trace_bp_end << "\"";
265 ofs.close();
266 return;
267 }
268
269 if (iter_op_timestamp->second.size() <= step) {
270 MS_LOG(WARNING) << "[profiling step trace] insufficient number of timestamps for communication operators.";
271 ofs.close();
272 return;
273 }
274
275 duration = iter_op_timestamp->second[step].duration * kTimeUnit;
276 uint64_t end_timestamp = (duration + iter_op_timestamp->second[step].start_timestamp);
277 uint64_t start_timestamp = iter_op_timestamp->second[step].start_timestamp;
278 ofs << " " << op_name << "," << start_timestamp << "," << end_timestamp;
279 }
280 ofs << std::endl;
281 } catch (const std::exception &e) {
282 MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what();
283 ofs.close();
284 return;
285 }
286 step++;
287 }
288
289 ofs.close();
290 ChangeFileMode(file_path);
291 MS_LOG(INFO) << "Write step trace infos into file: " << file_path;
292 }
293
WriteStepTrace(const std::string & saver_base_dir)294 void GpuDataSaver::WriteStepTrace(const std::string &saver_base_dir) {
295 std::string file_path = saver_base_dir + "/step_trace_profiling_" + device_id_ + ".txt";
296 std::ofstream ofs(file_path);
297 // check if the file is writable
298 if (!ofs.is_open()) {
299 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
300 return;
301 }
302
303 // write step trace time info into file
304 const uint32_t factor = 10;
305 std::vector<std::string> op_name_arr;
306 op_name_arr.push_back(step_trace_op_name_from_graph_.trace_fp_start);
307 op_name_arr.push_back(step_trace_op_name_from_graph_.trace_bp_end);
308 op_name_arr.push_back(step_trace_op_name_from_graph_.trace_iter_end);
309 if (!step_trace_op_name_from_graph_.trace_custom_node.empty()) {
310 auto start = step_trace_op_name_from_graph_.trace_custom_node.begin();
311 auto end = step_trace_op_name_from_graph_.trace_custom_node.end();
312 std::copy(start, end, std::back_inserter(op_name_arr));
313 }
314 for (auto op_name : op_name_arr) {
315 auto iter_op_timestamp = op_timestamps_map_.find(op_name);
316 if (iter_op_timestamp != op_timestamps_map_.end()) {
317 try {
318 ofs << op_name << " ";
319 for (auto start_end : iter_op_timestamp->second) {
320 // convert the time unit from 1ns to 10ns (keep the same with ascend)
321 uint64_t duration = start_end.duration * kTimeUnit;
322 uint64_t end_timestamp = (duration + start_end.start_timestamp) / factor;
323 uint64_t start_timestamp = start_end.start_timestamp / factor;
324 ofs << start_timestamp << "," << end_timestamp << " ";
325 }
326 ofs << std::endl;
327 } catch (const std::exception &e) {
328 MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what();
329 ofs.close();
330 return;
331 }
332 }
333 }
334
335 ofs.close();
336 ChangeFileMode(file_path);
337 MS_LOG(INFO) << "Write step trace infos into file: " << file_path;
338 }
339
WriteStartTime(const std::string & saver_base_dir,const BaseTime & start_time)340 void GpuDataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time) {
341 std::string file_path = saver_base_dir + "/start_time_" + device_id_ + ".txt";
342 std::ofstream ofs(file_path);
343 // check if the file is writable
344 if (!ofs.is_open()) {
345 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
346 return;
347 }
348
349 // write start time info into file
350 try {
351 ofs << "host_monotonic_raw_time(ns): " << start_time.host_start_monotonic_raw_time << std::endl;
352 ofs << "gpu_start_time(ns): " << start_time.gpu_start_time << std::endl;
353 } catch (const std::exception &e) {
354 MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what();
355 ofs.close();
356 return;
357 }
358
359 ofs.close();
360 ChangeFileMode(file_path);
361 MS_LOG(INFO) << "Write profiler start time infos into file: " << file_path;
362 }
363 } // namespace gpu
364 } // namespace profiler
365 } // namespace mindspore
366