1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "include/backend/debug/profiler/data_saver.h"
17 #include <fstream>
18 #include <numeric>
19 #include "sys/stat.h"
20 #include "utils/ms_utils.h"
21 #include "include/common/debug/common.h"
22
23 namespace mindspore {
24 namespace profiler {
OpDetailInfo(const std::shared_ptr<OpInfo> op_info,float proportion)25 OpDetailInfo::OpDetailInfo(const std::shared_ptr<OpInfo> op_info, float proportion)
26 : op_info_(op_info), proportion_(proportion) {
27 // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}'
28 op_full_name_ = op_info->op_name;
29 auto op_type_begin_iter = op_full_name_.rfind('/') + 1;
30 auto op_type_end_iter = op_full_name_.rfind('-');
31 op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter);
32 op_name_ = op_full_name_.substr(op_type_begin_iter);
33 if (op_info->op_count == 0) {
34 MS_LOG(ERROR) << "The num of operations can not be 0.";
35 return;
36 }
37 op_avg_time_ = op_info->op_host_cost_time / op_info->op_count;
38 }
39
ParseOpInfo(const OpInfoMap & op_info_maps)40 void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
41 op_detail_infos_.reserve(op_info_maps.size());
42 float total_time_sum = GetTotalOpTime(op_info_maps);
43 for (auto item : op_info_maps) {
44 op_timestamps_map_[item.first] = item.second.start_duration;
45 if (common::IsFloatEqual(total_time_sum, 0.0)) {
46 MS_LOG(ERROR) << "The total operation times can not be 0.";
47 return;
48 }
49 float proportion = item.second.op_host_cost_time / total_time_sum;
50 auto op_info = std::make_shared<OpInfo>(item.second);
51 if (op_info == nullptr) {
52 MS_LOG(ERROR) << "Create Operation information node failed when parse operation information.";
53 return;
54 }
55 OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
56 op_detail_infos_.emplace_back(op_detail_info);
57 AddOpDetailInfoForType(op_detail_info);
58 }
59 // update average time of op type
60 for (auto &op_type : op_type_infos_) {
61 // device_infos: <type_name, op_type_info>
62 if (op_type.second.count_ == 0) {
63 MS_LOG(ERROR) << "The num of operation type can not be 0.";
64 return;
65 }
66 op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
67 }
68 MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
69 MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
70 }
71
AddOpDetailInfoForType(const OpDetailInfo & op_detail_info)72 void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
73 // Construct OpType object according to op detail info
74 OpType op_type = OpType{op_detail_info.op_type_,
75 op_detail_info.op_info_->op_count,
76 op_detail_info.op_info_->op_count,
77 op_detail_info.op_info_->op_host_cost_time,
78 0,
79 op_detail_info.proportion_};
80 // Set the OpType into op_type_infos_ map
81 std::string type_name = op_detail_info.op_type_;
82 auto iter = op_type_infos_.find(type_name);
83 if (iter == op_type_infos_.end()) {
84 op_type_infos_.emplace(type_name, op_type);
85 } else {
86 iter->second += op_type;
87 }
88 }
89
GetTotalOpTime(const OpInfoMap & op_info_maps) const90 float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) const {
91 float sum = 0;
92 sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
93 [](float i, auto iter) { return i + iter.second.op_host_cost_time; });
94 MS_LOG(DEBUG) << "The total op time is " << sum;
95 return sum;
96 }
97
WriteOpType(const std::string & saver_base_dir)98 void DataSaver::WriteOpType(const std::string &saver_base_dir) {
99 std::string file_path = saver_base_dir + "/" + op_side_ + "_op_type_info_" + device_id_ + ".csv";
100 std::ofstream ofs(file_path);
101 // check if the file is writable
102 if (!ofs.is_open()) {
103 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
104 return;
105 }
106 try {
107 // write op type info into file
108 if (op_side_ == "cpu") {
109 ofs << OpType().GetCpuHeader() << std::endl;
110 for (auto op_type_info : op_type_infos_) {
111 op_type_info.second.OutputCpuOpTypeInfo(ofs);
112 }
113 }
114 if (op_side_ == "gpu") {
115 ofs << OpType().GetGpuHeader() << std::endl;
116 for (auto op_type_info : op_type_infos_) {
117 op_type_info.second.OutputGpuOpTypeInfo(ofs);
118 }
119 }
120 } catch (const std::exception &e) {
121 MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
122 }
123 ofs.close();
124 ChangeFileMode(file_path);
125 MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
126 op_type_infos_.clear();
127 }
128
WriteOpDetail(const std::string & saver_base_dir)129 void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
130 std::string file_path = saver_base_dir + "/" + op_side_ + "_op_detail_info_" + device_id_ + ".csv";
131 std::ofstream ofs(file_path);
132 if (!ofs.is_open()) {
133 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
134 return;
135 }
136 try {
137 // write op detail info into file
138 if (op_side_ == "cpu") {
139 ofs << OpDetailInfo().GetCpuHeader() << std::endl;
140 for (auto op_detail : op_detail_infos_) {
141 op_detail.OutputCpuOpDetailInfo(ofs);
142 }
143 }
144 if (op_side_ == "gpu") {
145 ofs << OpDetailInfo().GetGpuHeader() << std::endl;
146 for (auto op_detail : op_detail_infos_) {
147 op_detail.OutputGpuOpDetailInfo(ofs);
148 }
149 }
150 } catch (const std::exception &e) {
151 MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
152 }
153 ofs.close();
154 ChangeFileMode(file_path);
155 MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
156 op_detail_infos_.clear();
157 }
158
WriteOpTimestamp(const std::string & saver_base_dir)159 void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
160 std::string file_path = saver_base_dir + "/" + op_side_ + "_op_execute_timestamp_" + device_id_ + ".txt";
161 std::ofstream ofs(file_path);
162 // check if the file is writable
163 if (!ofs.is_open()) {
164 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
165 return;
166 }
167 try {
168 // write op timestamp info into file
169 for (const auto &op_timestamp_info : op_timestamps_map_) {
170 if (op_side_ == "cpu") {
171 ofs << op_timestamp_info.first << ";HostCpuOps;";
172 for (auto start_end : op_timestamp_info.second) {
173 ofs << start_end.start_timestamp << "," << start_end.duration << "," << start_end.tid << " ";
174 }
175 } else {
176 ofs << op_timestamp_info.first << ";GpuOps;";
177 for (auto start_end : op_timestamp_info.second) {
178 ofs << start_end.start_timestamp << "," << start_end.duration << " ";
179 }
180 }
181 ofs << std::endl;
182 }
183 } catch (const std::exception &e) {
184 MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
185 }
186 ofs.close();
187 ChangeFileMode(file_path);
188 if (op_side_ == "cpu") {
189 op_timestamps_map_.clear();
190 }
191 }
192
WriteFrameWork(const std::string & base_dir,const std::vector<CurKernelInfo> & all_kernel_info)193 void DataSaver::WriteFrameWork(const std::string &base_dir, const std::vector<CurKernelInfo> &all_kernel_info) {
194 std::string file_path = base_dir + "/" + op_side_ + "_framework_" + device_id_ + ".txt";
195 std::ofstream ofs(file_path);
196 if (!ofs.is_open()) {
197 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
198 return;
199 }
200 for (auto kernel_info : all_kernel_info) {
201 auto op_name = kernel_info.op_name;
202 auto op_type = kernel_info.op_type;
203 auto graph_id = kernel_info.graph_id;
204 auto cur_kernel_all_inputs_info = kernel_info.cur_kernel_all_inputs_info;
205 try {
206 ofs << op_type << ";" << op_name << ";" << graph_id << ";";
207 for (auto cur_kernel_input_info : cur_kernel_all_inputs_info) {
208 ofs << "input_" << cur_kernel_input_info.input_id << ":" << cur_kernel_input_info.shape << ";";
209 }
210 } catch (const std::exception &e) {
211 MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what();
212 ofs.close();
213 return;
214 }
215 ofs << std::endl;
216 }
217 ofs.close();
218 ChangeFileMode(file_path);
219 MS_LOG(INFO) << "Write framework infos into file: " << file_path;
220 }
221
ParseMemoryInfo(const MemoryInfoList & memory_info_list)222 void DataSaver::ParseMemoryInfo(const MemoryInfoList &memory_info_list) {
223 for (auto memory_info : memory_info_list) {
224 memory_info_ += std::to_string(memory_info.time_stamp) + "," + std::to_string(memory_info.total_allocated) + "," +
225 std::to_string(memory_info.total_reserved) + "," + std::to_string(memory_info.total_active) + "\n";
226 }
227 }
228
WriteMemoryData(const std::string & saver_base_dir)229 void DataSaver::WriteMemoryData(const std::string &saver_base_dir) {
230 std::string file_path = saver_base_dir + "/" + op_side_ + "_ms_memory_record_" + device_id_ + ".txt";
231 auto realpath = Common::CreatePrefixPath(file_path);
232 if (!realpath.has_value()) {
233 MS_LOG(ERROR) << "Get realpath failed, path=" << file_path;
234 return;
235 }
236 std::ofstream ofs(realpath.value());
237 // check if the file is writable
238 if (!ofs.is_open()) {
239 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
240 return;
241 }
242 try {
243 // write memory info into file
244 ofs << "Timestamp(ns),Total Allocated(Byte),Total Reserved(Byte),Total Active(Byte)\n";
245 ofs << memory_info_;
246 } catch (const std::exception &e) {
247 MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
248 }
249 ofs.close();
250 ChangeFileMode(file_path);
251 memory_info_.clear();
252 }
253
ChangeFileMode(const std::string & file_path) const254 void DataSaver::ChangeFileMode(const std::string &file_path) const {
255 if (chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR) == -1) {
256 MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail.";
257 return;
258 }
259 }
260 } // namespace profiler
261 } // namespace mindspore
262