• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "include/backend/debug/profiler/data_saver.h"
17 #include <fstream>
18 #include <numeric>
19 #include "sys/stat.h"
20 #include "utils/ms_utils.h"
21 #include "include/common/debug/common.h"
22 
23 namespace mindspore {
24 namespace profiler {
OpDetailInfo(const std::shared_ptr<OpInfo> op_info,float proportion)25 OpDetailInfo::OpDetailInfo(const std::shared_ptr<OpInfo> op_info, float proportion)
26     : op_info_(op_info), proportion_(proportion) {
27   // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}'
28   op_full_name_ = op_info->op_name;
29   auto op_type_begin_iter = op_full_name_.rfind('/') + 1;
30   auto op_type_end_iter = op_full_name_.rfind('-');
31   op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter);
32   op_name_ = op_full_name_.substr(op_type_begin_iter);
33   if (op_info->op_count == 0) {
34     MS_LOG(ERROR) << "The num of operations can not be 0.";
35     return;
36   }
37   op_avg_time_ = op_info->op_host_cost_time / op_info->op_count;
38 }
39 
ParseOpInfo(const OpInfoMap & op_info_maps)40 void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
41   op_detail_infos_.reserve(op_info_maps.size());
42   float total_time_sum = GetTotalOpTime(op_info_maps);
43   for (auto item : op_info_maps) {
44     op_timestamps_map_[item.first] = item.second.start_duration;
45     if (common::IsFloatEqual(total_time_sum, 0.0)) {
46       MS_LOG(ERROR) << "The total operation times can not be 0.";
47       return;
48     }
49     float proportion = item.second.op_host_cost_time / total_time_sum;
50     auto op_info = std::make_shared<OpInfo>(item.second);
51     if (op_info == nullptr) {
52       MS_LOG(ERROR) << "Create Operation information node failed when parse operation information.";
53       return;
54     }
55     OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
56     op_detail_infos_.emplace_back(op_detail_info);
57     AddOpDetailInfoForType(op_detail_info);
58   }
59   // update average time of op type
60   for (auto &op_type : op_type_infos_) {
61     // device_infos: <type_name, op_type_info>
62     if (op_type.second.count_ == 0) {
63       MS_LOG(ERROR) << "The num of operation type can not be 0.";
64       return;
65     }
66     op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
67   }
68   MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
69   MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
70 }
71 
AddOpDetailInfoForType(const OpDetailInfo & op_detail_info)72 void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
73   // Construct OpType object according to op detail info
74   OpType op_type = OpType{op_detail_info.op_type_,
75                           op_detail_info.op_info_->op_count,
76                           op_detail_info.op_info_->op_count,
77                           op_detail_info.op_info_->op_host_cost_time,
78                           0,
79                           op_detail_info.proportion_};
80   // Set the OpType into op_type_infos_ map
81   std::string type_name = op_detail_info.op_type_;
82   auto iter = op_type_infos_.find(type_name);
83   if (iter == op_type_infos_.end()) {
84     op_type_infos_.emplace(type_name, op_type);
85   } else {
86     iter->second += op_type;
87   }
88 }
89 
GetTotalOpTime(const OpInfoMap & op_info_maps) const90 float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) const {
91   float sum = 0;
92   sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
93                         [](float i, auto iter) { return i + iter.second.op_host_cost_time; });
94   MS_LOG(DEBUG) << "The total op time is " << sum;
95   return sum;
96 }
97 
WriteOpType(const std::string & saver_base_dir)98 void DataSaver::WriteOpType(const std::string &saver_base_dir) {
99   std::string file_path = saver_base_dir + "/" + op_side_ + "_op_type_info_" + device_id_ + ".csv";
100   std::ofstream ofs(file_path);
101   // check if the file is writable
102   if (!ofs.is_open()) {
103     MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
104     return;
105   }
106   try {
107     // write op type info into file
108     if (op_side_ == "cpu") {
109       ofs << OpType().GetCpuHeader() << std::endl;
110       for (auto op_type_info : op_type_infos_) {
111         op_type_info.second.OutputCpuOpTypeInfo(ofs);
112       }
113     }
114     if (op_side_ == "gpu") {
115       ofs << OpType().GetGpuHeader() << std::endl;
116       for (auto op_type_info : op_type_infos_) {
117         op_type_info.second.OutputGpuOpTypeInfo(ofs);
118       }
119     }
120   } catch (const std::exception &e) {
121     MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
122   }
123   ofs.close();
124   ChangeFileMode(file_path);
125   MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
126   op_type_infos_.clear();
127 }
128 
WriteOpDetail(const std::string & saver_base_dir)129 void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
130   std::string file_path = saver_base_dir + "/" + op_side_ + "_op_detail_info_" + device_id_ + ".csv";
131   std::ofstream ofs(file_path);
132   if (!ofs.is_open()) {
133     MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
134     return;
135   }
136   try {
137     // write op detail info into file
138     if (op_side_ == "cpu") {
139       ofs << OpDetailInfo().GetCpuHeader() << std::endl;
140       for (auto op_detail : op_detail_infos_) {
141         op_detail.OutputCpuOpDetailInfo(ofs);
142       }
143     }
144     if (op_side_ == "gpu") {
145       ofs << OpDetailInfo().GetGpuHeader() << std::endl;
146       for (auto op_detail : op_detail_infos_) {
147         op_detail.OutputGpuOpDetailInfo(ofs);
148       }
149     }
150   } catch (const std::exception &e) {
151     MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
152   }
153   ofs.close();
154   ChangeFileMode(file_path);
155   MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
156   op_detail_infos_.clear();
157 }
158 
WriteOpTimestamp(const std::string & saver_base_dir)159 void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
160   std::string file_path = saver_base_dir + "/" + op_side_ + "_op_execute_timestamp_" + device_id_ + ".txt";
161   std::ofstream ofs(file_path);
162   // check if the file is writable
163   if (!ofs.is_open()) {
164     MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
165     return;
166   }
167   try {
168     // write op timestamp info into file
169     for (const auto &op_timestamp_info : op_timestamps_map_) {
170       if (op_side_ == "cpu") {
171         ofs << op_timestamp_info.first << ";HostCpuOps;";
172         for (auto start_end : op_timestamp_info.second) {
173           ofs << start_end.start_timestamp << "," << start_end.duration << "," << start_end.tid << " ";
174         }
175       } else {
176         ofs << op_timestamp_info.first << ";GpuOps;";
177         for (auto start_end : op_timestamp_info.second) {
178           ofs << start_end.start_timestamp << "," << start_end.duration << " ";
179         }
180       }
181       ofs << std::endl;
182     }
183   } catch (const std::exception &e) {
184     MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
185   }
186   ofs.close();
187   ChangeFileMode(file_path);
188   if (op_side_ == "cpu") {
189     op_timestamps_map_.clear();
190   }
191 }
192 
WriteFrameWork(const std::string & base_dir,const std::vector<CurKernelInfo> & all_kernel_info)193 void DataSaver::WriteFrameWork(const std::string &base_dir, const std::vector<CurKernelInfo> &all_kernel_info) {
194   std::string file_path = base_dir + "/" + op_side_ + "_framework_" + device_id_ + ".txt";
195   std::ofstream ofs(file_path);
196   if (!ofs.is_open()) {
197     MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
198     return;
199   }
200   for (auto kernel_info : all_kernel_info) {
201     auto op_name = kernel_info.op_name;
202     auto op_type = kernel_info.op_type;
203     auto graph_id = kernel_info.graph_id;
204     auto cur_kernel_all_inputs_info = kernel_info.cur_kernel_all_inputs_info;
205     try {
206       ofs << op_type << ";" << op_name << ";" << graph_id << ";";
207       for (auto cur_kernel_input_info : cur_kernel_all_inputs_info) {
208         ofs << "input_" << cur_kernel_input_info.input_id << ":" << cur_kernel_input_info.shape << ";";
209       }
210     } catch (const std::exception &e) {
211       MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what();
212       ofs.close();
213       return;
214     }
215     ofs << std::endl;
216   }
217   ofs.close();
218   ChangeFileMode(file_path);
219   MS_LOG(INFO) << "Write framework infos into file: " << file_path;
220 }
221 
ParseMemoryInfo(const MemoryInfoList & memory_info_list)222 void DataSaver::ParseMemoryInfo(const MemoryInfoList &memory_info_list) {
223   for (auto memory_info : memory_info_list) {
224     memory_info_ += std::to_string(memory_info.time_stamp) + "," + std::to_string(memory_info.total_allocated) + "," +
225                     std::to_string(memory_info.total_reserved) + "," + std::to_string(memory_info.total_active) + "\n";
226   }
227 }
228 
WriteMemoryData(const std::string & saver_base_dir)229 void DataSaver::WriteMemoryData(const std::string &saver_base_dir) {
230   std::string file_path = saver_base_dir + "/" + op_side_ + "_ms_memory_record_" + device_id_ + ".txt";
231   auto realpath = Common::CreatePrefixPath(file_path);
232   if (!realpath.has_value()) {
233     MS_LOG(ERROR) << "Get realpath failed, path=" << file_path;
234     return;
235   }
236   std::ofstream ofs(realpath.value());
237   // check if the file is writable
238   if (!ofs.is_open()) {
239     MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
240     return;
241   }
242   try {
243     // write memory info into file
244     ofs << "Timestamp(ns),Total Allocated(Byte),Total Reserved(Byte),Total Active(Byte)\n";
245     ofs << memory_info_;
246   } catch (const std::exception &e) {
247     MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
248   }
249   ofs.close();
250   ChangeFileMode(file_path);
251   memory_info_.clear();
252 }
253 
ChangeFileMode(const std::string & file_path) const254 void DataSaver::ChangeFileMode(const std::string &file_path) const {
255   if (chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR) == -1) {
256     MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail.";
257     return;
258   }
259 }
260 }  // namespace profiler
261 }  // namespace mindspore
262