1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "profiler/device/data_saver.h"
17 #include <fstream>
18 #include <numeric>
19 #include "sys/stat.h"
20 #include "utils/ms_utils.h"
21 #include "utils/ms_context.h"
22
23 namespace mindspore {
24 namespace profiler {
OpDetailInfo(const std::shared_ptr<OpInfo> op_info,float proportion)25 OpDetailInfo::OpDetailInfo(const std::shared_ptr<OpInfo> op_info, float proportion)
26 : op_info_(op_info), proportion_(proportion) {
27 // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}'
28 op_full_name_ = op_info->op_name;
29 auto op_type_begin_iter = op_full_name_.rfind('/') + 1;
30 auto op_type_end_iter = op_full_name_.rfind('-');
31 op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter);
32 op_name_ = op_full_name_.substr(op_type_begin_iter);
33 if (op_info->op_count == 0) {
34 MS_LOG(ERROR) << "The num of operations can not be 0.";
35 return;
36 }
37 op_avg_time_ = op_info->op_host_cost_time / op_info->op_count;
38 }
39
ParseOpInfo(const OpInfoMap & op_info_maps)40 void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
41 op_detail_infos_.reserve(op_info_maps.size());
42 float total_time_sum = GetTotalOpTime(op_info_maps);
43 for (auto item : op_info_maps) {
44 op_timestamps_map_[item.first] = item.second.start_duration;
45 if (total_time_sum == 0.0) {
46 MS_LOG(ERROR) << "The total operation times can not be 0.";
47 return;
48 }
49 float proportion = item.second.op_host_cost_time / total_time_sum;
50 auto op_info = std::make_shared<OpInfo>(item.second);
51 if (op_info == nullptr) {
52 MS_LOG(ERROR) << "Create Operation information node failed when parse operation information.";
53 return;
54 }
55 OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
56 op_detail_infos_.emplace_back(op_detail_info);
57 AddOpDetailInfoForType(op_detail_info);
58 }
59 // update average time of op type
60 for (auto &op_type : op_type_infos_) {
61 // device_infos: <type_name, op_type_info>
62 if (op_type.second.count_ == 0) {
63 MS_LOG(ERROR) << "The num of operation type can not be 0.";
64 return;
65 }
66 op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
67 }
68 MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
69 MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
70 }
71
AddOpDetailInfoForType(const OpDetailInfo & op_detail_info)72 void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
73 // Construct OpType object according to op detail info
74 OpType op_type = OpType{op_detail_info.op_type_,
75 op_detail_info.op_info_->op_count,
76 op_detail_info.op_info_->op_count,
77 op_detail_info.op_info_->op_host_cost_time,
78 0,
79 op_detail_info.proportion_};
80 // Set the OpType into op_type_infos_ map
81 std::string type_name = op_detail_info.op_type_;
82 auto iter = op_type_infos_.find(type_name);
83 if (iter == op_type_infos_.end()) {
84 op_type_infos_.emplace(type_name, op_type);
85 } else {
86 iter->second += op_type;
87 }
88 }
89
GetTotalOpTime(const OpInfoMap & op_info_maps) const90 float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) const {
91 float sum = 0;
92 sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
93 [](float i, auto iter) { return i + iter.second.op_host_cost_time; });
94 MS_LOG(DEBUG) << "The total op time is " << sum;
95 return sum;
96 }
97
WriteOpType(const std::string & saver_base_dir) const98 void DataSaver::WriteOpType(const std::string &saver_base_dir) const {
99 std::string file_path = saver_base_dir + "/" + op_side_ + "_op_type_info_" + device_id_ + ".csv";
100 std::ofstream ofs(file_path);
101 // check if the file is writable
102 if (!ofs.is_open()) {
103 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
104 return;
105 }
106 try {
107 // write op type info into file
108 if (op_side_ == "cpu") {
109 ofs << OpType().GetCpuHeader() << std::endl;
110 for (auto op_type_info : op_type_infos_) {
111 op_type_info.second.OutputCpuOpTypeInfo(ofs);
112 }
113 }
114 if (op_side_ == "gpu") {
115 ofs << OpType().GetGpuHeader() << std::endl;
116 for (auto op_type_info : op_type_infos_) {
117 op_type_info.second.OutputGpuOpTypeInfo(ofs);
118 }
119 }
120 } catch (const std::exception &e) {
121 MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
122 }
123 ofs.close();
124 ChangeFileMode(file_path);
125 MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
126 }
127
WriteOpDetail(const std::string & saver_base_dir) const128 void DataSaver::WriteOpDetail(const std::string &saver_base_dir) const {
129 std::string file_path = saver_base_dir + "/" + op_side_ + "_op_detail_info_" + device_id_ + ".csv";
130 std::ofstream ofs(file_path);
131 if (!ofs.is_open()) {
132 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
133 return;
134 }
135 try {
136 // write op detail info into file
137 if (op_side_ == "cpu") {
138 ofs << OpDetailInfo().GetCpuHeader() << std::endl;
139 for (auto op_detail : op_detail_infos_) {
140 op_detail.OutputCpuOpDetailInfo(ofs);
141 }
142 }
143 if (op_side_ == "gpu") {
144 ofs << OpDetailInfo().GetGpuHeader() << std::endl;
145 for (auto op_detail : op_detail_infos_) {
146 op_detail.OutputGpuOpDetailInfo(ofs);
147 }
148 }
149 } catch (const std::exception &e) {
150 MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
151 }
152 ofs.close();
153 ChangeFileMode(file_path);
154 MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
155 }
156
WriteOpTimestamp(const std::string & saver_base_dir) const157 void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) const {
158 std::string file_path = saver_base_dir + "/" + op_side_ + "_op_execute_timestamp_" + device_id_ + ".txt";
159 std::ofstream ofs(file_path);
160 // check if the file is writable
161 if (!ofs.is_open()) {
162 MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
163 return;
164 }
165 try {
166 // write op timestamp info into file
167 for (const auto &op_timestamp_info : op_timestamps_map_) {
168 if (op_side_ == "cpu") {
169 ofs << op_timestamp_info.first << ";HostCpuOps;";
170 } else {
171 ofs << op_timestamp_info.first << ";GpuOps;";
172 }
173 for (auto start_end : op_timestamp_info.second) {
174 ofs << start_end.start_timestamp << "," << start_end.duration << " ";
175 }
176 ofs << std::endl;
177 }
178 } catch (const std::exception &e) {
179 MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
180 }
181 ofs.close();
182 ChangeFileMode(file_path);
183 }
184
ChangeFileMode(const std::string & file_path) const185 void DataSaver::ChangeFileMode(const std::string &file_path) const {
186 if (chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR) == -1) {
187 MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail.";
188 return;
189 }
190 }
191 } // namespace profiler
192 } // namespace mindspore
193