1 /* Copyright 2016 The TensorFlow Authors All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/profiler/internal/tfprof_stats.h"
17
18 #include <stdio.h>
19
20 #include <utility>
21
22 #include "absl/strings/numbers.h"
23 #include "absl/strings/str_format.h"
24 #include "absl/strings/str_split.h"
25 #include "tensorflow/core/platform/regexp.h"
26 #include "tensorflow/core/profiler/internal/tfprof_timeline.h"
27
28 namespace tensorflow {
29 namespace tfprof {
30 namespace {
31
32 const char* const kProfilePrefix = "Profile:\n";
33
CreateRunMetadataNode(const string & name,NodeDef * def)34 bool CreateRunMetadataNode(const string& name, NodeDef* def) {
35 // TODO(xpan): Better solution than denylisting this 2 nodes. They
36 // actually cost some resources, maybe include them. Some nodes, such
37 // as _SOURCE appear in multiple devices, which breaks tfprof's assumption.
38 if (name == "RecvTensor" || name == "_SOURCE" ||
39 name.find("MEMCPY") != name.npos) {
40 return false;
41 }
42 def->set_name(name);
43 // TODO(xpan): Better operation type.
44 // This is because some times a node doesn't have a op type,
45 // so we use node name as the op type.
46 def->set_op(name);
47 return true;
48 }
49 } // namespace
50
TFStats(std::unique_ptr<GraphDef> graph,std::unique_ptr<RunMetadata> run_meta,std::unique_ptr<OpLogProto> op_log,std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)51 TFStats::TFStats(std::unique_ptr<GraphDef> graph,
52 std::unique_ptr<RunMetadata> run_meta,
53 std::unique_ptr<OpLogProto> op_log,
54 std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
55 : has_code_traces_(false),
56 miss_accelerator_stream_(false),
57 ckpt_reader_(std::move(ckpt_reader)) {
58 CHECK(graph) << "Must at least have GraphDef";
59
60 AddGraph(std::move(graph));
61 if (run_meta && run_meta->has_step_stats()) {
62 AddRunMeta(0, std::move(run_meta));
63 }
64 AddOpLogProto(std::move(op_log));
65
66 if (ckpt_reader_) {
67 for (const auto& v : ckpt_reader_->GetVariableToShapeMap()) {
68 auto node = nodes_map_.find(v.first);
69 if (node != nodes_map_.end()) {
70 node->second->AddOpType("_checkpoint_variables");
71 }
72 }
73 }
74 }
75
TFStats(const string & filename,std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)76 TFStats::TFStats(const string& filename,
77 std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
78 : has_code_traces_(false),
79 miss_accelerator_stream_(false),
80 ckpt_reader_(std::move(ckpt_reader)) {
81 string str;
82 Status s = ReadFileToString(Env::Default(), filename, &str);
83 if (!s.ok()) {
84 absl::FPrintF(stderr, "Failed to read profile: %s", s.ToString());
85 return;
86 }
87
88 ProfileProto profile;
89 if (!profile.ParseFromString(str)) {
90 absl::FPrintF(stderr, "Failed to parse profile\n");
91 return;
92 }
93 for (const auto& entry : profile.id_to_string()) {
94 id_to_string_[entry.first] = entry.second;
95 }
96 for (const auto& node_pb : profile.nodes()) {
97 std::unique_ptr<TFGraphNode> node(
98 new TFGraphNode(node_pb.second, profile, &id_to_string_, &nodes_map_));
99 nodes_map_.insert(std::pair<string, std::unique_ptr<TFGraphNode>>(
100 node_pb.second.name(), std::move(node)));
101 }
102 has_code_traces_ = profile.has_trace();
103 for (int64_t s : profile.steps()) {
104 steps_.insert(s);
105 }
106 }
107
BuildView(const string & cmd)108 void TFStats::BuildView(const string& cmd) {
109 if (cmd == kCmds[0] && !scope_view_) {
110 scope_view_.reset(new TFScope(ckpt_reader_.get()));
111 for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
112 scope_view_->AddNode(it->second.get());
113 }
114 scope_view_->Build();
115 }
116 if (cmd == kCmds[1] && !graph_view_) {
117 graph_view_.reset(new TFGraph(ckpt_reader_.get()));
118 for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
119 graph_view_->AddNode(it->second.get());
120 }
121 graph_view_->Build();
122 }
123 if (cmd == kCmds[2] && !code_view_) {
124 code_view_.reset(new TFCode());
125 for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
126 code_view_->AddNode(it->second.get());
127 }
128 code_view_->Build();
129 }
130 if (cmd == kCmds[3] && !op_view_) {
131 op_view_.reset(new TFOp());
132 for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
133 op_view_->AddNode(it->second.get());
134 }
135 op_view_->Build();
136 }
137 }
138
BuildAllViews()139 void TFStats::BuildAllViews() {
140 std::vector<string> cmds_str(kCmds, kCmds + sizeof(kCmds) / sizeof(*kCmds));
141 for (const string& cmd : cmds_str) {
142 BuildView(cmd);
143 }
144 }
145
ShowGraphNode(const string & cmd,const Options & opts) const146 const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd,
147 const Options& opts) const {
148 if (!Validate(opts)) {
149 return empty_graph_node_;
150 }
151 string prefix = MaybeReportMissingTrace();
152 prefix += QueryDoc(cmd, opts) + kProfilePrefix;
153
154 if (cmd == kCmds[0]) {
155 return scope_view_->Show(prefix, opts);
156 } else if (cmd == kCmds[1]) {
157 if (opts.step < 0 && opts.output_type == kOutput[0]) {
158 for (int64_t step : steps_) {
159 Options nopts = opts;
160 nopts.step = step;
161 graph_view_->Show(prefix, nopts);
162 }
163 return empty_graph_node_;
164 }
165 return graph_view_->Show(prefix, opts);
166 } else {
167 absl::FPrintF(stderr, "Unknown command: %s\n", cmd);
168 return empty_graph_node_;
169 }
170 }
171
ShowMultiGraphNode(const string & cmd,const Options & opts) const172 const MultiGraphNodeProto& TFStats::ShowMultiGraphNode(
173 const string& cmd, const Options& opts) const {
174 if (!Validate(opts)) {
175 return empty_multi_graph_node_;
176 }
177 string prefix = MaybeReportMissingTrace();
178 prefix += QueryDoc(cmd, opts) + kProfilePrefix;
179
180 if (cmd == kCmds[2]) {
181 if (!has_code_traces()) {
182 absl::FPrintF(stderr, "No code trace information\n");
183 return empty_multi_graph_node_;
184 }
185 return code_view_->Show(prefix, opts);
186 } else if (cmd == kCmds[3]) {
187 return op_view_->Show(prefix, opts);
188 } else {
189 absl::FPrintF(stderr, "Unknown command: %s\n", cmd);
190 return empty_multi_graph_node_;
191 }
192 }
193
AddGraph(std::unique_ptr<GraphDef> graph)194 void TFStats::AddGraph(std::unique_ptr<GraphDef> graph) {
195 std::map<string, const NodeDef*> node_defs;
196 bool node_added = false;
197 for (const NodeDef& node : graph->node()) {
198 if (nodes_map_.find(node.name()) != nodes_map_.end()) {
199 continue;
200 }
201 node_added = true;
202 size_t num_nodes = nodes_map_.size();
203 nodes_map_[node.name()] = std::unique_ptr<TFGraphNode>(
204 new TFGraphNode(&node, num_nodes, &nodes_map_));
205 node_defs[node.name()] = &node;
206 }
207 for (auto it = node_defs.begin(); it != node_defs.end(); it++) {
208 TFGraphNode* node = nodes_map_.at(it->first).get();
209 for (int i = 0; i < it->second->input_size(); ++i) {
210 string node_input = it->second->input(i);
211 int output_idx = 0;
212 // input name format can be: "^node:src_output"
213 // if not :src_output, then it's the first one (further verify?)
214 auto prefix_pos = node_input.find(':');
215 if (prefix_pos != node_input.npos) {
216 std::vector<string> input_parts = absl::StrSplit(node_input, ':');
217 DCHECK(input_parts.size() == 2)
218 << "Unknown NodeDef.input format: " << node_input;
219 node_input = input_parts[0];
220 DCHECK(absl::SimpleAtoi(input_parts[1], &output_idx))
221 << "Failed to parse integer: " << output_idx;
222 }
223 if (node_input.substr(0, 1) == "^") {
224 node_input = node_input.substr(1);
225 }
226 // Delay input TFGraphNode retrieval as late as possible.
227 // In long run, when we have TensorFlow runtime graph, the
228 // graph connection should be dynamic and per-step.
229 node->AddInput(node_input, output_idx, i);
230 }
231 }
232 if (node_added) {
233 graph_view_.reset(nullptr);
234 scope_view_.reset(nullptr);
235 op_view_.reset(nullptr);
236 code_view_.reset(nullptr);
237 }
238 }
239
AddOpLogProto(std::unique_ptr<OpLogProto> op_log)240 void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) {
241 if (!op_log) {
242 return;
243 }
244 for (const auto& entry : op_log->id_to_string()) {
245 if (id_to_string_.find(entry.first) == id_to_string_.end()) {
246 id_to_string_[entry.first] = entry.second;
247 }
248 }
249 for (const OpLogEntry& entry : op_log->log_entries()) {
250 auto node = nodes_map_.find(entry.name());
251 if (node == nodes_map_.end()) continue;
252 for (const string& type : entry.types()) {
253 node->second->AddOpType(type);
254 }
255 if (entry.float_ops()) {
256 node->second->AddFloatOps(entry.float_ops());
257 }
258 if (entry.has_code_def()) {
259 has_code_traces_ = true;
260 node->second->AddCode(entry.code_def(), &id_to_string_);
261 }
262 }
263 }
264
AddRunMeta(int64_t step,std::unique_ptr<RunMetadata> run_meta)265 void TFStats::AddRunMeta(int64_t step, std::unique_ptr<RunMetadata> run_meta) {
266 if (!run_meta || !run_meta->has_step_stats()) {
267 absl::FPrintF(stderr, "Invalid RunMetadata for step %d\n", step);
268 return;
269 }
270 if (steps_.find(step) == steps_.end()) {
271 steps_.insert(step);
272 }
273 steps_.insert(step);
274
275 bool has_gpu_scheduling = false;
276 bool has_gpu_stream = false;
277
278 for (const auto& dev_stat : run_meta->step_stats().dev_stats()) {
279 string dev = absl::AsciiStrToLower(dev_stat.device());
280 if (IsPlacedOnAccelerator(dev)) {
281 has_gpu_scheduling = true;
282 if (CountAsAcceleratorTime(dev)) {
283 has_gpu_stream = true;
284 }
285 }
286 for (const NodeExecStats& node_stat : dev_stat.node_stats()) {
287 string name = node_stat.node_name();
288 // Sometimes the node_name is suffixed with unnecessary information.
289 auto split_pos = node_stat.node_name().find(':');
290 if (split_pos != node_stat.node_name().npos) {
291 name = node_stat.node_name().substr(0, split_pos);
292 }
293 auto node = nodes_map_.find(name);
294 if (node == nodes_map_.end()) {
295 NodeDef def;
296 if (CreateRunMetadataNode(name, &def)) {
297 size_t num_nodes = nodes_map_.size();
298 nodes_map_[name] = std::unique_ptr<TFGraphNode>(
299 new TFGraphNode(&def, num_nodes, &nodes_map_));
300 nodes_map_.at(name)->AddStepStat(step, dev_stat.device(), node_stat);
301 }
302 } else {
303 covered_nodes_.insert(node->second->id());
304 node->second->AddStepStat(step, dev_stat.device(), node_stat);
305 }
306 }
307 }
308
309 if (has_gpu_scheduling && !has_gpu_stream) {
310 miss_accelerator_stream_ = true;
311 }
312 }
313
MaybeReportMissingTrace() const314 string TFStats::MaybeReportMissingTrace() const {
315 string report = "";
316 if (miss_accelerator_stream_) {
317 report +=
318 "\n\nFound accelerator operation but misses accelerator "
319 "stream stats!\n\n"
320 "It's likely a gpu tracing issue rather than tf-profiler issue.\n"
321 "If you found your operation missing accelerator time, "
322 "consider to post to discuss@tensorflow.org!\n\n";
323 }
324 return report;
325 }
326
SerializeToString(string * content)327 void TFStats::SerializeToString(string* content) {
328 ProfileProto profile;
329 for (const auto& entry : id_to_string_) {
330 (*profile.mutable_id_to_string())[entry.first] = entry.second;
331 }
332 for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
333 if (it->second->id() < 0) {
334 continue;
335 }
336 (*profile.mutable_nodes())[it->second->id()].MergeFrom(
337 it->second->ToProto(nodes_map_));
338 }
339
340 profile.set_has_trace(has_code_traces_);
341 profile.set_miss_accelerator_stream(miss_accelerator_stream_);
342 for (int64_t s : steps_) {
343 profile.add_steps(s);
344 }
345 *content = profile.SerializeAsString();
346 }
347
WriteProfile(const string & filename)348 void TFStats::WriteProfile(const string& filename) {
349 string content;
350 SerializeToString(&content);
351 Status s = WriteStringToFile(Env::Default(), filename, content);
352 if (!s.ok()) {
353 absl::FPrintF(stderr, "%s\n", s.ToString());
354 }
355 }
356
Validate(const Options & opts) const357 bool TFStats::Validate(const Options& opts) const {
358 if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) {
359 absl::FPrintF(stderr,
360 "Options -step=%d not found.\nAvailable steps: ", opts.step);
361 for (int64_t s : steps_) {
362 absl::FPrintF(stderr, "%d ", s);
363 }
364 absl::FPrintF(stderr, "\n");
365 return false;
366 }
367 return true;
368 }
369
AddNodeForTest(int64_t step,std::unique_ptr<TFGraphNode> node)370 void TFStats::AddNodeForTest(int64_t step, std::unique_ptr<TFGraphNode> node) {
371 steps_.insert(step);
372 nodes_map_[node->name()] = std::move(node);
373 }
374 } // namespace tfprof
375 } // namespace tensorflow
376