• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The TensorFlow Authors All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/profiler/internal/tfprof_stats.h"
17 
18 #include <stdio.h>
19 
20 #include <utility>
21 
22 #include "absl/strings/numbers.h"
23 #include "absl/strings/str_format.h"
24 #include "absl/strings/str_split.h"
25 #include "tensorflow/core/platform/regexp.h"
26 #include "tensorflow/core/profiler/internal/tfprof_timeline.h"
27 
28 namespace tensorflow {
29 namespace tfprof {
30 namespace {
31 
32 const char* const kProfilePrefix = "Profile:\n";
33 
CreateRunMetadataNode(const string & name,NodeDef * def)34 bool CreateRunMetadataNode(const string& name, NodeDef* def) {
35   // TODO(xpan): Better solution than denylisting this 2 nodes. They
36   // actually cost some resources, maybe include them. Some nodes, such
37   // as _SOURCE appear in multiple devices, which breaks tfprof's assumption.
38   if (name == "RecvTensor" || name == "_SOURCE" ||
39       name.find("MEMCPY") != name.npos) {
40     return false;
41   }
42   def->set_name(name);
43   // TODO(xpan): Better operation type.
44   // This is because some times a node doesn't have a op type,
45   // so we use node name as the op type.
46   def->set_op(name);
47   return true;
48 }
49 }  // namespace
50 
TFStats(std::unique_ptr<GraphDef> graph,std::unique_ptr<RunMetadata> run_meta,std::unique_ptr<OpLogProto> op_log,std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)51 TFStats::TFStats(std::unique_ptr<GraphDef> graph,
52                  std::unique_ptr<RunMetadata> run_meta,
53                  std::unique_ptr<OpLogProto> op_log,
54                  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
55     : has_code_traces_(false),
56       miss_accelerator_stream_(false),
57       ckpt_reader_(std::move(ckpt_reader)) {
58   CHECK(graph) << "Must at least have GraphDef";
59 
60   AddGraph(std::move(graph));
61   if (run_meta && run_meta->has_step_stats()) {
62     AddRunMeta(0, std::move(run_meta));
63   }
64   AddOpLogProto(std::move(op_log));
65 
66   if (ckpt_reader_) {
67     for (const auto& v : ckpt_reader_->GetVariableToShapeMap()) {
68       auto node = nodes_map_.find(v.first);
69       if (node != nodes_map_.end()) {
70         node->second->AddOpType("_checkpoint_variables");
71       }
72     }
73   }
74 }
75 
TFStats(const string & filename,std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)76 TFStats::TFStats(const string& filename,
77                  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
78     : has_code_traces_(false),
79       miss_accelerator_stream_(false),
80       ckpt_reader_(std::move(ckpt_reader)) {
81   string str;
82   Status s = ReadFileToString(Env::Default(), filename, &str);
83   if (!s.ok()) {
84     absl::FPrintF(stderr, "Failed to read profile: %s", s.ToString());
85     return;
86   }
87 
88   ProfileProto profile;
89   if (!profile.ParseFromString(str)) {
90     absl::FPrintF(stderr, "Failed to parse profile\n");
91     return;
92   }
93   for (const auto& entry : profile.id_to_string()) {
94     id_to_string_[entry.first] = entry.second;
95   }
96   for (const auto& node_pb : profile.nodes()) {
97     std::unique_ptr<TFGraphNode> node(
98         new TFGraphNode(node_pb.second, profile, &id_to_string_, &nodes_map_));
99     nodes_map_.insert(std::pair<string, std::unique_ptr<TFGraphNode>>(
100         node_pb.second.name(), std::move(node)));
101   }
102   has_code_traces_ = profile.has_trace();
103   for (int64_t s : profile.steps()) {
104     steps_.insert(s);
105   }
106 }
107 
BuildView(const string & cmd)108 void TFStats::BuildView(const string& cmd) {
109   if (cmd == kCmds[0] && !scope_view_) {
110     scope_view_.reset(new TFScope(ckpt_reader_.get()));
111     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
112       scope_view_->AddNode(it->second.get());
113     }
114     scope_view_->Build();
115   }
116   if (cmd == kCmds[1] && !graph_view_) {
117     graph_view_.reset(new TFGraph(ckpt_reader_.get()));
118     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
119       graph_view_->AddNode(it->second.get());
120     }
121     graph_view_->Build();
122   }
123   if (cmd == kCmds[2] && !code_view_) {
124     code_view_.reset(new TFCode());
125     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
126       code_view_->AddNode(it->second.get());
127     }
128     code_view_->Build();
129   }
130   if (cmd == kCmds[3] && !op_view_) {
131     op_view_.reset(new TFOp());
132     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
133       op_view_->AddNode(it->second.get());
134     }
135     op_view_->Build();
136   }
137 }
138 
BuildAllViews()139 void TFStats::BuildAllViews() {
140   std::vector<string> cmds_str(kCmds, kCmds + sizeof(kCmds) / sizeof(*kCmds));
141   for (const string& cmd : cmds_str) {
142     BuildView(cmd);
143   }
144 }
145 
ShowGraphNode(const string & cmd,const Options & opts) const146 const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd,
147                                              const Options& opts) const {
148   if (!Validate(opts)) {
149     return empty_graph_node_;
150   }
151   string prefix = MaybeReportMissingTrace();
152   prefix += QueryDoc(cmd, opts) + kProfilePrefix;
153 
154   if (cmd == kCmds[0]) {
155     return scope_view_->Show(prefix, opts);
156   } else if (cmd == kCmds[1]) {
157     if (opts.step < 0 && opts.output_type == kOutput[0]) {
158       for (int64_t step : steps_) {
159         Options nopts = opts;
160         nopts.step = step;
161         graph_view_->Show(prefix, nopts);
162       }
163       return empty_graph_node_;
164     }
165     return graph_view_->Show(prefix, opts);
166   } else {
167     absl::FPrintF(stderr, "Unknown command: %s\n", cmd);
168     return empty_graph_node_;
169   }
170 }
171 
ShowMultiGraphNode(const string & cmd,const Options & opts) const172 const MultiGraphNodeProto& TFStats::ShowMultiGraphNode(
173     const string& cmd, const Options& opts) const {
174   if (!Validate(opts)) {
175     return empty_multi_graph_node_;
176   }
177   string prefix = MaybeReportMissingTrace();
178   prefix += QueryDoc(cmd, opts) + kProfilePrefix;
179 
180   if (cmd == kCmds[2]) {
181     if (!has_code_traces()) {
182       absl::FPrintF(stderr, "No code trace information\n");
183       return empty_multi_graph_node_;
184     }
185     return code_view_->Show(prefix, opts);
186   } else if (cmd == kCmds[3]) {
187     return op_view_->Show(prefix, opts);
188   } else {
189     absl::FPrintF(stderr, "Unknown command: %s\n", cmd);
190     return empty_multi_graph_node_;
191   }
192 }
193 
AddGraph(std::unique_ptr<GraphDef> graph)194 void TFStats::AddGraph(std::unique_ptr<GraphDef> graph) {
195   std::map<string, const NodeDef*> node_defs;
196   bool node_added = false;
197   for (const NodeDef& node : graph->node()) {
198     if (nodes_map_.find(node.name()) != nodes_map_.end()) {
199       continue;
200     }
201     node_added = true;
202     size_t num_nodes = nodes_map_.size();
203     nodes_map_[node.name()] = std::unique_ptr<TFGraphNode>(
204         new TFGraphNode(&node, num_nodes, &nodes_map_));
205     node_defs[node.name()] = &node;
206   }
207   for (auto it = node_defs.begin(); it != node_defs.end(); it++) {
208     TFGraphNode* node = nodes_map_.at(it->first).get();
209     for (int i = 0; i < it->second->input_size(); ++i) {
210       string node_input = it->second->input(i);
211       int output_idx = 0;
212       // input name format can be: "^node:src_output"
213       // if not :src_output, then it's the first one (further verify?)
214       auto prefix_pos = node_input.find(':');
215       if (prefix_pos != node_input.npos) {
216         std::vector<string> input_parts = absl::StrSplit(node_input, ':');
217         DCHECK(input_parts.size() == 2)
218             << "Unknown NodeDef.input format: " << node_input;
219         node_input = input_parts[0];
220         DCHECK(absl::SimpleAtoi(input_parts[1], &output_idx))
221             << "Failed to parse integer: " << output_idx;
222       }
223       if (node_input.substr(0, 1) == "^") {
224         node_input = node_input.substr(1);
225       }
226       // Delay input TFGraphNode retrieval as late as possible.
227       // In long run, when we have TensorFlow runtime graph, the
228       // graph connection should be dynamic and per-step.
229       node->AddInput(node_input, output_idx, i);
230     }
231   }
232   if (node_added) {
233     graph_view_.reset(nullptr);
234     scope_view_.reset(nullptr);
235     op_view_.reset(nullptr);
236     code_view_.reset(nullptr);
237   }
238 }
239 
AddOpLogProto(std::unique_ptr<OpLogProto> op_log)240 void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) {
241   if (!op_log) {
242     return;
243   }
244   for (const auto& entry : op_log->id_to_string()) {
245     if (id_to_string_.find(entry.first) == id_to_string_.end()) {
246       id_to_string_[entry.first] = entry.second;
247     }
248   }
249   for (const OpLogEntry& entry : op_log->log_entries()) {
250     auto node = nodes_map_.find(entry.name());
251     if (node == nodes_map_.end()) continue;
252     for (const string& type : entry.types()) {
253       node->second->AddOpType(type);
254     }
255     if (entry.float_ops()) {
256       node->second->AddFloatOps(entry.float_ops());
257     }
258     if (entry.has_code_def()) {
259       has_code_traces_ = true;
260       node->second->AddCode(entry.code_def(), &id_to_string_);
261     }
262   }
263 }
264 
AddRunMeta(int64_t step,std::unique_ptr<RunMetadata> run_meta)265 void TFStats::AddRunMeta(int64_t step, std::unique_ptr<RunMetadata> run_meta) {
266   if (!run_meta || !run_meta->has_step_stats()) {
267     absl::FPrintF(stderr, "Invalid RunMetadata for step %d\n", step);
268     return;
269   }
270   if (steps_.find(step) == steps_.end()) {
271     steps_.insert(step);
272   }
273   steps_.insert(step);
274 
275   bool has_gpu_scheduling = false;
276   bool has_gpu_stream = false;
277 
278   for (const auto& dev_stat : run_meta->step_stats().dev_stats()) {
279     string dev = absl::AsciiStrToLower(dev_stat.device());
280     if (IsPlacedOnAccelerator(dev)) {
281       has_gpu_scheduling = true;
282       if (CountAsAcceleratorTime(dev)) {
283         has_gpu_stream = true;
284       }
285     }
286     for (const NodeExecStats& node_stat : dev_stat.node_stats()) {
287       string name = node_stat.node_name();
288       // Sometimes the node_name is suffixed with unnecessary information.
289       auto split_pos = node_stat.node_name().find(':');
290       if (split_pos != node_stat.node_name().npos) {
291         name = node_stat.node_name().substr(0, split_pos);
292       }
293       auto node = nodes_map_.find(name);
294       if (node == nodes_map_.end()) {
295         NodeDef def;
296         if (CreateRunMetadataNode(name, &def)) {
297           size_t num_nodes = nodes_map_.size();
298           nodes_map_[name] = std::unique_ptr<TFGraphNode>(
299               new TFGraphNode(&def, num_nodes, &nodes_map_));
300           nodes_map_.at(name)->AddStepStat(step, dev_stat.device(), node_stat);
301         }
302       } else {
303         covered_nodes_.insert(node->second->id());
304         node->second->AddStepStat(step, dev_stat.device(), node_stat);
305       }
306     }
307   }
308 
309   if (has_gpu_scheduling && !has_gpu_stream) {
310     miss_accelerator_stream_ = true;
311   }
312 }
313 
MaybeReportMissingTrace() const314 string TFStats::MaybeReportMissingTrace() const {
315   string report = "";
316   if (miss_accelerator_stream_) {
317     report +=
318         "\n\nFound accelerator operation but misses accelerator "
319         "stream stats!\n\n"
320         "It's likely a gpu tracing issue rather than tf-profiler issue.\n"
321         "If you found your operation missing accelerator time, "
322         "consider to post to discuss@tensorflow.org!\n\n";
323   }
324   return report;
325 }
326 
SerializeToString(string * content)327 void TFStats::SerializeToString(string* content) {
328   ProfileProto profile;
329   for (const auto& entry : id_to_string_) {
330     (*profile.mutable_id_to_string())[entry.first] = entry.second;
331   }
332   for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
333     if (it->second->id() < 0) {
334       continue;
335     }
336     (*profile.mutable_nodes())[it->second->id()].MergeFrom(
337         it->second->ToProto(nodes_map_));
338   }
339 
340   profile.set_has_trace(has_code_traces_);
341   profile.set_miss_accelerator_stream(miss_accelerator_stream_);
342   for (int64_t s : steps_) {
343     profile.add_steps(s);
344   }
345   *content = profile.SerializeAsString();
346 }
347 
WriteProfile(const string & filename)348 void TFStats::WriteProfile(const string& filename) {
349   string content;
350   SerializeToString(&content);
351   Status s = WriteStringToFile(Env::Default(), filename, content);
352   if (!s.ok()) {
353     absl::FPrintF(stderr, "%s\n", s.ToString());
354   }
355 }
356 
Validate(const Options & opts) const357 bool TFStats::Validate(const Options& opts) const {
358   if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) {
359     absl::FPrintF(stderr,
360                   "Options -step=%d not found.\nAvailable steps: ", opts.step);
361     for (int64_t s : steps_) {
362       absl::FPrintF(stderr, "%d ", s);
363     }
364     absl::FPrintF(stderr, "\n");
365     return false;
366   }
367   return true;
368 }
369 
AddNodeForTest(int64_t step,std::unique_ptr<TFGraphNode> node)370 void TFStats::AddNodeForTest(int64_t step, std::unique_ptr<TFGraphNode> node) {
371   steps_.insert(step);
372   nodes_map_[node->name()] = std::move(node);
373 }
374 }  // namespace tfprof
375 }  // namespace tensorflow
376