• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The TensorFlow Authors All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 // This checker checks the accelerator's utilization.
16 #ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_ADVISOR_ACCELERATOR_UTILIZATION_CHECKER_H_
17 #define TENSORFLOW_CORE_PROFILER_INTERNAL_ADVISOR_ACCELERATOR_UTILIZATION_CHECKER_H_
18 
19 #include "tensorflow/core/profiler/internal/advisor/checker.h"
20 
21 namespace tensorflow {
22 namespace tfprof {
23 
24 struct ExecStats {
25  public:
26   // Earliest start time of a step.
27   int64 start_micros;
28   // Latest finish time of a step.
29   int64 end_micros;
30   // The duration spent on running a kernel during a step.
31   int64 exec_micros;
32 };
33 
34 class AcceleratorUtilizationChecker : public Checker {
35  public:
name()36   string name() const override { return kCheckers[0]; }
37 
38  private:
Check(const AdvisorOptionsProto::CheckerOption & options,const TFStats * stats)39   AdviceProto::Checker Check(const AdvisorOptionsProto::CheckerOption& options,
40                              const TFStats* stats) override {
41     if (!stats) {
42       fprintf(stderr, "Missing profiles (e.g. graph, run_meta). Skip %s\n",
43               name().c_str());
44       return reports_;
45     }
46     for (const auto& n : stats->nodes()) {
47       BuildExecStats(n.second.get());
48     }
49     return CheckInternal();
50   }
51 
CheckInternal()52   AdviceProto::Checker CheckInternal() {
53     for (const auto& s : accelerator_exec_stats_) {
54       const ExecStats& stat = s.second;
55       int64 total_micros = stat.end_micros - stat.start_micros;
56       if (total_micros <= 0) continue;
57       double utilization = 1.0 * stat.exec_micros / total_micros;
58       if (utilization >= 0.5) {
59         reports_.add_reports(strings::Printf("device: %s utilization: %.2f",
60                                              s.first.c_str(), utilization));
61       } else if (utilization < 0.5 && utilization > 0.2) {
62         reports_.add_reports(strings::Printf("device: %s low utilization: %.2f",
63                                              s.first.c_str(), utilization));
64       } else if (utilization <= 0.2) {
65         reports_.add_reports(strings::Printf("device: %s low utilization: %.2f",
66                                              s.first.c_str(), utilization));
67       }
68     }
69     return reports_;
70   }
71 
BuildExecStats(const TFGraphNode * node)72   void BuildExecStats(const TFGraphNode* node) {
73     const auto& execs = node->all_op_execs();
74     if (execs.empty()) {
75       return;
76     }
77     if (!IsPlacedOnAccelerator(node->canonical_device())) {
78       return;
79     }
80 
81     if (accelerator_exec_stats_.find(node->canonical_device()) ==
82         accelerator_exec_stats_.end()) {
83       accelerator_exec_stats_.insert(
84           std::pair<string, ExecStats>(node->canonical_device(), ExecStats()));
85     }
86     ExecStats& stats = accelerator_exec_stats_.at(node->canonical_device());
87 
88     // TODO(xpan): Use multiple steps?
89     const ExecStep& exec = execs.rbegin()->second;
90 
91     if (stats.start_micros == 0) {
92       stats.start_micros = exec.all_start_micros();
93     } else if (exec.all_start_micros() != 0) {
94       stats.start_micros =
95           std::min(stats.start_micros, exec.all_start_micros());
96     }
97     stats.end_micros = std::max(stats.end_micros, exec.latest_end_micros());
98     stats.exec_micros += exec.accelerator_exec_micros();
99   }
100 
101   std::map<string, ExecStats> accelerator_exec_stats_;
102   std::map<string, int64> ps_placement_;
103   AdviceProto::Checker reports_;
104 };
105 
106 }  // namespace tfprof
107 }  // namespace tensorflow
108 
109 #endif  // TENSORFLOW_CORE_PROFILER_INTERNAL_ADVISOR_ACCELERATOR_UTILIZATION_CHECKER_H_
110