• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_HLO_EXECUTION_PROFILER_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_HLO_EXECUTION_PROFILER_H_
18 
19 #include <memory>
20 #include <stack>
21 #include <unordered_set>
22 #include <vector>
23 
24 #include "tensorflow/compiler/xla/service/hlo_computation.h"
25 #include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
26 #include "tensorflow/compiler/xla/service/stream_pool.h"
27 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
28 
29 namespace xla {
30 namespace gpu {
31 
32 class ScopedInstructionProfiler;
33 
34 // A helper class for profiling HLO in the course of GPU program execution.
35 // All of the profiling is guarded internally, to avoid the caller needing to
36 // have lots of conditionals sprinkled around.
37 class HloExecutionProfiler {
38  public:
39   // If profiling is enabled, start an execution timer running.
40   explicit HloExecutionProfiler(bool do_profile, HloExecutionProfile* profile,
41                                 se::Stream* stream,
42                                 const std::vector<StreamPool::Ptr>& sub_streams,
43                                 size_t index);
44 
45   // If profiling is enabled, sets the total cycle count on the profile from the
46   // execution timer.
47   void FinishExecution();
48 
49   // If profiling is enabled, starts a timer for a (sub)computation.
50   void StartHloComputation();
51 
52   // If profiling is enabled stops the timer for a (sub)computation and records
53   // the time that the computation took to execute in the profile.
54   void FinishHloComputation(const HloComputation* computation);
55 
56   // If profiling is enabled stops the timer for a (sub)computation with the
57   // given profile index and records the time that the computation took to
58   // execute in the profile.
59   void FinishHloComputation(absl::optional<size_t> profile_index);
60 
61   // If profiling is enabled, starts a per-operation timer.
62   void StartHloInstruction();
63 
64   // If profiling is enabled, stops the per-operation timer and records the time
65   // that at `profile_index`. Profile indices can be looked up from
66   // HloProfileIndexMap.
67   void FinishHloInstruction(size_t profile_index);
68 
69   // Returns a ScopedInstructionProfiler and triggers a call to
70   // StartHloInstruction(). Once the returned ScopedInstructionProfiler goes
71   // out of scope, it triggers a call to FinishHloInstruction().
72   //
73   // If profile_index < 0, it results in a no-op.
74   std::unique_ptr<ScopedInstructionProfiler> MakeScopedInstructionProfiler(
75       absl::optional<int64> profile_index);
76 
77  private:
78   const bool do_profile_;
79   double clock_rate_ghz_;
80   HloExecutionProfile* profile_;
81   se::Stream* stream_;
82   const std::vector<StreamPool::Ptr>& sub_streams_;
83   size_t computation_profile_index_;
84   std::stack<std::unique_ptr<se::Timer>> timers_;
85   // Contains the HLO instructions for which we are currently measuring the
86   // time.
87   std::unordered_set<size_t> indices_;
88   bool finished_execution_ = false;
89 };
90 
91 // This class can be used within the ExecuteOnStream() implementations of
92 // Thunks. It ensures that we always have a pair of matching
93 // StartHloInstruction() and FinishHloInstruction() calls to the profiler.
94 class ScopedInstructionProfiler {
95  public:
ScopedInstructionProfiler(HloExecutionProfiler * profiler,absl::optional<int64> index)96   ScopedInstructionProfiler(HloExecutionProfiler* profiler,
97                             absl::optional<int64> index)
98       : profiler_(profiler), index_(index) {
99     if (index_.has_value()) {
100       profiler->StartHloInstruction();
101     }
102   }
~ScopedInstructionProfiler()103   ~ScopedInstructionProfiler() {
104     if (index_.has_value()) {
105       profiler_->FinishHloInstruction(*index_);
106     }
107   }
108 
109  private:
110   HloExecutionProfiler* profiler_;
111   absl::optional<int64> index_;
112 };
113 
114 }  // namespace gpu
115 }  // namespace xla
116 
117 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_HLO_EXECUTION_PROFILER_H_
118