1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
17
18 #include <memory>
19 #include <stack>
20 #include <unordered_set>
21 #include <vector>
22
23 #include "absl/memory/memory.h"
24 #include "tensorflow/compiler/xla/service/hlo_computation.h"
25 #include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
26 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
27 #include "tensorflow/compiler/xla/service/stream_pool.h"
28 #include "tensorflow/core/platform/logging.h"
29 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
30 #include "tensorflow/core/util/ptr_util.h"
31
32 namespace xla {
33 namespace gpu {
34 namespace {
InitAndStartTimer(std::stack<std::unique_ptr<se::Timer>> * timers,se::Stream * stream)35 void InitAndStartTimer(std::stack<std::unique_ptr<se::Timer>>* timers,
36 se::Stream* stream) {
37 timers->push(absl::make_unique<se::Timer>(stream->parent()));
38 stream->InitTimer(timers->top().get()).ThenStartTimer(timers->top().get());
39 }
40
GetCyclesTaken(std::stack<std::unique_ptr<se::Timer>> * timers,const std::vector<StreamPool::Ptr> & sub_streams,se::Stream * stream,double clock_rate_ghz)41 uint64 GetCyclesTaken(std::stack<std::unique_ptr<se::Timer>>* timers,
42 const std::vector<StreamPool::Ptr>& sub_streams,
43 se::Stream* stream, double clock_rate_ghz) {
44 CHECK_GT(timers->size(), 0);
45 stream->ThenWaitFor(&sub_streams);
46 stream->ThenStopTimer(timers->top().get());
47 stream->BlockHostUntilDone().IgnoreError();
48 double nanoseconds = timers->top()->Nanoseconds();
49 timers->pop();
50 return static_cast<uint64>(nanoseconds * clock_rate_ghz);
51 }
52 } // namespace
53
HloExecutionProfiler(bool do_profile,HloExecutionProfile * profile,se::Stream * stream,const std::vector<StreamPool::Ptr> & sub_streams,const HloComputation * computation)54 HloExecutionProfiler::HloExecutionProfiler(
55 bool do_profile, HloExecutionProfile* profile, se::Stream* stream,
56 const std::vector<StreamPool::Ptr>& sub_streams,
57 const HloComputation* computation)
58 : do_profile_(do_profile),
59 profile_(profile),
60 stream_(stream),
61 sub_streams_(sub_streams),
62 computation_(computation) {
63 if (do_profile_) {
64 clock_rate_ghz_ = stream->parent()->GetDeviceDescription().clock_rate_ghz();
65 InitAndStartTimer(&timers_, stream);
66 }
67 }
68
FinishExecution()69 void HloExecutionProfiler::FinishExecution() {
70 CHECK(!finished_execution_) << "Call FinishExecution only once!";
71 finished_execution_ = true;
72 if (do_profile_) {
73 profile_->set_total_cycles_executed(
74 *computation_,
75 GetCyclesTaken(&timers_, sub_streams_, stream_, clock_rate_ghz_));
76 }
77 }
78
StartHloComputation()79 void HloExecutionProfiler::StartHloComputation() {
80 if (do_profile_) {
81 InitAndStartTimer(&timers_, stream_);
82 }
83 }
84
FinishHloComputation(const HloComputation * computation)85 void HloExecutionProfiler::FinishHloComputation(
86 const HloComputation* computation) {
87 if (do_profile_) {
88 profile_->set_total_cycles_executed(
89 *computation,
90 GetCyclesTaken(&timers_, sub_streams_, stream_, clock_rate_ghz_));
91 }
92 }
93
StartHloInstruction()94 void HloExecutionProfiler::StartHloInstruction() {
95 if (do_profile_) {
96 InitAndStartTimer(&timers_, stream_);
97 }
98 }
99
FinishHloInstruction(const HloInstruction * hlo_instruction)100 void HloExecutionProfiler::FinishHloInstruction(
101 const HloInstruction* hlo_instruction) {
102 if (do_profile_) {
103 hlo_instructions_.erase(hlo_instruction);
104 profile_->SetCyclesTakenBy(
105 hlo_instruction,
106 GetCyclesTaken(&timers_, sub_streams_, stream_, clock_rate_ghz_));
107 }
108 }
109
110 std::unique_ptr<ScopedInstructionProfiler>
MakeScopedInstructionProfiler(const HloInstruction * hlo_instruction)111 HloExecutionProfiler::MakeScopedInstructionProfiler(
112 const HloInstruction* hlo_instruction) {
113 if (do_profile_ && hlo_instruction != nullptr) {
114 // Make sure that we are not already measuring the time for the same
115 // 'hlo_instruction'.
116 CHECK(hlo_instructions_.insert(hlo_instruction).second)
117 << hlo_instruction->name();
118 }
119 return absl::make_unique<ScopedInstructionProfiler>(this, hlo_instruction);
120 }
121
122 } // namespace gpu
123 } // namespace xla
124