1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #if GOOGLE_CUDA
17
18 #include <stdlib.h>
19
20 #include <memory>
21 #include <utility>
22
23 #include "absl/container/fixed_array.h"
24 #include "absl/container/flat_hash_map.h"
25 #include "absl/container/flat_hash_set.h"
26 #include "tensorflow/core/framework/step_stats.pb.h"
27 #include "tensorflow/core/platform/errors.h"
28 #include "tensorflow/core/platform/macros.h"
29 #include "tensorflow/core/platform/thread_annotations.h"
30 #include "tensorflow/core/profiler/backends/gpu/cupti_collector.h"
31 #include "tensorflow/core/profiler/backends/gpu/cupti_tracer.h"
32 #include "tensorflow/core/profiler/backends/gpu/cupti_wrapper.h"
33 #include "tensorflow/core/profiler/lib/profiler_factory.h"
34 #include "tensorflow/core/profiler/lib/profiler_interface.h"
35 #include "tensorflow/core/profiler/protobuf/xplane.pb.h"
36 #include "tensorflow/core/profiler/utils/time_utils.h"
37 #include "tensorflow/core/util/env_var.h"
38
39 namespace tensorflow {
40 namespace profiler {
41
42 // GpuTracer for GPU.
43 class GpuTracer : public profiler::ProfilerInterface {
44 public:
GpuTracer(CuptiTracer * cupti_tracer,CuptiInterface * cupti_interface)45 GpuTracer(CuptiTracer* cupti_tracer, CuptiInterface* cupti_interface)
46 : cupti_tracer_(cupti_tracer) {
47 VLOG(1) << "GpuTracer created.";
48 }
~GpuTracer()49 ~GpuTracer() override {}
50
51 // GpuTracer interface:
52 Status Start() override;
53 Status Stop() override;
54 Status CollectData(XSpace* space) override;
55
56 private:
57 Status DoStart();
58 Status DoStop();
59
60 enum State {
61 kNotStarted,
62 kStartedOk,
63 kStartedError,
64 kStoppedOk,
65 kStoppedError
66 };
67 State profiling_state_ = State::kNotStarted;
68
69 CuptiTracer* cupti_tracer_;
70 CuptiTracerOptions options_;
71 std::unique_ptr<CuptiTraceCollector> cupti_collector_;
72 };
73
DoStart()74 Status GpuTracer::DoStart() {
75 if (!cupti_tracer_->IsAvailable()) {
76 return errors::Unavailable("Another profile session running.");
77 }
78
79 options_.cbids_selected = {
80 // KERNEL
81 CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel,
82 // MEMCPY
83 CUPTI_DRIVER_TRACE_CBID_cuMemcpy,
84 CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync,
85 CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoD_v2,
86 CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoDAsync_v2,
87 CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoH_v2,
88 CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2,
89 CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoD_v2,
90 CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2,
91 CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoH_v2,
92 CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoHAsync_v2,
93 CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoD_v2,
94 CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoA_v2,
95 CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoA_v2,
96 CUPTI_DRIVER_TRACE_CBID_cuMemcpy2D_v2,
97 CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DUnaligned_v2,
98 CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DAsync_v2,
99 CUPTI_DRIVER_TRACE_CBID_cuMemcpy3D_v2,
100 CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DAsync_v2,
101 CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA_v2,
102 CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2,
103 // MemAlloc
104 CUPTI_DRIVER_TRACE_CBID_cuMemAlloc_v2,
105 CUPTI_DRIVER_TRACE_CBID_cuMemAllocPitch_v2,
106 // MemFree
107 CUPTI_DRIVER_TRACE_CBID_cuMemFree_v2,
108 // Memset
109 CUPTI_DRIVER_TRACE_CBID_cuMemsetD8_v2,
110 CUPTI_DRIVER_TRACE_CBID_cuMemsetD16_v2,
111 CUPTI_DRIVER_TRACE_CBID_cuMemsetD32_v2,
112 CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D8_v2,
113 CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D16_v2,
114 CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D32_v2,
115 CUPTI_DRIVER_TRACE_CBID_cuMemsetD8Async,
116 CUPTI_DRIVER_TRACE_CBID_cuMemsetD16Async,
117 CUPTI_DRIVER_TRACE_CBID_cuMemsetD32Async,
118 CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D8Async,
119 CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D16Async,
120 CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D32Async,
121 // GENERIC
122 CUPTI_DRIVER_TRACE_CBID_cuStreamSynchronize,
123 };
124
125 bool use_cupti_activity_api = true;
126 ReadBoolFromEnvVar("TF_GPU_CUPTI_USE_ACTIVITY_API", true,
127 &use_cupti_activity_api)
128 .IgnoreError();
129 options_.enable_event_based_activity = !use_cupti_activity_api;
130
131 bool trace_concurrent_kernels = false;
132 ReadBoolFromEnvVar("TF_GPU_CUPTI_FORCE_CONCURRENT_KERNEL", false,
133 &trace_concurrent_kernels)
134 .IgnoreError();
135 options_.activities_selected.push_back(
136 trace_concurrent_kernels ? CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL
137 : CUPTI_ACTIVITY_KIND_KERNEL);
138 options_.activities_selected.push_back(CUPTI_ACTIVITY_KIND_MEMCPY);
139 options_.activities_selected.push_back(CUPTI_ACTIVITY_KIND_MEMCPY2);
140 options_.activities_selected.push_back(CUPTI_ACTIVITY_KIND_OVERHEAD);
141 options_.activities_selected.push_back(CUPTI_ACTIVITY_KIND_MEMSET);
142
143 // CUDA/CUPTI 10 have issues (leaks and crashes) with CuptiFinalize.
144 #if CUDA_VERSION < 10000
145 if (!trace_concurrent_kernels) options_.cupti_finalize = true;
146 #elif CUDA_VERSION >= 11000
147 options_.cupti_finalize = true;
148 #endif
149
150 CuptiTracerCollectorOptions collector_options;
151 collector_options.num_gpus = cupti_tracer_->NumGpus();
152 uint64 start_gputime_ns = CuptiTracer::GetTimestamp();
153 uint64 start_walltime_ns = GetCurrentTimeNanos();
154 cupti_collector_ = CreateCuptiCollector(collector_options, start_walltime_ns,
155 start_gputime_ns);
156
157 cupti_tracer_->Enable(options_, cupti_collector_.get());
158 return OkStatus();
159 }
160
Start()161 Status GpuTracer::Start() {
162 Status status = DoStart();
163 if (status.ok()) {
164 profiling_state_ = State::kStartedOk;
165 return OkStatus();
166 } else {
167 profiling_state_ = State::kStartedError;
168 return status;
169 }
170 }
171
DoStop()172 Status GpuTracer::DoStop() {
173 cupti_tracer_->Disable();
174 return OkStatus();
175 }
176
Stop()177 Status GpuTracer::Stop() {
178 if (profiling_state_ == State::kStartedOk) {
179 Status status = DoStop();
180 profiling_state_ = status.ok() ? State::kStoppedOk : State::kStoppedError;
181 }
182 return OkStatus();
183 }
184
CollectData(XSpace * space)185 Status GpuTracer::CollectData(XSpace* space) {
186 VLOG(2) << "Collecting data to XSpace from GpuTracer.";
187 switch (profiling_state_) {
188 case State::kNotStarted:
189 VLOG(1) << "No trace data collected, session wasn't started";
190 return OkStatus();
191 case State::kStartedOk:
192 return errors::FailedPrecondition("Cannot collect trace before stopping");
193 case State::kStartedError:
194 LOG(ERROR) << "Cannot collect, profiler failed to start";
195 return OkStatus();
196 case State::kStoppedError:
197 VLOG(1) << "No trace data collected";
198 return OkStatus();
199 case State::kStoppedOk: {
200 std::string cupti_error = CuptiTracer::ErrorIfAny();
201 if (!cupti_error.empty()) {
202 space->add_errors(std::move(cupti_error));
203 }
204 std::string events_dropped = cupti_collector_->ReportNumEventsIfDropped();
205 if (!events_dropped.empty()) {
206 space->add_warnings(std::move(events_dropped));
207 }
208 if (cupti_collector_) {
209 uint64 end_gpu_ns = CuptiTracer::GetTimestamp();
210 cupti_collector_->Export(space, end_gpu_ns);
211 }
212 return OkStatus();
213 }
214 }
215 return errors::Internal("Invalid profiling state: ", profiling_state_);
216 }
217
218 // Not in anonymous namespace for testing purposes.
CreateGpuTracer(const ProfileOptions & options)219 std::unique_ptr<profiler::ProfilerInterface> CreateGpuTracer(
220 const ProfileOptions& options) {
221 if (options.device_tracer_level() == 0) return nullptr;
222 if (options.device_type() != ProfileOptions::GPU &&
223 options.device_type() != ProfileOptions::UNSPECIFIED)
224 return nullptr;
225 profiler::CuptiTracer* cupti_tracer =
226 profiler::CuptiTracer::GetCuptiTracerSingleton();
227 if (!cupti_tracer->IsAvailable()) {
228 return nullptr;
229 }
230 profiler::CuptiInterface* cupti_interface = profiler::GetCuptiInterface();
231 return std::make_unique<profiler::GpuTracer>(cupti_tracer, cupti_interface);
232 }
233
__anon1d2306580102null234 auto register_gpu_tracer_factory = [] {
235 RegisterProfilerFactory(&CreateGpuTracer);
236 return 0;
237 }();
238
239 } // namespace profiler
240 } // namespace tensorflow
241
242 #endif // GOOGLE_CUDA
243