• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #if GOOGLE_CUDA
17 
18 #include <stdlib.h>
19 
20 #include <memory>
21 #include <utility>
22 
23 #include "absl/container/fixed_array.h"
24 #include "absl/container/flat_hash_map.h"
25 #include "absl/container/flat_hash_set.h"
26 #include "tensorflow/core/framework/step_stats.pb.h"
27 #include "tensorflow/core/platform/errors.h"
28 #include "tensorflow/core/platform/macros.h"
29 #include "tensorflow/core/platform/thread_annotations.h"
30 #include "tensorflow/core/profiler/backends/gpu/cupti_collector.h"
31 #include "tensorflow/core/profiler/backends/gpu/cupti_tracer.h"
32 #include "tensorflow/core/profiler/backends/gpu/cupti_wrapper.h"
33 #include "tensorflow/core/profiler/lib/profiler_factory.h"
34 #include "tensorflow/core/profiler/lib/profiler_interface.h"
35 #include "tensorflow/core/profiler/protobuf/xplane.pb.h"
36 #include "tensorflow/core/profiler/utils/time_utils.h"
37 #include "tensorflow/core/util/env_var.h"
38 
39 namespace tensorflow {
40 namespace profiler {
41 
42 // GpuTracer for GPU.
43 class GpuTracer : public profiler::ProfilerInterface {
44  public:
GpuTracer(CuptiTracer * cupti_tracer,CuptiInterface * cupti_interface)45   GpuTracer(CuptiTracer* cupti_tracer, CuptiInterface* cupti_interface)
46       : cupti_tracer_(cupti_tracer) {
47     VLOG(1) << "GpuTracer created.";
48   }
~GpuTracer()49   ~GpuTracer() override {}
50 
51   // GpuTracer interface:
52   Status Start() override;
53   Status Stop() override;
54   Status CollectData(XSpace* space) override;
55 
56  private:
57   Status DoStart();
58   Status DoStop();
59 
60   enum State {
61     kNotStarted,
62     kStartedOk,
63     kStartedError,
64     kStoppedOk,
65     kStoppedError
66   };
67   State profiling_state_ = State::kNotStarted;
68 
69   CuptiTracer* cupti_tracer_;
70   CuptiTracerOptions options_;
71   std::unique_ptr<CuptiTraceCollector> cupti_collector_;
72 };
73 
DoStart()74 Status GpuTracer::DoStart() {
75   if (!cupti_tracer_->IsAvailable()) {
76     return errors::Unavailable("Another profile session running.");
77   }
78 
79   options_.cbids_selected = {
80       // KERNEL
81       CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel,
82       // MEMCPY
83       CUPTI_DRIVER_TRACE_CBID_cuMemcpy,
84       CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync,
85       CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoD_v2,
86       CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoDAsync_v2,
87       CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoH_v2,
88       CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2,
89       CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoD_v2,
90       CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2,
91       CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoH_v2,
92       CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoHAsync_v2,
93       CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoD_v2,
94       CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoA_v2,
95       CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoA_v2,
96       CUPTI_DRIVER_TRACE_CBID_cuMemcpy2D_v2,
97       CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DUnaligned_v2,
98       CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DAsync_v2,
99       CUPTI_DRIVER_TRACE_CBID_cuMemcpy3D_v2,
100       CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DAsync_v2,
101       CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA_v2,
102       CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2,
103       // MemAlloc
104       CUPTI_DRIVER_TRACE_CBID_cuMemAlloc_v2,
105       CUPTI_DRIVER_TRACE_CBID_cuMemAllocPitch_v2,
106       // MemFree
107       CUPTI_DRIVER_TRACE_CBID_cuMemFree_v2,
108       // Memset
109       CUPTI_DRIVER_TRACE_CBID_cuMemsetD8_v2,
110       CUPTI_DRIVER_TRACE_CBID_cuMemsetD16_v2,
111       CUPTI_DRIVER_TRACE_CBID_cuMemsetD32_v2,
112       CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D8_v2,
113       CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D16_v2,
114       CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D32_v2,
115       CUPTI_DRIVER_TRACE_CBID_cuMemsetD8Async,
116       CUPTI_DRIVER_TRACE_CBID_cuMemsetD16Async,
117       CUPTI_DRIVER_TRACE_CBID_cuMemsetD32Async,
118       CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D8Async,
119       CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D16Async,
120       CUPTI_DRIVER_TRACE_CBID_cuMemsetD2D32Async,
121       // GENERIC
122       CUPTI_DRIVER_TRACE_CBID_cuStreamSynchronize,
123   };
124 
125   bool use_cupti_activity_api = true;
126   ReadBoolFromEnvVar("TF_GPU_CUPTI_USE_ACTIVITY_API", true,
127                      &use_cupti_activity_api)
128       .IgnoreError();
129   options_.enable_event_based_activity = !use_cupti_activity_api;
130 
131   bool trace_concurrent_kernels = false;
132   ReadBoolFromEnvVar("TF_GPU_CUPTI_FORCE_CONCURRENT_KERNEL", false,
133                      &trace_concurrent_kernels)
134       .IgnoreError();
135   options_.activities_selected.push_back(
136       trace_concurrent_kernels ? CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL
137                                : CUPTI_ACTIVITY_KIND_KERNEL);
138   options_.activities_selected.push_back(CUPTI_ACTIVITY_KIND_MEMCPY);
139   options_.activities_selected.push_back(CUPTI_ACTIVITY_KIND_MEMCPY2);
140   options_.activities_selected.push_back(CUPTI_ACTIVITY_KIND_OVERHEAD);
141   options_.activities_selected.push_back(CUPTI_ACTIVITY_KIND_MEMSET);
142 
143 // CUDA/CUPTI 10 have issues (leaks and crashes) with CuptiFinalize.
144 #if CUDA_VERSION < 10000
145   if (!trace_concurrent_kernels) options_.cupti_finalize = true;
146 #elif CUDA_VERSION >= 11000
147   options_.cupti_finalize = true;
148 #endif
149 
150   CuptiTracerCollectorOptions collector_options;
151   collector_options.num_gpus = cupti_tracer_->NumGpus();
152   uint64 start_gputime_ns = CuptiTracer::GetTimestamp();
153   uint64 start_walltime_ns = GetCurrentTimeNanos();
154   cupti_collector_ = CreateCuptiCollector(collector_options, start_walltime_ns,
155                                           start_gputime_ns);
156 
157   cupti_tracer_->Enable(options_, cupti_collector_.get());
158   return OkStatus();
159 }
160 
Start()161 Status GpuTracer::Start() {
162   Status status = DoStart();
163   if (status.ok()) {
164     profiling_state_ = State::kStartedOk;
165     return OkStatus();
166   } else {
167     profiling_state_ = State::kStartedError;
168     return status;
169   }
170 }
171 
DoStop()172 Status GpuTracer::DoStop() {
173   cupti_tracer_->Disable();
174   return OkStatus();
175 }
176 
Stop()177 Status GpuTracer::Stop() {
178   if (profiling_state_ == State::kStartedOk) {
179     Status status = DoStop();
180     profiling_state_ = status.ok() ? State::kStoppedOk : State::kStoppedError;
181   }
182   return OkStatus();
183 }
184 
CollectData(XSpace * space)185 Status GpuTracer::CollectData(XSpace* space) {
186   VLOG(2) << "Collecting data to XSpace from GpuTracer.";
187   switch (profiling_state_) {
188     case State::kNotStarted:
189       VLOG(1) << "No trace data collected, session wasn't started";
190       return OkStatus();
191     case State::kStartedOk:
192       return errors::FailedPrecondition("Cannot collect trace before stopping");
193     case State::kStartedError:
194       LOG(ERROR) << "Cannot collect, profiler failed to start";
195       return OkStatus();
196     case State::kStoppedError:
197       VLOG(1) << "No trace data collected";
198       return OkStatus();
199     case State::kStoppedOk: {
200       std::string cupti_error = CuptiTracer::ErrorIfAny();
201       if (!cupti_error.empty()) {
202         space->add_errors(std::move(cupti_error));
203       }
204       std::string events_dropped = cupti_collector_->ReportNumEventsIfDropped();
205       if (!events_dropped.empty()) {
206         space->add_warnings(std::move(events_dropped));
207       }
208       if (cupti_collector_) {
209         uint64 end_gpu_ns = CuptiTracer::GetTimestamp();
210         cupti_collector_->Export(space, end_gpu_ns);
211       }
212       return OkStatus();
213     }
214   }
215   return errors::Internal("Invalid profiling state: ", profiling_state_);
216 }
217 
218 // Not in anonymous namespace for testing purposes.
CreateGpuTracer(const ProfileOptions & options)219 std::unique_ptr<profiler::ProfilerInterface> CreateGpuTracer(
220     const ProfileOptions& options) {
221   if (options.device_tracer_level() == 0) return nullptr;
222   if (options.device_type() != ProfileOptions::GPU &&
223       options.device_type() != ProfileOptions::UNSPECIFIED)
224     return nullptr;
225   profiler::CuptiTracer* cupti_tracer =
226       profiler::CuptiTracer::GetCuptiTracerSingleton();
227   if (!cupti_tracer->IsAvailable()) {
228     return nullptr;
229   }
230   profiler::CuptiInterface* cupti_interface = profiler::GetCuptiInterface();
231   return std::make_unique<profiler::GpuTracer>(cupti_tracer, cupti_interface);
232 }
233 
__anon1d2306580102null234 auto register_gpu_tracer_factory = [] {
235   RegisterProfilerFactory(&CreateGpuTracer);
236   return 0;
237 }();
238 
239 }  // namespace profiler
240 }  // namespace tensorflow
241 
242 #endif  // GOOGLE_CUDA
243