/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_
17 #define TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_
18 
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "absl/types/optional.h"
#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h"
#include "third_party/gpus/cuda/include/nvtx3/nvToolsExt.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/internal/gpu/cupti_collector.h"
#include "tensorflow/core/profiler/internal/gpu/cupti_interface.h"
28 
29 namespace tensorflow {
30 namespace profiler {
31 
32 struct CuptiTracerOptions {
33   bool enable_activity_api = true;
34 
35   // Use cuda events to enclose the kernel/memcpy to measure device activity.
36   // enable_event_based_activity, if true, will override the enable_activity_api
37   // setting.
38   bool enable_event_based_activity = false;
39 
40   bool required_callback_api_events = true;
41   // The callback ids that will be enabled and monitored, if empty, all
42   // Callback ids to be enabled using Callback API.
43   // We only care CUPTI_CB_DOMAIN_DRIVER_API domain for now. It is kind of
44   // redundant to have both CUPTI_CB_DOMAIN_DRIVER_API and
45   // CUPTI_CB_DOMAIN_RUNTIME_API.
46   std::vector<CUpti_driver_api_trace_cbid_enum> cbids_selected;
47   // Activity kinds to be collected using Activity API. If empty, the Activity
48   // API is disable.
49   std::vector<CUpti_ActivityKind> activities_selected;
50   // Whether to call cuptiFinalize.
51   bool cupti_finalize = false;
52   // Whether to call cuCtxSynchronize for each device before Stop().
53   bool sync_devices_before_stop = false;
54   // Whether to enable NVTX tracking, we need this for TensorRT tracking.
55   bool enable_nvtx_tracking = false;
56 };
57 
58 class CuptiDriverApiHook {
59  public:
~CuptiDriverApiHook()60   virtual ~CuptiDriverApiHook() {}
61 
62   virtual Status OnDriverApiEnter(int device_id, CUpti_CallbackDomain domain,
63                                   CUpti_CallbackId cbid,
64                                   const CUpti_CallbackData* callback_info) = 0;
65   virtual Status OnDriverApiExit(int device_id, CUpti_CallbackDomain domain,
66                                  CUpti_CallbackId cbid,
67                                  const CUpti_CallbackData* callback_info) = 0;
68   virtual Status SyncAndFlush() = 0;
69 
70  protected:
71   static Status AddDriverApiCallbackEvent(
72       CuptiTraceCollector* collector, CuptiInterface* cupti_interface,
73       int device_id, uint64 start_tsc, uint64 end_tsc,
74       CUpti_CallbackDomain domain, CUpti_CallbackId cbid,
75       const CUpti_CallbackData* callback_info);
76 };
77 
78 // The class use to enable cupti callback/activity API and forward the collected
79 // trace events to CuptiTraceCollector. There should be only one CuptiTracer
80 // per process.
81 class CuptiTracer {
82  public:
83   // Returns a pointer to singleton CuptiTracer.
84   static CuptiTracer* GetCuptiTracerSingleton();
85 
86   // Only one profile session can be live in the same time.
87   bool IsAvailable() const;
NeedRootAccess()88   bool NeedRootAccess() const { return need_root_access_; }
89 
90   void Enable(const CuptiTracerOptions& option, CuptiTraceCollector* collector);
91   void Disable();
92 
93   Status HandleCallback(CUpti_CallbackDomain domain, CUpti_CallbackId cbid,
94                         const CUpti_CallbackData* callback_info);
95 
96   // This function is public because called from registered callback.
97   Status ProcessActivityBuffer(CUcontext context, uint32_t stream_id,
98                                uint8_t* buffer, size_t size);
99 
100   static uint64 GetTimestamp();
101   static int NumGpus();
102   // Returns the error (if any) when using libcupti.
103   static std::string ErrorIfAny();
104 
105  protected:
106   // protected constructor for injecting mock cupti interface for testing.
CuptiTracer(CuptiInterface * cupti_interface)107   explicit CuptiTracer(CuptiInterface* cupti_interface)
108       : num_gpus_(NumGpus()), cupti_interface_(cupti_interface) {}
109 
110  private:
111   Status EnableApiTracing();
112   Status EnableActivityTracing();
113   Status DisableApiTracing();
114   Status DisableActivityTracing();
115   Status Finalize();
116   void ConfigureActivityUnifiedMemoryCounter(bool enable);
117   Status HandleNVTXCallback(CUpti_CallbackId cbid,
118                             const CUpti_CallbackData* cbdata);
119 
120   int num_gpus_;
121   absl::optional<CuptiTracerOptions> option_;
122   CuptiInterface* cupti_interface_ = nullptr;
123   CuptiTraceCollector* collector_ = nullptr;
124 
125   // CUPTI 10.1 and higher need root access to profile.
126   bool need_root_access_ = false;
127 
128   bool api_tracing_enabled_ = false;
129   // Cupti handle for driver or runtime API callbacks. Cupti permits a single
130   // subscriber to be active at any time and can be used to trace Cuda runtime
131   // as and driver calls for all contexts and devices.
132   CUpti_SubscriberHandle subscriber_;  // valid when api_tracing_enabled_.
133 
134   bool activity_tracing_enabled_ = false;
135 
136   std::unique_ptr<CuptiDriverApiHook> cupti_driver_api_hook_;
137 
138   TF_DISALLOW_COPY_AND_ASSIGN(CuptiTracer);
139 };
140 
141 }  // namespace profiler
142 }  // namespace tensorflow
143 
144 #endif  // TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_
145