• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_
17 #define TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_
18 
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "absl/types/optional.h"
#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h"
#include "third_party/gpus/cuda/include/nvtx3/nvToolsExt.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/internal/gpu/cupti_collector.h"
#include "tensorflow/core/profiler/internal/gpu/cupti_interface.h"
#include "tensorflow/core/profiler/utils/buffer_pool.h"
28 
29 namespace tensorflow {
30 namespace profiler {
31 
32 struct CuptiTracerOptions {
33   bool enable_activity_api = true;
34 
35   // Use cuda events to enclose the kernel/memcpy to measure device activity.
36   // enable_event_based_activity, if true, will override the enable_activity_api
37   // setting.
38   bool enable_event_based_activity = false;
39 
40   bool required_callback_api_events = true;
41   // The callback ids that will be enabled and monitored, if empty, all
42   // Callback ids to be enabled using Callback API.
43   // We only care CUPTI_CB_DOMAIN_DRIVER_API domain for now. It is kind of
44   // redundant to have both CUPTI_CB_DOMAIN_DRIVER_API and
45   // CUPTI_CB_DOMAIN_RUNTIME_API.
46   std::vector<CUpti_driver_api_trace_cbid_enum> cbids_selected;
47   // Activity kinds to be collected using Activity API. If empty, the Activity
48   // API is disable.
49   std::vector<CUpti_ActivityKind> activities_selected;
50   // Whether to call cuptiFinalize.
51   bool cupti_finalize = false;
52   // Whether to call cuCtxSynchronize for each device before Stop().
53   bool sync_devices_before_stop = false;
54   // Whether to enable NVTX tracking, we need this for TensorRT tracking.
55   bool enable_nvtx_tracking = false;
56 };
57 
58 class CuptiDriverApiHook {
59  public:
~CuptiDriverApiHook()60   virtual ~CuptiDriverApiHook() {}
61 
62   virtual Status OnDriverApiEnter(int device_id, CUpti_CallbackDomain domain,
63                                   CUpti_CallbackId cbid,
64                                   const CUpti_CallbackData* callback_info) = 0;
65   virtual Status OnDriverApiExit(int device_id, CUpti_CallbackDomain domain,
66                                  CUpti_CallbackId cbid,
67                                  const CUpti_CallbackData* callback_info) = 0;
68   virtual Status SyncAndFlush() = 0;
69 
70  protected:
71   static Status AddDriverApiCallbackEvent(
72       CuptiTraceCollector* collector, CuptiInterface* cupti_interface,
73       int device_id, uint64 start_tsc, uint64 end_tsc,
74       CUpti_CallbackDomain domain, CUpti_CallbackId cbid,
75       const CUpti_CallbackData* callback_info);
76 };
77 
78 // The class use to enable cupti callback/activity API and forward the collected
79 // trace events to CuptiTraceCollector. There should be only one CuptiTracer
80 // per process.
81 class CuptiTracer {
82  public:
83   // Not copyable or movable
84   CuptiTracer(const CuptiTracer&) = delete;
85   CuptiTracer& operator=(const CuptiTracer&) = delete;
86 
87   // Returns a pointer to singleton CuptiTracer.
88   static CuptiTracer* GetCuptiTracerSingleton();
89 
90   // Only one profile session can be live in the same time.
91   bool IsAvailable() const;
NeedRootAccess()92   bool NeedRootAccess() const { return need_root_access_; }
93 
94   void Enable(const CuptiTracerOptions& option, CuptiTraceCollector* collector);
95   void Disable();
96 
97   Status HandleCallback(CUpti_CallbackDomain domain, CUpti_CallbackId cbid,
98                         const CUpti_CallbackData* callback_info);
99 
100   // Returns a buffer and its size for CUPTI to store activities. This buffer
101   // will be reclaimed when CUPTI makes a callback to ProcessActivityBuffer.
102   void RequestActivityBuffer(uint8_t** buffer, size_t* size);
103 
104   // Parses CUPTI activity events from activity buffer, and emits events for
105   // CuptiTraceCollector. This function is public because called from registered
106   // callback.
107   Status ProcessActivityBuffer(CUcontext context, uint32_t stream_id,
108                                uint8_t* buffer, size_t size);
109 
110   static uint64 GetTimestamp();
111   static int NumGpus();
112   // Returns the error (if any) when using libcupti.
113   static std::string ErrorIfAny();
114 
115  protected:
116   // protected constructor for injecting mock cupti interface for testing.
117   explicit CuptiTracer(CuptiInterface* cupti_interface);
118 
119  private:
120   // Buffer size and alignment, 32K and 8 as in CUPTI samples.
121   static constexpr size_t kBufferSizeInBytes = 32 * 1024;
122 
123   Status EnableApiTracing();
124   Status EnableActivityTracing();
125   Status DisableApiTracing();
126   Status DisableActivityTracing();
127   Status Finalize();
128   void ConfigureActivityUnifiedMemoryCounter(bool enable);
129   Status HandleNVTXCallback(CUpti_CallbackId cbid,
130                             const CUpti_CallbackData* cbdata);
131 
132   int num_gpus_;
133   absl::optional<CuptiTracerOptions> option_;
134   CuptiInterface* cupti_interface_ = nullptr;
135   CuptiTraceCollector* collector_ = nullptr;
136 
137   // CUPTI 10.1 and higher need root access to profile.
138   bool need_root_access_ = false;
139 
140   bool api_tracing_enabled_ = false;
141   // Cupti handle for driver or runtime API callbacks. Cupti permits a single
142   // subscriber to be active at any time and can be used to trace Cuda runtime
143   // as and driver calls for all contexts and devices.
144   CUpti_SubscriberHandle subscriber_;  // valid when api_tracing_enabled_.
145 
146   bool activity_tracing_enabled_ = false;
147 
148   std::unique_ptr<CuptiDriverApiHook> cupti_driver_api_hook_;
149 
150   BufferPool buffer_pool_;
151 };
152 
153 }  // namespace profiler
154 }  // namespace tensorflow
155 
156 #endif  // TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_
157