1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_ 17 #define TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_ 18 19 #include "absl/types/optional.h" 20 #include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" 21 #include "third_party/gpus/cuda/include/nvtx3/nvToolsExt.h" 22 #include "tensorflow/core/platform/errors.h" 23 #include "tensorflow/core/platform/status.h" 24 #include "tensorflow/core/platform/types.h" 25 #include "tensorflow/core/profiler/internal/gpu/cupti_collector.h" 26 #include "tensorflow/core/profiler/internal/gpu/cupti_interface.h" 27 #include "tensorflow/core/profiler/utils/buffer_pool.h" 28 29 namespace tensorflow { 30 namespace profiler { 31 32 struct CuptiTracerOptions { 33 bool enable_activity_api = true; 34 35 // Use cuda events to enclose the kernel/memcpy to measure device activity. 36 // enable_event_based_activity, if true, will override the enable_activity_api 37 // setting. 38 bool enable_event_based_activity = false; 39 40 bool required_callback_api_events = true; 41 // The callback ids that will be enabled and monitored, if empty, all 42 // Callback ids to be enabled using Callback API. 43 // We only care CUPTI_CB_DOMAIN_DRIVER_API domain for now. It is kind of 44 // redundant to have both CUPTI_CB_DOMAIN_DRIVER_API and 45 // CUPTI_CB_DOMAIN_RUNTIME_API. 46 std::vector<CUpti_driver_api_trace_cbid_enum> cbids_selected; 47 // Activity kinds to be collected using Activity API. If empty, the Activity 48 // API is disable. 49 std::vector<CUpti_ActivityKind> activities_selected; 50 // Whether to call cuptiFinalize. 51 bool cupti_finalize = false; 52 // Whether to call cuCtxSynchronize for each device before Stop(). 53 bool sync_devices_before_stop = false; 54 // Whether to enable NVTX tracking, we need this for TensorRT tracking. 55 bool enable_nvtx_tracking = false; 56 }; 57 58 class CuptiDriverApiHook { 59 public: ~CuptiDriverApiHook()60 virtual ~CuptiDriverApiHook() {} 61 62 virtual Status OnDriverApiEnter(int device_id, CUpti_CallbackDomain domain, 63 CUpti_CallbackId cbid, 64 const CUpti_CallbackData* callback_info) = 0; 65 virtual Status OnDriverApiExit(int device_id, CUpti_CallbackDomain domain, 66 CUpti_CallbackId cbid, 67 const CUpti_CallbackData* callback_info) = 0; 68 virtual Status SyncAndFlush() = 0; 69 70 protected: 71 static Status AddDriverApiCallbackEvent( 72 CuptiTraceCollector* collector, CuptiInterface* cupti_interface, 73 int device_id, uint64 start_tsc, uint64 end_tsc, 74 CUpti_CallbackDomain domain, CUpti_CallbackId cbid, 75 const CUpti_CallbackData* callback_info); 76 }; 77 78 // The class use to enable cupti callback/activity API and forward the collected 79 // trace events to CuptiTraceCollector. There should be only one CuptiTracer 80 // per process. 81 class CuptiTracer { 82 public: 83 // Not copyable or movable 84 CuptiTracer(const CuptiTracer&) = delete; 85 CuptiTracer& operator=(const CuptiTracer&) = delete; 86 87 // Returns a pointer to singleton CuptiTracer. 88 static CuptiTracer* GetCuptiTracerSingleton(); 89 90 // Only one profile session can be live in the same time. 91 bool IsAvailable() const; NeedRootAccess()92 bool NeedRootAccess() const { return need_root_access_; } 93 94 void Enable(const CuptiTracerOptions& option, CuptiTraceCollector* collector); 95 void Disable(); 96 97 Status HandleCallback(CUpti_CallbackDomain domain, CUpti_CallbackId cbid, 98 const CUpti_CallbackData* callback_info); 99 100 // Returns a buffer and its size for CUPTI to store activities. This buffer 101 // will be reclaimed when CUPTI makes a callback to ProcessActivityBuffer. 102 void RequestActivityBuffer(uint8_t** buffer, size_t* size); 103 104 // Parses CUPTI activity events from activity buffer, and emits events for 105 // CuptiTraceCollector. This function is public because called from registered 106 // callback. 107 Status ProcessActivityBuffer(CUcontext context, uint32_t stream_id, 108 uint8_t* buffer, size_t size); 109 110 static uint64 GetTimestamp(); 111 static int NumGpus(); 112 // Returns the error (if any) when using libcupti. 113 static std::string ErrorIfAny(); 114 115 protected: 116 // protected constructor for injecting mock cupti interface for testing. 117 explicit CuptiTracer(CuptiInterface* cupti_interface); 118 119 private: 120 // Buffer size and alignment, 32K and 8 as in CUPTI samples. 121 static constexpr size_t kBufferSizeInBytes = 32 * 1024; 122 123 Status EnableApiTracing(); 124 Status EnableActivityTracing(); 125 Status DisableApiTracing(); 126 Status DisableActivityTracing(); 127 Status Finalize(); 128 void ConfigureActivityUnifiedMemoryCounter(bool enable); 129 Status HandleNVTXCallback(CUpti_CallbackId cbid, 130 const CUpti_CallbackData* cbdata); 131 132 int num_gpus_; 133 absl::optional<CuptiTracerOptions> option_; 134 CuptiInterface* cupti_interface_ = nullptr; 135 CuptiTraceCollector* collector_ = nullptr; 136 137 // CUPTI 10.1 and higher need root access to profile. 138 bool need_root_access_ = false; 139 140 bool api_tracing_enabled_ = false; 141 // Cupti handle for driver or runtime API callbacks. Cupti permits a single 142 // subscriber to be active at any time and can be used to trace Cuda runtime 143 // as and driver calls for all contexts and devices. 144 CUpti_SubscriberHandle subscriber_; // valid when api_tracing_enabled_. 145 146 bool activity_tracing_enabled_ = false; 147 148 std::unique_ptr<CuptiDriverApiHook> cupti_driver_api_hook_; 149 150 BufferPool buffer_pool_; 151 }; 152 153 } // namespace profiler 154 } // namespace tensorflow 155 156 #endif // TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_ 157