1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_ 17 #define TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_ 18 19 #include "absl/types/optional.h" 20 #include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" 21 #include "third_party/gpus/cuda/include/nvtx3/nvToolsExt.h" 22 #include "tensorflow/core/platform/errors.h" 23 #include "tensorflow/core/platform/macros.h" 24 #include "tensorflow/core/platform/status.h" 25 #include "tensorflow/core/platform/types.h" 26 #include "tensorflow/core/profiler/internal/gpu/cupti_collector.h" 27 #include "tensorflow/core/profiler/internal/gpu/cupti_interface.h" 28 29 namespace tensorflow { 30 namespace profiler { 31 32 struct CuptiTracerOptions { 33 bool enable_activity_api = true; 34 35 // Use cuda events to enclose the kernel/memcpy to measure device activity. 36 // enable_event_based_activity, if true, will override the enable_activity_api 37 // setting. 38 bool enable_event_based_activity = false; 39 40 bool required_callback_api_events = true; 41 // The callback ids that will be enabled and monitored, if empty, all 42 // Callback ids to be enabled using Callback API. 43 // We only care CUPTI_CB_DOMAIN_DRIVER_API domain for now. It is kind of 44 // redundant to have both CUPTI_CB_DOMAIN_DRIVER_API and 45 // CUPTI_CB_DOMAIN_RUNTIME_API. 46 std::vector<CUpti_driver_api_trace_cbid_enum> cbids_selected; 47 // Activity kinds to be collected using Activity API. If empty, the Activity 48 // API is disable. 49 std::vector<CUpti_ActivityKind> activities_selected; 50 // Whether to call cuptiFinalize. 51 bool cupti_finalize = false; 52 // Whether to call cuCtxSynchronize for each device before Stop(). 53 bool sync_devices_before_stop = false; 54 // Whether to enable NVTX tracking, we need this for TensorRT tracking. 55 bool enable_nvtx_tracking = false; 56 }; 57 58 class CuptiDriverApiHook { 59 public: ~CuptiDriverApiHook()60 virtual ~CuptiDriverApiHook() {} 61 62 virtual Status OnDriverApiEnter(int device_id, CUpti_CallbackDomain domain, 63 CUpti_CallbackId cbid, 64 const CUpti_CallbackData* callback_info) = 0; 65 virtual Status OnDriverApiExit(int device_id, CUpti_CallbackDomain domain, 66 CUpti_CallbackId cbid, 67 const CUpti_CallbackData* callback_info) = 0; 68 virtual Status SyncAndFlush() = 0; 69 70 protected: 71 static Status AddDriverApiCallbackEvent( 72 CuptiTraceCollector* collector, CuptiInterface* cupti_interface, 73 int device_id, uint64 start_tsc, uint64 end_tsc, 74 CUpti_CallbackDomain domain, CUpti_CallbackId cbid, 75 const CUpti_CallbackData* callback_info); 76 }; 77 78 // The class use to enable cupti callback/activity API and forward the collected 79 // trace events to CuptiTraceCollector. There should be only one CuptiTracer 80 // per process. 81 class CuptiTracer { 82 public: 83 // Returns a pointer to singleton CuptiTracer. 84 static CuptiTracer* GetCuptiTracerSingleton(); 85 86 // Only one profile session can be live in the same time. 87 bool IsAvailable() const; NeedRootAccess()88 bool NeedRootAccess() const { return need_root_access_; } 89 90 void Enable(const CuptiTracerOptions& option, CuptiTraceCollector* collector); 91 void Disable(); 92 93 Status HandleCallback(CUpti_CallbackDomain domain, CUpti_CallbackId cbid, 94 const CUpti_CallbackData* callback_info); 95 96 // This function is public because called from registered callback. 97 Status ProcessActivityBuffer(CUcontext context, uint32_t stream_id, 98 uint8_t* buffer, size_t size); 99 100 static uint64 GetTimestamp(); 101 static int NumGpus(); 102 // Returns the error (if any) when using libcupti. 103 static std::string ErrorIfAny(); 104 105 protected: 106 // protected constructor for injecting mock cupti interface for testing. CuptiTracer(CuptiInterface * cupti_interface)107 explicit CuptiTracer(CuptiInterface* cupti_interface) 108 : num_gpus_(NumGpus()), cupti_interface_(cupti_interface) {} 109 110 private: 111 Status EnableApiTracing(); 112 Status EnableActivityTracing(); 113 Status DisableApiTracing(); 114 Status DisableActivityTracing(); 115 Status Finalize(); 116 void ConfigureActivityUnifiedMemoryCounter(bool enable); 117 Status HandleNVTXCallback(CUpti_CallbackId cbid, 118 const CUpti_CallbackData* cbdata); 119 120 int num_gpus_; 121 absl::optional<CuptiTracerOptions> option_; 122 CuptiInterface* cupti_interface_ = nullptr; 123 CuptiTraceCollector* collector_ = nullptr; 124 125 // CUPTI 10.1 and higher need root access to profile. 126 bool need_root_access_ = false; 127 128 bool api_tracing_enabled_ = false; 129 // Cupti handle for driver or runtime API callbacks. Cupti permits a single 130 // subscriber to be active at any time and can be used to trace Cuda runtime 131 // as and driver calls for all contexts and devices. 132 CUpti_SubscriberHandle subscriber_; // valid when api_tracing_enabled_. 133 134 bool activity_tracing_enabled_ = false; 135 136 std::unique_ptr<CuptiDriverApiHook> cupti_driver_api_hook_; 137 138 TF_DISALLOW_COPY_AND_ASSIGN(CuptiTracer); 139 }; 140 141 } // namespace profiler 142 } // namespace tensorflow 143 144 #endif // TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_TRACER_H_ 145