1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_WRAPPER_H_ 17 #define TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_WRAPPER_H_ 18 19 #include <stddef.h> 20 #include <stdint.h> 21 22 #include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h" 23 #include "third_party/gpus/cuda/include/cuda.h" 24 #include "tensorflow/core/profiler/internal/gpu/cupti_interface.h" 25 26 namespace tensorflow { 27 namespace profiler { 28 29 class CuptiWrapper : public tensorflow::profiler::CuptiInterface { 30 public: CuptiWrapper()31 CuptiWrapper() {} 32 ~CuptiWrapper()33 ~CuptiWrapper() override {} 34 35 // CUPTI activity API 36 CUptiResult ActivityDisable(CUpti_ActivityKind kind) override; 37 38 CUptiResult ActivityEnable(CUpti_ActivityKind kind) override; 39 40 CUptiResult ActivityFlushAll(uint32_t flag) override; 41 42 CUptiResult ActivityGetNextRecord(uint8_t* buffer, 43 size_t valid_buffer_size_bytes, 44 CUpti_Activity** record) override; 45 46 CUptiResult ActivityGetNumDroppedRecords(CUcontext context, 47 uint32_t stream_id, 48 size_t* dropped) override; 49 50 CUptiResult ActivityConfigureUnifiedMemoryCounter( 51 CUpti_ActivityUnifiedMemoryCounterConfig* config, 52 uint32_t count) override; 53 54 CUptiResult ActivityRegisterCallbacks( 55 CUpti_BuffersCallbackRequestFunc func_buffer_requested, 56 CUpti_BuffersCallbackCompleteFunc func_buffer_completed) override; 57 58 CUptiResult GetDeviceId(CUcontext context, uint32* deviceId) override; 59 60 CUptiResult GetTimestamp(uint64_t* timestamp) override; 61 62 // cuptiFinalize is only defined in CUDA8 and above. 63 // To enable it in CUDA8, the environment variable CUPTI_ENABLE_FINALIZE must 64 // be set to 1. 65 CUptiResult Finalize() override; 66 67 // CUPTI callback API 68 CUptiResult EnableCallback(uint32_t enable, CUpti_SubscriberHandle subscriber, 69 CUpti_CallbackDomain domain, 70 CUpti_CallbackId cbid) override; 71 72 CUptiResult EnableDomain(uint32_t enable, CUpti_SubscriberHandle subscriber, 73 CUpti_CallbackDomain domain) override; 74 75 CUptiResult Subscribe(CUpti_SubscriberHandle* subscriber, 76 CUpti_CallbackFunc callback, void* userdata) override; 77 78 CUptiResult Unsubscribe(CUpti_SubscriberHandle subscriber) override; 79 80 // CUPTI event API 81 CUptiResult DeviceEnumEventDomains( 82 CUdevice device, size_t* array_size_bytes, 83 CUpti_EventDomainID* domain_array) override; 84 85 CUptiResult DeviceGetEventDomainAttribute(CUdevice device, 86 CUpti_EventDomainID event_domain, 87 CUpti_EventDomainAttribute attrib, 88 size_t* value_size, 89 void* value) override; 90 91 CUptiResult DisableKernelReplayMode(CUcontext context) override; 92 93 CUptiResult EnableKernelReplayMode(CUcontext context) override; 94 95 CUptiResult DeviceGetNumEventDomains(CUdevice device, 96 uint32_t* num_domains) override; 97 98 CUptiResult EventDomainEnumEvents(CUpti_EventDomainID event_domain, 99 size_t* array_size_bytes, 100 CUpti_EventID* event_array) override; 101 102 CUptiResult EventDomainGetNumEvents(CUpti_EventDomainID event_domain, 103 uint32_t* num_events) override; 104 105 CUptiResult EventGetAttribute(CUpti_EventID event, 106 CUpti_EventAttribute attrib, size_t* value_size, 107 void* value) override; 108 109 CUptiResult EventGetIdFromName(CUdevice device, const char* event_name, 110 CUpti_EventID* event) override; 111 112 CUptiResult EventGroupDisable(CUpti_EventGroup event_group) override; 113 114 CUptiResult EventGroupEnable(CUpti_EventGroup event_group) override; 115 116 CUptiResult EventGroupGetAttribute(CUpti_EventGroup event_group, 117 CUpti_EventGroupAttribute attrib, 118 size_t* value_size, void* value) override; 119 120 CUptiResult EventGroupReadEvent(CUpti_EventGroup event_group, 121 CUpti_ReadEventFlags flags, 122 CUpti_EventID event, 123 size_t* event_value_buffer_size_bytes, 124 uint64_t* event_value_buffer) override; 125 126 CUptiResult EventGroupSetAttribute(CUpti_EventGroup event_group, 127 CUpti_EventGroupAttribute attrib, 128 size_t value_size, void* value) override; 129 130 CUptiResult EventGroupSetsCreate( 131 CUcontext context, size_t event_id_array_size_bytes, 132 CUpti_EventID* event_id_array, 133 CUpti_EventGroupSets** event_group_passes) override; 134 135 CUptiResult EventGroupSetsDestroy( 136 CUpti_EventGroupSets* event_group_sets) override; 137 138 // CUPTI metric API 139 CUptiResult DeviceEnumMetrics(CUdevice device, size_t* arraySizeBytes, 140 CUpti_MetricID* metricArray) override; 141 142 CUptiResult DeviceGetNumMetrics(CUdevice device, 143 uint32_t* num_metrics) override; 144 145 CUptiResult MetricGetIdFromName(CUdevice device, const char* metric_name, 146 CUpti_MetricID* metric) override; 147 148 CUptiResult MetricGetNumEvents(CUpti_MetricID metric, 149 uint32_t* num_events) override; 150 151 CUptiResult MetricEnumEvents(CUpti_MetricID metric, 152 size_t* event_id_array_size_bytes, 153 CUpti_EventID* event_id_array) override; 154 155 CUptiResult MetricGetAttribute(CUpti_MetricID metric, 156 CUpti_MetricAttribute attrib, 157 size_t* value_size, void* value) override; 158 159 CUptiResult MetricGetValue(CUdevice device, CUpti_MetricID metric, 160 size_t event_id_array_size_bytes, 161 CUpti_EventID* event_id_array, 162 size_t event_value_array_size_bytes, 163 uint64_t* event_value_array, 164 uint64_t time_duration, 165 CUpti_MetricValue* metric_value) override; 166 167 CUptiResult GetResultString(CUptiResult result, const char** str) override; 168 169 CUptiResult GetContextId(CUcontext context, uint32_t* context_id) override; 170 171 CUptiResult GetStreamIdEx(CUcontext context, CUstream stream, 172 uint8_t per_thread_stream, 173 uint32_t* stream_id) override; 174 CleanUp()175 void CleanUp() override {} Disabled()176 bool Disabled() const override { return false; } 177 178 private: 179 TF_DISALLOW_COPY_AND_ASSIGN(CuptiWrapper); 180 }; 181 182 } // namespace profiler 183 } // namespace tensorflow 184 185 #endif // PERFTOOLS_ACCELERATORS_XPROF_XPROFILEZ_NVIDIA_GPU_CUPTI_WRAPPER_H_ 186