• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H
18 #define MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H
19 #include <cuda.h>
20 #include <cupti.h>
21 #include <algorithm>
22 #include <cstdio>
23 #include <map>
24 #include <memory>
25 #include <mutex>
26 #include <string>
27 #include <unordered_map>
28 #include <utility>
29 #include <vector>
30 #include "profiler/device/profiling.h"
31 #include "profiler/device/gpu/gpu_profiling_utils.h"
32 
33 namespace mindspore {
34 namespace profiler {
35 namespace gpu {
// Tags which CUPTI API kind (callback or activity) an Event is associated with.
enum class CUPTIApiType {
  kCallback = 0,
  kActivity = 1,
};
// Kind of GPU activity recorded in an Event.
// NOTE(review): the memcpy enumerators look like they mirror CUPTI's memcpy kinds
// (H = host, D = device, A = array, P2P = peer-to-peer) - confirm against the CUPTI docs.
// The numeric values are spelled out explicitly; keep them stable when adding entries.
enum class ActivityType {
  kKernel = 0,          // kernel execution
  kMemcpyH2D = 1,       // memcpy, H -> D
  kMemcpyD2H = 2,       // memcpy, D -> H
  kMemcpyH2A = 3,       // memcpy, H -> A
  kMemcpyA2H = 4,       // memcpy, A -> H
  kMemcpyA2D = 5,       // memcpy, A -> D
  kMemcpyD2A = 6,       // memcpy, D -> A
  kMemcpyD2D = 7,       // memcpy, D -> D
  kMemcpyP2P = 8,       // memcpy, P2P
  kMemcpyH2H = 9,       // memcpy, H -> H
  kMemset = 10,         // memset
  kMemcpyUnknown = 11,  // memcpy of a kind not listed above
};
51 
// Memcpy-specific payload of an Event (one arm of Event's anonymous union).
// Deliberately has no default member initializers: it must stay trivially
// default-constructible to be usable as a union member.
struct MemcpyInfo {
  size_t bytes;            // number of bytes copied
  unsigned char src_kind;  // source memory kind code (presumably a CUPTI memory-kind value - confirm)
  unsigned char dst_kind;  // destination memory kind code (presumably a CUPTI memory-kind value - confirm)
};
57 
// Kernel-specific payload of an Event (one arm of Event's anonymous union).
// Deliberately has no default member initializers: it must stay trivially
// default-constructible to be usable as a union member.
struct KernelInfo {
  // Per-launch resource usage.
  uint64_t registers_per_thread;
  uint64_t static_shared_memory;
  uint64_t dynamic_shared_memory;

  // Thread-block dimensions.
  uint64_t block_x;
  uint64_t block_y;
  uint64_t block_z;

  // Grid dimensions.
  uint64_t grid_x;
  uint64_t grid_y;
  uint64_t grid_z;
};
69 
70 struct Event {
71   std::string kernel_name;
72   std::string kernel_type;
73   CUPTIApiType api_type;
74   ActivityType activity_type;
75   uint64_t start_time_stamp;
76   uint64_t end_time_stamp;
77   std::string op_name;
78   uint32_t device_id;
79   uint32_t correlation_id;
80   uint32_t thread_id;
81   uint32_t context_id;
82   uint32_t stream_id;
83   CUpti_CallbackId cb_id;
84   union {
85     MemcpyInfo memcpy_info;
86     KernelInfo kernel_info;
87   };
88 };
89 
// Reference timestamps, all expressed in nanoseconds and zero until set.
struct BaseTime {
  uint64_t host_start_time = 0;
  uint64_t host_start_monotonic_raw_time = 0;
  uint64_t gpu_start_time = 0;
};
96 
// Scale factor of 1000 used for time conversions.
// NOTE(review): presumably the ratio between adjacent time units (e.g. ns <-> us) - confirm at use sites.
constexpr float kTimeUnit = 1000.0f;
98 
// Abstract interface for a profiling operation.
// Implementations must provide Init() and SaveProfilingData(); the name exposed
// by Name() is whatever the derived class stores in op_name_.
class ProfilingOp {
 public:
  ProfilingOp() = default;
  virtual ~ProfilingOp() = default;

  // Implemented by derived classes.
  virtual void Init() = 0;
  // Implemented by derived classes.
  virtual void SaveProfilingData() = 0;

  // Returns a copy of the stored operation name.
  std::string Name() const { return op_name_; }

 protected:
  std::string op_name_;  // set by derived classes; empty by default
};
110 
111 class GPUProfiler : public Profiler {
112  public:
113   static std::shared_ptr<GPUProfiler> &GetInstance();
114   GPUProfiler() = default;
~GPUProfiler()115   ~GPUProfiler() { StopCUPTI(); }
116   GPUProfiler(const GPUProfiler &) = delete;
117   GPUProfiler &operator=(const GPUProfiler &) = delete;
118 
119   void Init(const std::string &profileDataPath) override;
120   void Stop() override;
121   void StopCUPTI();
122   void StepProfilingEnable(const bool enable_flag) override;
123   void SyncEnable(const bool enable_flag);
GetEnableFlag()124   bool GetEnableFlag() const { return enable_flag_; }
GetSyncEnableFlag()125   bool GetSyncEnableFlag() const { return sync_enable_flag_; }
126   void EventHandleProcess(CUpti_CallbackId cbid, const CUpti_CallbackData *cbdata, const std::string &typestring,
127                           uint64_t startTimestamp, uint64_t endTimestamp);
128   void CUPTIAPI AllocBuffer(uint8_t **buffer, size_t *size, size_t *maxNumRecords);
129   void CUPTIAPI ProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize);
130   void OpDataProducerBegin(const std::string op_name, void *stream);
131   void OpDataProducerEnd() override;
132   void ProcessEvents();
133   void RegisterProfilingOp(std::shared_ptr<ProfilingOp> node);
134   void SetStepTraceOpName(ProfilingTraceInfo trace_op_name);
ProfileDataPath()135   std::string ProfileDataPath() const { return profile_data_path_; }
136 
137  private:
138   void SingleOpLaunchTimeProcess(float op_time_elapsed);
139   void OpsParser();
140   void EventLog(const Event &event);
141   void ClearInst() override;
142   void HandleActivityRecord(CUpti_Activity *record);
143   void AddEvent(Event &&event);
144   void SetRunTimeData(const std::string &op_name, void *stream);
145   void FixOpNameByCorrelationId(Event *event);
146 
147   static std::shared_ptr<GPUProfiler> profiler_inst_;
148   bool enable_flag_ = false;
149   bool sync_enable_flag_ = true;
150   std::unordered_map<uint32_t, std::string> op_name_map_;
151   std::vector<Event> events_;
152   BaseTime base_time_;
153   std::string op_name_;
154   void *stream_;
155   void SaveProfileData() override;
156   void SaveExtraProfileData();
157   std::mutex event_mutex_;
158 
159   std::vector<CUpti_ActivityKind> activities_enable_;
160 
161   uint64_t cupti_callback_events_count_ = 0l;
162   uint64_t cupti_callback_events_drop_count_ = 0l;
163   uint64_t max_cupti_callback_events_ = 2 * 1024 * 10000;
164 
165   uint64_t cupti_activity_events_count_ = 0l;
166   uint64_t cupti_activity_events_drop_count_ = 0l;
167   uint64_t max_cupti_activity_events_ = 2 * 1024 * 10000;
168 
169   CUpti_SubscriberHandle subscriber_ = nullptr;
170   cudaEvent_t op_event_start_;
171   cudaEvent_t op_event_stop_;
172   uint64_t op_host_time_start_;
173   uint64_t op_host_time_stop_;
174   uint64_t op_cupti_time_start_;
175   std::string profile_data_path_;
176   std::map<std::string, std::shared_ptr<ProfilingOp>> profiling_op_;
177   ProfilingTraceInfo step_trace_op_name_;
178 };
179 }  // namespace gpu
180 }  // namespace profiler
181 }  // namespace mindspore
182 
183 #endif  // MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H
184