1 /*
2 * Copyright © 2021 Google, Inc.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <perfetto.h>
7
8 #include "tu_perfetto.h"
9
10 #include "util/u_perfetto.h"
11 #include "util/hash_table.h"
12
13 #include "tu_tracepoints.h"
14 #include "tu_tracepoints_perfetto.h"
15
16 static uint32_t gpu_clock_id;
17 static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */
18
19 /**
20 * The timestamp at the point where we first emitted the clock_sync..
21 * this will be a *later* timestamp that the first GPU traces (since
22 * we capture the first clock_sync from the CPU *after* the first GPU
23 * tracepoints happen). To avoid confusing perfetto we need to drop
24 * the GPU traces with timestamps before this.
25 */
26 static uint64_t sync_gpu_ts;
27
28 static uint64_t last_suspend_count;
29
30 static uint64_t gpu_max_timestamp;
31 static uint64_t gpu_timestamp_offset;
32
/* Per-tracing-session incremental state.  Perfetto clears this whenever the
 * service requests incremental-state reset; `was_cleared` starting out true
 * means the renderstage descriptors get (re)sent on the next trace event.
 */
struct TuRenderpassIncrementalState {
   bool was_cleared = true;
};
36
/* Data-source traits hooking our incremental state type into the perfetto
 * DataSource template below.
 */
struct TuRenderpassTraits : public perfetto::DefaultDataSourceTraits {
   using IncrementalStateType = TuRenderpassIncrementalState;
};
40
41 class TuRenderpassDataSource : public perfetto::DataSource<TuRenderpassDataSource, TuRenderpassTraits> {
42 public:
OnSetup(const SetupArgs &)43 void OnSetup(const SetupArgs &) override
44 {
45 // Use this callback to apply any custom configuration to your data source
46 // based on the TraceConfig in SetupArgs.
47 }
48
OnStart(const StartArgs &)49 void OnStart(const StartArgs &) override
50 {
51 // This notification can be used to initialize the GPU driver, enable
52 // counters, etc. StartArgs will contains the DataSourceDescriptor,
53 // which can be extended.
54 u_trace_perfetto_start();
55 PERFETTO_LOG("Tracing started");
56
57 /* Note: clock_id's below 128 are reserved.. for custom clock sources,
58 * using the hash of a namespaced string is the recommended approach.
59 * See: https://perfetto.dev/docs/concepts/clock-sync
60 */
61 gpu_clock_id =
62 _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;
63
64 gpu_timestamp_offset = 0;
65 gpu_max_timestamp = 0;
66 last_suspend_count = 0;
67 }
68
OnStop(const StopArgs &)69 void OnStop(const StopArgs &) override
70 {
71 PERFETTO_LOG("Tracing stopped");
72
73 // Undo any initialization done in OnStart.
74 u_trace_perfetto_stop();
75 // TODO we should perhaps block until queued traces are flushed?
76
77 Trace([](TuRenderpassDataSource::TraceContext ctx) {
78 auto packet = ctx.NewTracePacket();
79 packet->Finalize();
80 ctx.Flush();
81 });
82 }
83 };
84
/* Declare + emit the per-process static members perfetto requires for the
 * data source (registration slots, TLS state, etc).
 */
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
87
88 static void
send_descriptors(TuRenderpassDataSource::TraceContext & ctx,uint64_t ts_ns)89 send_descriptors(TuRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
90 {
91 PERFETTO_LOG("Sending renderstage descriptors");
92
93 auto packet = ctx.NewTracePacket();
94
95 packet->set_timestamp(0);
96
97 auto event = packet->set_gpu_render_stage_event();
98 event->set_gpu_id(0);
99
100 auto spec = event->set_specifications();
101
102 for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
103 auto desc = spec->add_hw_queue();
104
105 desc->set_name(queues[i].name);
106 desc->set_description(queues[i].desc);
107 }
108
109 for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
110 auto desc = spec->add_stage();
111
112 desc->set_name(stages[i].name);
113 if (stages[i].desc)
114 desc->set_description(stages[i].desc);
115 }
116 }
117
118 static void
stage_start(struct tu_device * dev,uint64_t ts_ns,enum tu_stage_id stage)119 stage_start(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage)
120 {
121 struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev);
122
123 p->start_ts[stage] = ts_ns;
124 }
125
126 typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*);
127
/* Emit a GpuRenderStageEvent for a completed stage, using the start
 * timestamp recorded by stage_start().  `payload`/`payload_as_extra`
 * optionally attach tracepoint-specific fields as extra_data.
 */
static void
stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage,
          uint32_t submission_id, const void* payload = nullptr,
          trace_payload_as_extra_func payload_as_extra = nullptr)
{
   struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev);

   /* If we haven't managed to calibrate the alignment between GPU and CPU
    * timestamps yet, then skip this trace, otherwise perfetto won't know
    * what to do with it.
    */
   if (!sync_gpu_ts)
      return;

   TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
      /* (Re)send the queue/stage descriptors after an incremental-state
       * clear, so the stage ids below can be decoded:
       */
      if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
         send_descriptors(tctx, p->start_ts[stage]);
         state->was_cleared = false;
      }

      auto packet = tctx.NewTracePacket();

      /* Track the largest GPU timestamp emitted so sync_timestamp() can
       * keep the clock monotonic across suspend/resume:
       */
      gpu_max_timestamp = MAX2(gpu_max_timestamp, ts_ns + gpu_timestamp_offset);

      /* Timestamps are on our custom GPU clock, shifted by the current
       * suspend/resume fix-up offset:
       */
      packet->set_timestamp(p->start_ts[stage] + gpu_timestamp_offset);
      packet->set_timestamp_clock_id(gpu_clock_id);

      auto event = packet->set_gpu_render_stage_event();
      event->set_event_id(0); // ???
      event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
      event->set_duration(ts_ns - p->start_ts[stage]);
      event->set_stage_id(stage);
      /* Use the device pointer as an opaque per-context id: */
      event->set_context((uintptr_t)dev);
      event->set_submission_id(submission_id);

      if (payload && payload_as_extra) {
         payload_as_extra(event, payload);
      }
   });
}
168
169 #ifdef __cplusplus
170 extern "C" {
171 #endif
172
173 void
tu_perfetto_init(void)174 tu_perfetto_init(void)
175 {
176 util_perfetto_init();
177
178 perfetto::DataSourceDescriptor dsd;
179 dsd.set_name("gpu.renderstages.msm");
180 TuRenderpassDataSource::Register(dsd);
181 }
182
183 static void
sync_timestamp(struct tu_device * dev)184 sync_timestamp(struct tu_device *dev)
185 {
186 uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
187 uint64_t gpu_ts = 0;
188
189 if (cpu_ts < next_clock_sync_ns)
190 return;
191
192 if (tu_device_get_gpu_timestamp(dev, &gpu_ts)) {
193 PERFETTO_ELOG("Could not sync CPU and GPU clocks");
194 return;
195 }
196
197 uint64_t current_suspend_count = 0;
198 /* If we fail to get it we will use a fallback */
199 tu_device_get_suspend_count(dev, ¤t_suspend_count);
200
201 /* convert GPU ts into ns: */
202 gpu_ts = tu_device_ticks_to_ns(dev, gpu_ts);
203
204 /* GPU timestamp is being reset after suspend-resume cycle.
205 * Perfetto requires clock snapshots to be monotonic,
206 * so we have to fix-up the time.
207 */
208 if (current_suspend_count != last_suspend_count) {
209 gpu_timestamp_offset = gpu_max_timestamp;
210 last_suspend_count = current_suspend_count;
211 }
212
213 gpu_ts += gpu_timestamp_offset;
214
215 /* Fallback check, detect non-monotonic cases which would happen
216 * if we cannot retrieve suspend count.
217 */
218 if (sync_gpu_ts > gpu_ts) {
219 gpu_ts += (gpu_max_timestamp - gpu_timestamp_offset);
220 gpu_timestamp_offset = gpu_max_timestamp;
221 }
222
223 if (sync_gpu_ts > gpu_ts) {
224 PERFETTO_ELOG("Non-monotonic gpu timestamp detected, bailing out");
225 return;
226 }
227
228 gpu_max_timestamp = gpu_ts;
229
230 TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
231 auto packet = tctx.NewTracePacket();
232
233 packet->set_timestamp(cpu_ts);
234
235 auto event = packet->set_clock_snapshot();
236
237 {
238 auto clock = event->add_clocks();
239
240 clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
241 clock->set_timestamp(cpu_ts);
242 }
243
244 {
245 auto clock = event->add_clocks();
246
247 clock->set_clock_id(gpu_clock_id);
248 clock->set_timestamp(gpu_ts);
249 }
250
251 sync_gpu_ts = gpu_ts;
252 next_clock_sync_ns = cpu_ts + 30000000;
253 });
254 }
255
256 static void
emit_submit_id(uint32_t submission_id)257 emit_submit_id(uint32_t submission_id)
258 {
259 TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
260 auto packet = tctx.NewTracePacket();
261
262 packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
263
264 auto event = packet->set_vulkan_api_event();
265 auto submit = event->set_vk_queue_submit();
266
267 submit->set_submission_id(submission_id);
268 });
269 }
270
271 void
tu_perfetto_submit(struct tu_device * dev,uint32_t submission_id)272 tu_perfetto_submit(struct tu_device *dev, uint32_t submission_id)
273 {
274 /* sync_timestamp isn't free */
275 if (!ut_perfetto_enabled)
276 return;
277
278 sync_timestamp(dev);
279 emit_submit_id(submission_id);
280 }
281
282 /*
283 * Trace callbacks, called from u_trace once the timestamps from GPU have been
284 * collected.
285 */
286
/* Stamps out the tu_start_* / tu_end_* u_trace callbacks for a tracepoint:
 * the start callback records the timestamp, and the end callback emits the
 * render-stage event (looking up the submission id from the flush data and
 * forwarding the tracepoint payload as extra_data).
 */
#define CREATE_EVENT_CALLBACK(event_name, stage) \
void \
tu_start_##event_name(struct tu_device *dev, uint64_t ts_ns, \
                      const void *flush_data, \
                      const struct trace_start_##event_name *payload) \
{ \
   stage_start(dev, ts_ns, stage); \
} \
 \
void \
tu_end_##event_name(struct tu_device *dev, uint64_t ts_ns, \
                    const void *flush_data, \
                    const struct trace_end_##event_name *payload) \
{ \
   auto trace_flush_data = (const struct tu_u_trace_submission_data *) flush_data; \
   uint32_t submission_id = \
      tu_u_trace_submission_data_get_submit_id(trace_flush_data); \
   stage_end(dev, ts_ns, stage, submission_id, payload, \
             (trace_payload_as_extra_func) &trace_payload_as_extra_end_##event_name); \
}
307
/* Instantiate the u_trace callbacks, mapping each tracepoint to its
 * perfetto render-stage id.
 */
CREATE_EVENT_CALLBACK(render_pass, SURFACE_STAGE_ID)
CREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID)
320
321 #ifdef __cplusplus
322 }
323 #endif
324