/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <perfetto.h>

#include "util/u_perfetto.h"

#include "freedreno_tracepoints.h"

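/* Clock id registered with perfetto for GPU timestamps; assigned in
 * FdRenderpassDataSource::OnStart() below:
 */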
static uint32_t gpu_clock_id;
static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */

/**
 * The timestamp at the point where we first emitted the clock_sync.
 * This will be a *later* timestamp than the first GPU traces (since
 * we capture the first clock_sync from the CPU *after* the first GPU
 * tracepoints happen).  To avoid confusing perfetto we need to drop
 * the GPU traces with timestamps before this.
 */
static uint64_t sync_gpu_ts;

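/* Incremental state that the tracing service can ask us to re-send: the
 * service clears it periodically (per the trace config), and since
 * was_cleared starts out true, the first stage_end() of a session (or
 * after any clear) re-emits the render-stage descriptors via
 * send_descriptors().
 */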
struct FdRenderpassIncrementalState {
   bool was_cleared = true;
};

struct FdRenderpassTraits : public perfetto::DefaultDataSourceTraits {
   using IncrementalStateType = FdRenderpassIncrementalState;
};

class FdRenderpassDataSource : public perfetto::DataSource<FdRenderpassDataSource, FdRenderpassTraits> {
 public:
   void OnSetup(const SetupArgs &) override
   {
      // Use this callback to apply any custom configuration to your data source
      // based on the TraceConfig in SetupArgs.
   }

   void OnStart(const StartArgs &) override
   {
      // This notification can be used to initialize the GPU driver, enable
      // counters, etc.  StartArgs will contain the DataSourceDescriptor,
      // which can be extended.
      u_trace_perfetto_start();
      PERFETTO_LOG("Tracing started");

      /* Note: clock ids below 128 are reserved.  For custom clock sources,
       * using the hash of a namespaced string is the recommended approach.
       * See: https://perfetto.dev/docs/concepts/clock-sync
       */
      gpu_clock_id =
         _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;
   }

   void OnStop(const StopArgs &) override
   {
      PERFETTO_LOG("Tracing stopped");

      // Undo any initialization done in OnStart.
      u_trace_perfetto_stop();
      // TODO we should perhaps block until queued traces are flushed?

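      // Emit one final (empty) packet and flush, so that anything still
      // buffered on the producer side reaches the tracing service before
      // the session is torn down.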
      Trace([](FdRenderpassDataSource::TraceContext ctx) {
         auto packet = ctx.NewTracePacket();
         packet->Finalize();
         ctx.Flush();
      });
   }
};

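/* Each perfetto data source type needs its static state declared and
 * defined exactly once; these macros instantiate it for
 * FdRenderpassDataSource.
 */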
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);

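/* Emit the render-stage "specifications" packet, which maps the
 * hw_queue_id/stage_id values used by later GpuRenderStageEvents to
 * human-readable names and descriptions (from the queues[] and stages[]
 * tables).
 */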
static void
send_descriptors(FdRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
{
   PERFETTO_LOG("Sending renderstage descriptors");

   auto packet = ctx.NewTracePacket();

   packet->set_timestamp(0);
//   packet->set_timestamp(ts_ns);
//   packet->set_timestamp_clock_id(gpu_clock_id);

   auto event = packet->set_gpu_render_stage_event();
   event->set_gpu_id(0);

   auto spec = event->set_specifications();

   for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
      auto desc = spec->add_hw_queue();

      desc->set_name(queues[i].name);
      desc->set_description(queues[i].desc);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
      auto desc = spec->add_stage();

      desc->set_name(stages[i].name);
      if (stages[i].desc)
         desc->set_description(stages[i].desc);
   }
}

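/* stage_start() only latches the stage's GPU start timestamp; the trace
 * packet itself is emitted in stage_end(), once the duration is known.
 */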
static void
stage_start(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_perfetto_state *p = &ctx->perfetto;

   p->start_ts[stage] = ts_ns;
}

static void
stage_end(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_perfetto_state *p = &ctx->perfetto;

   /* If we haven't managed to calibrate the alignment between GPU and CPU
    * timestamps yet, then skip this trace, otherwise perfetto won't know
    * what to do with it.
    */
   if (!sync_gpu_ts)
      return;

   FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
      if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
         send_descriptors(tctx, p->start_ts[stage]);
         state->was_cleared = false;
      }

      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(p->start_ts[stage]);
      packet->set_timestamp_clock_id(gpu_clock_id);

      auto event = packet->set_gpu_render_stage_event();
      event->set_event_id(0); // ???
      event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
      event->set_duration(ts_ns - p->start_ts[stage]);
      event->set_stage_id(stage);
      event->set_context((uintptr_t)pctx);

      /* The "surface" meta-stage has extra info about render target: */
      if (stage == SURFACE_STAGE_ID) {

         event->set_submission_id(p->submit_id);

         if (p->cbuf0_format) {
            auto data = event->add_extra_data();

            data->set_name("color0 format");
            data->set_value(util_format_short_name(p->cbuf0_format));
         }

         if (p->zs_format) {
            auto data = event->add_extra_data();

            data->set_name("zs format");
            data->set_value(util_format_short_name(p->zs_format));
         }

         {
            auto data = event->add_extra_data();

            data->set_name("width");
            data->set_value(std::to_string(p->width));
         }

         {
            auto data = event->add_extra_data();

            data->set_name("height");
            data->set_value(std::to_string(p->height));
         }

         {
            auto data = event->add_extra_data();

            data->set_name("MSAA");
            data->set_value(std::to_string(p->samples));
         }

         {
            auto data = event->add_extra_data();

            data->set_name("MRTs");
            data->set_value(std::to_string(p->mrts));
         }

         // "renderMode"
         // "surfaceID"

         if (p->nbins) {
            auto data = event->add_extra_data();

            data->set_name("numberOfBins");
            data->set_value(std::to_string(p->nbins));
         }

         if (p->binw) {
            auto data = event->add_extra_data();

            data->set_name("binWidth");
            data->set_value(std::to_string(p->binw));
         }

         if (p->binh) {
            auto data = event->add_extra_data();

            data->set_name("binHeight");
            data->set_value(std::to_string(p->binh));
         }
      }
   });
}

#ifdef __cplusplus
extern "C" {
#endif

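/* Register the render-stage data source with the tracing service.  A trace
 * config can then enable it by name, e.g. (sketch of the textproto config):
 *
 *    data_sources {
 *       config { name: "gpu.renderstages.msm" }
 *    }
 */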
void
fd_perfetto_init(void)
{
   util_perfetto_init();

   perfetto::DataSourceDescriptor dsd;
   dsd.set_name("gpu.renderstages.msm");
   FdRenderpassDataSource::Register(dsd);
}

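/* Emit a ClockSnapshot packet pairing a CPU BOOTTIME timestamp with the
 * corresponding GPU timestamp, so that the trace processor can translate
 * gpu_clock_id timestamps into the builtin clock domain.  Rate-limited to
 * one snapshot per 30ms via next_clock_sync_ns.
 */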
static void
sync_timestamp(struct fd_context *ctx)
{
   uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
   uint64_t gpu_ts;

   if (cpu_ts < next_clock_sync_ns)
      return;

   if (fd_pipe_get_param(ctx->pipe, FD_TIMESTAMP, &gpu_ts)) {
      PERFETTO_ELOG("Could not sync CPU and GPU clocks");
      return;
   }

   /* convert GPU ts into ns: */
   gpu_ts = ctx->ts_to_ns(gpu_ts);

   FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(cpu_ts);

      auto event = packet->set_clock_snapshot();

      {
         auto clock = event->add_clocks();

         clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
         clock->set_timestamp(cpu_ts);
      }

      {
         auto clock = event->add_clocks();

         clock->set_clock_id(gpu_clock_id);
         clock->set_timestamp(gpu_ts);
      }

      sync_gpu_ts = gpu_ts;
      next_clock_sync_ns = cpu_ts + 30000000;
   });
}

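/* Emit a VkQueueSubmit event for each submit, so that render-stage events
 * carrying a submission_id can be correlated with a point on the CPU
 * timeline.
 */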
static void
emit_submit_id(struct fd_context *ctx)
{
   FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(perfetto::base::GetBootTimeNs().count());

      auto event = packet->set_vulkan_api_event();
      auto submit = event->set_vk_queue_submit();

      submit->set_submission_id(ctx->submit_count);
   });
}

void
fd_perfetto_submit(struct fd_context *ctx)
{
   sync_timestamp(ctx);
   emit_submit_id(ctx);
}

/*
 * Trace callbacks, called from u_trace once the timestamps from GPU have been
 * collected.
 */

void
fd_start_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
                     const void *flush_data,
                     const struct trace_start_render_pass *payload)
{
   stage_start(pctx, ts_ns, SURFACE_STAGE_ID);

   struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;

   p->submit_id = payload->submit_id;
   p->cbuf0_format = payload->cbuf0_format;
   p->zs_format = payload->zs_format;
   p->width = payload->width;
   p->height = payload->height;
   p->mrts = payload->mrts;
   p->samples = payload->samples;
   p->nbins = payload->nbins;
   p->binw = payload->binw;
   p->binh = payload->binh;
}

void
fd_end_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
                   const void *flush_data,
                   const struct trace_end_render_pass *payload)
{
   stage_end(pctx, ts_ns, SURFACE_STAGE_ID);
}

void
fd_start_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
                    const void *flush_data,
                    const struct trace_start_binning_ib *payload)
{
   stage_start(pctx, ts_ns, BINNING_STAGE_ID);
}

void
fd_end_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
                  const void *flush_data,
                  const struct trace_end_binning_ib *payload)
{
   stage_end(pctx, ts_ns, BINNING_STAGE_ID);
}

void
fd_start_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
                 const void *flush_data,
                 const struct trace_start_draw_ib *payload)
{
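   /* If binning was used (nbins > 0), the draw IB executes tiled out of
    * GMEM, otherwise we're in direct/bypass mode.  fd_end_draw_ib() makes
    * the same choice so the start/end pair lands on the same stage.
    */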
   stage_start(
      pctx, ts_ns,
      fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
}

void
fd_end_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
               const void *flush_data,
               const struct trace_end_draw_ib *payload)
{
   stage_end(
      pctx, ts_ns,
      fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
}

void
fd_start_blit(struct pipe_context *pctx, uint64_t ts_ns,
              const void *flush_data,
              const struct trace_start_blit *payload)
{
   stage_start(pctx, ts_ns, BLIT_STAGE_ID);
}

void
fd_end_blit(struct pipe_context *pctx, uint64_t ts_ns,
            const void *flush_data,
            const struct trace_end_blit *payload)
{
   stage_end(pctx, ts_ns, BLIT_STAGE_ID);
}

void
fd_start_compute(struct pipe_context *pctx, uint64_t ts_ns,
                 const void *flush_data,
                 const struct trace_start_compute *payload)
{
   stage_start(pctx, ts_ns, COMPUTE_STAGE_ID);
}

void
fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns,
               const void *flush_data,
               const struct trace_end_compute *payload)
{
   stage_end(pctx, ts_ns, COMPUTE_STAGE_ID);
}

void
fd_start_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
                       const void *flush_data,
                       const struct trace_start_clear_restore *payload)
{
   stage_start(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
}

void
fd_end_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
                     const void *flush_data,
                     const struct trace_end_clear_restore *payload)
{
   stage_end(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
}

void
fd_start_resolve(struct pipe_context *pctx, uint64_t ts_ns,
                 const void *flush_data,
                 const struct trace_start_resolve *payload)
{
   stage_start(pctx, ts_ns, RESOLVE_STAGE_ID);
}

void
fd_end_resolve(struct pipe_context *pctx, uint64_t ts_ns,
               const void *flush_data,
               const struct trace_end_resolve *payload)
{
   stage_end(pctx, ts_ns, RESOLVE_STAGE_ID);
}

void
fd_start_state_restore(struct pipe_context *pctx, uint64_t ts_ns,
                       const void *flush_data,
                       const struct trace_start_state_restore *payload)
{
   stage_start(pctx, ts_ns, STATE_RESTORE_STAGE_ID);
}

void
fd_end_state_restore(struct pipe_context *pctx, uint64_t ts_ns,
                     const void *flush_data,
                     const struct trace_end_state_restore *payload)
{
   stage_end(pctx, ts_ns, STATE_RESTORE_STAGE_ID);
}

void
fd_start_vsc_overflow_test(struct pipe_context *pctx, uint64_t ts_ns,
                           const void *flush_data,
                           const struct trace_start_vsc_overflow_test *payload)
{
   stage_start(pctx, ts_ns, VSC_OVERFLOW_STAGE_ID);
}

void
fd_end_vsc_overflow_test(struct pipe_context *pctx, uint64_t ts_ns,
                         const void *flush_data,
                         const struct trace_end_vsc_overflow_test *payload)
{
   stage_end(pctx, ts_ns, VSC_OVERFLOW_STAGE_ID);
}

void
fd_start_prologue(struct pipe_context *pctx, uint64_t ts_ns,
                  const void *flush_data,
                  const struct trace_start_prologue *payload)
{
   stage_start(pctx, ts_ns, PROLOGUE_STAGE_ID);
}

void
fd_end_prologue(struct pipe_context *pctx, uint64_t ts_ns,
                const void *flush_data,
                const struct trace_end_prologue *payload)
{
   stage_end(pctx, ts_ns, PROLOGUE_STAGE_ID);
}

#ifdef __cplusplus
}
#endif