• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Google, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <perfetto.h>
25 
26 #include "util/perf/u_perfetto.h"
27 #include "util/perf/u_perfetto_renderpass.h"
28 
29 #include "freedreno_tracepoints.h"
30 
/* Perfetto clock-id used to tag GPU timestamps; assigned in
 * FdRenderpassDataSource::OnStart() from a hash of a namespaced string.
 */
static uint32_t gpu_clock_id;
static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */

/**
 * The timestamp at the point where we first emitted the clock_sync.
 * This will be a *later* timestamp than the first GPU traces (since
 * we capture the first clock_sync from the CPU *after* the first GPU
 * tracepoints happen).  To avoid confusing perfetto we need to drop
 * the GPU traces with timestamps before this.
 */
static uint64_t sync_gpu_ts;
42 
/* Per-session incremental state.  Perfetto resets this to its
 * default-constructed value when it clears incremental state, which is
 * how stage_end() knows to (re)send the render-stage descriptors.
 */
struct FdRenderpassIncrementalState {
   bool was_cleared = true;
};
46 
/* DataSource traits wiring our incremental state type into perfetto. */
struct FdRenderpassTraits : public perfetto::DefaultDataSourceTraits {
   using IncrementalStateType = FdRenderpassIncrementalState;
};
50 
/* The freedreno render-stages perfetto data source; registered under the
 * name "gpu.renderstages.msm" in fd_perfetto_init().
 */
class FdRenderpassDataSource : public MesaRenderpassDataSource<FdRenderpassDataSource, FdRenderpassTraits> {
public:

   /* Called by perfetto when a tracing session starts. */
   void OnStart(const StartArgs &args) override
   {
      MesaRenderpassDataSource<FdRenderpassDataSource, FdRenderpassTraits>::OnStart(args);

      /* Note: clock_id's below 128 are reserved.. for custom clock sources,
       * using the hash of a namespaced string is the recommended approach.
       * See: https://perfetto.dev/docs/concepts/clock-sync
       */
      gpu_clock_id =
         _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;
   }
};
66 
/* Emit the static members each perfetto data-source instantiation needs
 * (declaration and definition, both in this translation unit).
 */
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
69 
/* Emit the GpuRenderStageEvent "specifications" packet describing our hw
 * queues and stages.  Sent before any stage events, once per session and
 * again whenever perfetto clears incremental state (see stage_end()).
 *
 * NOTE(review): `queues` and `stages` are presumably defined in the
 * generated freedreno_tracepoints header — not visible here.
 */
static void
send_descriptors(FdRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
{
   PERFETTO_LOG("Sending renderstage descriptors");

   auto packet = ctx.NewTracePacket();

   /* The descriptor packet is deliberately sent with a zero timestamp;
    * ts_ns is currently unused (see the commented-out lines below).
    */
   packet->set_timestamp(0);
//   packet->set_timestamp(ts_ns);
//   packet->set_timestamp_clock_id(gpu_clock_id);

   auto event = packet->set_gpu_render_stage_event();
   event->set_gpu_id(0);

   auto spec = event->set_specifications();

   /* Describe each hw queue: */
   for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
      auto desc = spec->add_hw_queue();

      desc->set_name(queues[i].name);
      desc->set_description(queues[i].desc);
   }

   /* Describe each stage; description is optional: */
   for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
      auto desc = spec->add_stage();

      desc->set_name(stages[i].name);
      if (stages[i].desc)
         desc->set_description(stages[i].desc);
   }
}
101 
102 static void
stage_start(struct pipe_context * pctx,uint64_t ts_ns,enum fd_stage_id stage)103 stage_start(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
104 {
105    struct fd_context *ctx = fd_context(pctx);
106    struct fd_perfetto_state *p = &ctx->perfetto;
107 
108    p->start_ts[stage] = ts_ns;
109 }
110 
111 static void
stage_end(struct pipe_context * pctx,uint64_t ts_ns,enum fd_stage_id stage)112 stage_end(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
113 {
114    struct fd_context *ctx = fd_context(pctx);
115    struct fd_perfetto_state *p = &ctx->perfetto;
116 
117    /* If we haven't managed to calibrate the alignment between GPU and CPU
118     * timestamps yet, then skip this trace, otherwise perfetto won't know
119     * what to do with it.
120     */
121    if (!sync_gpu_ts)
122       return;
123 
124    FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
125       if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
126          send_descriptors(tctx, p->start_ts[stage]);
127          state->was_cleared = false;
128       }
129 
130       auto packet = tctx.NewTracePacket();
131 
132       packet->set_timestamp(p->start_ts[stage]);
133       packet->set_timestamp_clock_id(gpu_clock_id);
134 
135       auto event = packet->set_gpu_render_stage_event();
136       event->set_event_id(0); // ???
137       event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
138       event->set_duration(ts_ns - p->start_ts[stage]);
139       event->set_stage_id(stage);
140       event->set_context((uintptr_t)pctx);
141 
142       /* The "surface" meta-stage has extra info about render target: */
143       if (stage == SURFACE_STAGE_ID) {
144 
145          event->set_submission_id(p->submit_id);
146 
147          if (p->cbuf0_format) {
148             auto data = event->add_extra_data();
149 
150             data->set_name("color0 format");
151             data->set_value(util_format_short_name(p->cbuf0_format));
152          }
153 
154          if (p->zs_format) {
155             auto data = event->add_extra_data();
156 
157             data->set_name("zs format");
158             data->set_value(util_format_short_name(p->zs_format));
159          }
160 
161          {
162             auto data = event->add_extra_data();
163 
164             data->set_name("width");
165             data->set_value(std::to_string(p->width));
166          }
167 
168          {
169             auto data = event->add_extra_data();
170 
171             data->set_name("height");
172             data->set_value(std::to_string(p->height));
173          }
174 
175          {
176             auto data = event->add_extra_data();
177 
178             data->set_name("MSAA");
179             data->set_value(std::to_string(p->samples));
180          }
181 
182          {
183             auto data = event->add_extra_data();
184 
185             data->set_name("MRTs");
186             data->set_value(std::to_string(p->mrts));
187          }
188 
189          // "renderMode"
190          // "surfaceID"
191 
192          if (p->nbins) {
193             auto data = event->add_extra_data();
194 
195             data->set_name("numberOfBins");
196             data->set_value(std::to_string(p->nbins));
197          }
198 
199          if (p->binw) {
200             auto data = event->add_extra_data();
201 
202             data->set_name("binWidth");
203             data->set_value(std::to_string(p->binw));
204          }
205 
206          if (p->binh) {
207             auto data = event->add_extra_data();
208 
209             data->set_name("binHeight");
210             data->set_value(std::to_string(p->binh));
211          }
212       } else if (stage == COMPUTE_STAGE_ID) {
213          {
214             auto data = event->add_extra_data();
215 
216             data->set_name("indirect");
217             data->set_value(std::to_string(p->indirect));
218          }
219 
220          {
221             auto data = event->add_extra_data();
222 
223             data->set_name("work_dim");
224             data->set_value(std::to_string(p->work_dim));
225          }
226 
227          {
228             auto data = event->add_extra_data();
229 
230             data->set_name("local_size_x");
231             data->set_value(std::to_string(p->local_size_x));
232          }
233 
234          {
235             auto data = event->add_extra_data();
236 
237             data->set_name("local_size_y");
238             data->set_value(std::to_string(p->local_size_y));
239          }
240 
241          {
242             auto data = event->add_extra_data();
243 
244             data->set_name("local_size_z");
245             data->set_value(std::to_string(p->local_size_z));
246          }
247 
248          {
249             auto data = event->add_extra_data();
250 
251             data->set_name("num_groups_x");
252             data->set_value(std::to_string(p->num_groups_x));
253          }
254 
255          {
256             auto data = event->add_extra_data();
257 
258             data->set_name("num_groups_y");
259             data->set_value(std::to_string(p->num_groups_y));
260          }
261 
262          {
263             auto data = event->add_extra_data();
264 
265             data->set_name("num_groups_z");
266             data->set_value(std::to_string(p->num_groups_z));
267          }
268 
269          {
270             auto data = event->add_extra_data();
271 
272             data->set_name("shader_id");
273             data->set_value(std::to_string(p->shader_id));
274          }
275       }
276    });
277 }
278 
279 #ifdef __cplusplus
280 extern "C" {
281 #endif
282 
283 void
fd_perfetto_init(void)284 fd_perfetto_init(void)
285 {
286    util_perfetto_init();
287 
288    perfetto::DataSourceDescriptor dsd;
289    dsd.set_name("gpu.renderstages.msm");
290    FdRenderpassDataSource::Register(dsd);
291 }
292 
/* Sample both the CPU boot-time clock and the GPU timestamp counter and
 * emit a clock-sync packet so the trace processor can align GPU events
 * with CPU time.  Rate-limited via next_clock_sync_ns.
 */
static void
sync_timestamp(struct fd_context *ctx)
{
   uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
   uint64_t gpu_ts;

   /* Without a ts_to_ns hook we can't convert raw GPU counter values: */
   if (!ctx->ts_to_ns)
      return;

   /* Rate-limit: skip if we synced recently enough: */
   if (cpu_ts < next_clock_sync_ns)
      return;

   if (fd_pipe_get_param(ctx->pipe, FD_TIMESTAMP, &gpu_ts)) {
      PERFETTO_ELOG("Could not sync CPU and GPU clocks");
      return;
   }

   /* get cpu timestamp again because FD_TIMESTAMP can take >100us */
   cpu_ts = perfetto::base::GetBootTimeNs().count();

   /* convert GPU ts into ns: */
   gpu_ts = ctx->ts_to_ns(gpu_ts);

   FdRenderpassDataSource::Trace([=](auto tctx) {
      MesaRenderpassDataSource<FdRenderpassDataSource,
                               FdRenderpassTraits>::EmitClockSync(tctx, cpu_ts,
                                                                  gpu_ts, gpu_clock_id);
   });

   /* Record the calibration point (see comment on sync_gpu_ts) and don't
    * sync again for another 30ms of CPU time:
    */
   sync_gpu_ts = gpu_ts;
   next_clock_sync_ns = cpu_ts + 30000000;
}
325 
326 static void
emit_submit_id(struct fd_context * ctx)327 emit_submit_id(struct fd_context *ctx)
328 {
329    FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
330       auto packet = tctx.NewTracePacket();
331 
332       packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
333 
334       auto event = packet->set_vulkan_api_event();
335       auto submit = event->set_vk_queue_submit();
336 
337       submit->set_submission_id(ctx->submit_count);
338    });
339 }
340 
341 void
fd_perfetto_submit(struct fd_context * ctx)342 fd_perfetto_submit(struct fd_context *ctx)
343 {
344    /* sync_timestamp isn't free */
345    if (!u_trace_perfetto_active(&ctx->trace_context))
346       return;
347 
348    sync_timestamp(ctx);
349    emit_submit_id(ctx);
350 }
351 
352 /*
353  * Trace callbacks, called from u_trace once the timestamps from GPU have been
354  * collected.
355  */
356 
357 void
fd_start_render_pass(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_render_pass * payload)358 fd_start_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
359                      uint16_t tp_idx, const void *flush_data,
360                      const struct trace_start_render_pass *payload)
361 {
362    stage_start(pctx, ts_ns, SURFACE_STAGE_ID);
363 
364    struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;
365 
366    p->submit_id = payload->submit_id;
367    p->cbuf0_format = payload->cbuf0_format;
368    p->zs_format = payload->zs_format;
369    p->width = payload->width;
370    p->height = payload->height;
371    p->mrts = payload->mrts;
372    p->samples = payload->samples;
373    p->nbins = payload->nbins;
374    p->binw = payload->binw;
375    p->binh = payload->binh;
376 }
377 
/* u_trace callback: render pass end — closes the "surface" meta-stage. */
void
fd_end_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
                   uint16_t tp_idx, const void *flush_data,
                   const struct trace_end_render_pass *payload)
{
   stage_end(pctx, ts_ns, SURFACE_STAGE_ID);
}
385 
/* u_trace callback: start of the binning IB. */
void
fd_start_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
                    uint16_t tp_idx, const void *flush_data,
                    const struct trace_start_binning_ib *payload)
{
   stage_start(pctx, ts_ns, BINNING_STAGE_ID);
}
393 
/* u_trace callback: end of the binning IB. */
void
fd_end_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
                  uint16_t tp_idx, const void *flush_data,
                  const struct trace_end_binning_ib *payload)
{
   stage_end(pctx, ts_ns, BINNING_STAGE_ID);
}
401 
402 void
fd_start_draw_ib(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_draw_ib * payload)403 fd_start_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
404                  uint16_t tp_idx, const void *flush_data,
405                  const struct trace_start_draw_ib *payload)
406 {
407    stage_start(
408       pctx, ts_ns,
409       fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
410 }
411 
412 void
fd_end_draw_ib(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_end_draw_ib * payload)413 fd_end_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
414                uint16_t tp_idx, const void *flush_data,
415                const struct trace_end_draw_ib *payload)
416 {
417    stage_end(
418       pctx, ts_ns,
419       fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
420 }
421 
/* u_trace callback: start of a blit. */
void
fd_start_blit(struct pipe_context *pctx, uint64_t ts_ns,
              uint16_t tp_idx, const void *flush_data,
              const struct trace_start_blit *payload)
{
   stage_start(pctx, ts_ns, BLIT_STAGE_ID);
}
429 
/* u_trace callback: end of a blit. */
void
fd_end_blit(struct pipe_context *pctx, uint64_t ts_ns,
            uint16_t tp_idx, const void *flush_data,
            const struct trace_end_blit *payload)
{
   stage_end(pctx, ts_ns, BLIT_STAGE_ID);
}
437 
438 void
fd_start_compute(struct pipe_context * pctx,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_start_compute * payload)439 fd_start_compute(struct pipe_context *pctx, uint64_t ts_ns,
440                  uint16_t tp_idx, const void *flush_data,
441                  const struct trace_start_compute *payload)
442 {
443    stage_start(pctx, ts_ns, COMPUTE_STAGE_ID);
444 
445    struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;
446 
447    p->indirect = payload->indirect;
448    p->work_dim = payload->work_dim;
449    p->local_size_x = payload->local_size_x;
450    p->local_size_y = payload->local_size_y;
451    p->local_size_z = payload->local_size_z;
452    p->num_groups_x = payload->num_groups_x;
453    p->num_groups_y = payload->num_groups_y;
454    p->num_groups_z = payload->num_groups_z;
455    p->shader_id    = payload->shader_id;
456 }
457 
/* u_trace callback: compute dispatch end — closes the "compute" meta-stage. */
void
fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns,
               uint16_t tp_idx, const void *flush_data,
               const struct trace_end_compute *payload)
{
   stage_end(pctx, ts_ns, COMPUTE_STAGE_ID);
}
465 
/* u_trace callback: start of clears. */
void
fd_start_clears(struct pipe_context *pctx, uint64_t ts_ns,
                uint16_t tp_idx, const void *flush_data,
                const struct trace_start_clears *payload)
{
   stage_start(pctx, ts_ns, CLEAR_STAGE_ID);
}
473 
/* u_trace callback: end of clears. */
void
fd_end_clears(struct pipe_context *pctx, uint64_t ts_ns,
              uint16_t tp_idx, const void *flush_data,
              const struct trace_end_clears *payload)
{
   stage_end(pctx, ts_ns, CLEAR_STAGE_ID);
}
481 
/* u_trace callback: start of tile loads (sysmem -> GMEM). */
void
fd_start_tile_loads(struct pipe_context *pctx, uint64_t ts_ns,
                    uint16_t tp_idx, const void *flush_data,
                    const struct trace_start_tile_loads *payload)
{
   stage_start(pctx, ts_ns, TILE_LOAD_STAGE_ID);
}
489 
/* u_trace callback: end of tile loads. */
void
fd_end_tile_loads(struct pipe_context *pctx, uint64_t ts_ns,
                  uint16_t tp_idx, const void *flush_data,
                  const struct trace_end_tile_loads *payload)
{
   stage_end(pctx, ts_ns, TILE_LOAD_STAGE_ID);
}
497 
/* u_trace callback: start of tile stores (GMEM -> sysmem). */
void
fd_start_tile_stores(struct pipe_context *pctx, uint64_t ts_ns,
                     uint16_t tp_idx, const void *flush_data,
                     const struct trace_start_tile_stores *payload)
{
   stage_start(pctx, ts_ns, TILE_STORE_STAGE_ID);
}
505 
/* u_trace callback: end of tile stores. */
void
fd_end_tile_stores(struct pipe_context *pctx, uint64_t ts_ns,
                   uint16_t tp_idx, const void *flush_data,
                   const struct trace_end_tile_stores *payload)
{
   stage_end(pctx, ts_ns, TILE_STORE_STAGE_ID);
}
513 
/* u_trace callback: start of state restore. */
void
fd_start_state_restore(struct pipe_context *pctx, uint64_t ts_ns,
                       uint16_t tp_idx, const void *flush_data,
                       const struct trace_start_state_restore *payload)
{
   stage_start(pctx, ts_ns, STATE_RESTORE_STAGE_ID);
}
521 
/* u_trace callback: end of state restore. */
void
fd_end_state_restore(struct pipe_context *pctx, uint64_t ts_ns,
                     uint16_t tp_idx, const void *flush_data,
                     const struct trace_end_state_restore *payload)
{
   stage_end(pctx, ts_ns, STATE_RESTORE_STAGE_ID);
}
529 
/* u_trace callback: start of the VSC overflow test. */
void
fd_start_vsc_overflow_test(struct pipe_context *pctx, uint64_t ts_ns,
                           uint16_t tp_idx, const void *flush_data,
                           const struct trace_start_vsc_overflow_test *payload)
{
   stage_start(pctx, ts_ns, VSC_OVERFLOW_STAGE_ID);
}
537 
/* u_trace callback: end of the VSC overflow test. */
void
fd_end_vsc_overflow_test(struct pipe_context *pctx, uint64_t ts_ns,
                         uint16_t tp_idx, const void *flush_data,
                         const struct trace_end_vsc_overflow_test *payload)
{
   stage_end(pctx, ts_ns, VSC_OVERFLOW_STAGE_ID);
}
545 
/* u_trace callback: start of the prologue. */
void
fd_start_prologue(struct pipe_context *pctx, uint64_t ts_ns,
                  uint16_t tp_idx, const void *flush_data,
                  const struct trace_start_prologue *payload)
{
   stage_start(pctx, ts_ns, PROLOGUE_STAGE_ID);
}
553 
/* u_trace callback: end of the prologue. */
void
fd_end_prologue(struct pipe_context *pctx, uint64_t ts_ns,
                uint16_t tp_idx, const void *flush_data,
                const struct trace_end_prologue *payload)
{
   stage_end(pctx, ts_ns, PROLOGUE_STAGE_ID);
}
561 
562 #ifdef __cplusplus
563 }
564 #endif
565