• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Google, Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <perfetto.h>
7 
8 #include "tu_perfetto.h"
9 #include "tu_buffer.h"
10 #include "tu_device.h"
11 #include "tu_queue.h"
12 #include "tu_image.h"
13 
14 #include "util/hash_table.h"
15 #include "util/perf/u_perfetto.h"
16 #include "util/perf/u_perfetto_renderpass.h"
17 
18 #include "tu_tracepoints.h"
19 #include "tu_tracepoints_perfetto.h"
20 
/* Prototypes duplicated here because we can't include tu_knl.h in this
 * file.  TODO confirm: tu_device.h is included above, so the duplicates
 * that come from it may no longer be needed.
 */
/* Read the current raw GPU timestamp (in GPU ticks; see
 * tu_device_ticks_to_ns()).  A non-zero return indicates failure.
 */
int
tu_device_get_gpu_timestamp(struct tu_device *dev,
                            uint64_t *ts);
/* Read the device suspend count; used to detect suspend/resume cycles
 * which reset the GPU timestamp (see sync_clocks()).  A non-zero return
 * indicates failure.
 */
int
tu_device_get_suspend_count(struct tu_device *dev,
                            uint64_t *suspend_count);
/* Convert a raw GPU tick count into nanoseconds. */
uint64_t
tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts);

/* Accessor for the device's u_trace context (used to check whether
 * perfetto tracing is active).
 */
struct u_trace_context *
tu_device_get_u_trace(struct tu_device *device);
34 
/**
 * Queue-id's
 *
 * Indices into the hw_queue descriptors emitted by send_descriptors();
 * also used as the hw_queue_id on each render-stage event.
 */
enum {
   DEFAULT_HW_QUEUE_ID,
};
41 
/**
 * Render-stage id's
 *
 * These index the stages[] descriptor table below and are emitted as the
 * stage_id of each GpuRenderStageEvent (unless an interned debug-marker
 * stage iid is used instead — see stage_start()/stage_end()).
 */
enum tu_stage_id {
   CMD_BUFFER_STAGE_ID,
   CMD_BUFFER_ANNOTATION_STAGE_ID,
   RENDER_PASS_STAGE_ID,
   CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
   BINNING_STAGE_ID,
   GMEM_STAGE_ID,
   BYPASS_STAGE_ID,
   BLIT_STAGE_ID,
   COMPUTE_STAGE_ID,
   CLEAR_SYSMEM_STAGE_ID,
   CLEAR_GMEM_STAGE_ID,
   GENERIC_CLEAR_STAGE_ID,
   GMEM_LOAD_STAGE_ID,
   GMEM_STORE_STAGE_ID,
   SYSMEM_RESOLVE_STAGE_ID,
   // TODO add the rest from fd_stage_id
};
63 
/* Hardware-queue descriptors, indexed by the queue-id enum above and sent
 * to perfetto by send_descriptors().
 */
static const struct {
   const char *name;
   const char *desc;
} queues[] = {
   [DEFAULT_HW_QUEUE_ID] = {"GPU Queue 0", "Default Adreno Hardware Queue"},
};
70 
/* Render-stage descriptors, indexed by enum tu_stage_id and sent to
 * perfetto by send_descriptors().  A NULL desc is simply omitted.
 */
static const struct {
   const char *name;
   const char *desc;
} stages[] = {
   [CMD_BUFFER_STAGE_ID]     = { "Command Buffer" },
   [CMD_BUFFER_ANNOTATION_STAGE_ID]     = { "Annotation", "Command Buffer Annotation" },
   [RENDER_PASS_STAGE_ID]    = { "Render Pass" },
   [CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID]    = { "Annotation", "Render Pass Command Buffer Annotation" },
   [BINNING_STAGE_ID]        = { "Binning", "Perform Visibility pass and determine target bins" },
   [GMEM_STAGE_ID]           = { "GMEM", "Rendering to GMEM" },
   [BYPASS_STAGE_ID]         = { "Bypass", "Rendering to system memory" },
   [BLIT_STAGE_ID]           = { "Blit", "Performing a Blit operation" },
   [COMPUTE_STAGE_ID]        = { "Compute", "Compute job" },
   [CLEAR_SYSMEM_STAGE_ID]   = { "Clear Sysmem", "" },
   [CLEAR_GMEM_STAGE_ID]     = { "Clear GMEM", "Per-tile (GMEM) clear" },
   [GENERIC_CLEAR_STAGE_ID]  = { "Clear Sysmem/Gmem", ""},
   [GMEM_LOAD_STAGE_ID]      = { "GMEM Load", "Per tile system memory to GMEM load" },
   [GMEM_STORE_STAGE_ID]     = { "GMEM Store", "Per tile GMEM to system memory store" },
   [SYSMEM_RESOLVE_STAGE_ID] = { "SysMem Resolve", "System memory MSAA resolve" },
   // TODO add the rest
};
92 
/* Perfetto clock id for GPU timestamps; computed in
 * TuRenderpassDataSource::OnStart() from a namespaced string hash.
 */
static uint32_t gpu_clock_id;
static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */

/**
 * The timestamp at the point where we first emitted the clock_sync..
 * this  will be a *later* timestamp that the first GPU traces (since
 * we capture the first clock_sync from the CPU *after* the first GPU
 * tracepoints happen).  To avoid confusing perfetto we need to drop
 * the GPU traces with timestamps before this.
 */
static uint64_t sync_gpu_ts;

/* Last observed suspend count; a change means the GPU timestamp counter
 * was reset by a suspend/resume cycle (see sync_clocks()).
 */
static uint64_t last_suspend_count;

/* Largest offset-adjusted GPU timestamp seen so far. */
static uint64_t gpu_max_timestamp;
/* Offset added to raw GPU timestamps to keep them monotonic across
 * suspend/resume cycles.
 */
static uint64_t gpu_timestamp_offset;
109 
/* Per-sequence incremental state: tracks whether the interned hw-queue and
 * stage descriptors must be (re)sent (see stage_end()).
 */
struct TuRenderpassIncrementalState {
   bool was_cleared = true;
};

/* Hook our incremental state into the data-source type. */
struct TuRenderpassTraits : public perfetto::DefaultDataSourceTraits {
   using IncrementalStateType = TuRenderpassIncrementalState;
};
117 
/* Render-stage data source: emits GpuRenderStageEvents on a custom GPU
 * clock (see stage_end()).
 */
class TuRenderpassDataSource : public MesaRenderpassDataSource<TuRenderpassDataSource,
                                                               TuRenderpassTraits> {
   /* Called when a tracing session starts: pick the custom GPU clock id
    * and reset the timestamp-calibration state.
    */
   void OnStart(const StartArgs &args) override
   {
      MesaRenderpassDataSource<TuRenderpassDataSource, TuRenderpassTraits>::OnStart(args);

      /* Note: clock_id's below 128 are reserved.. for custom clock sources,
       * using the hash of a namespaced string is the recommended approach.
       * See: https://perfetto.dev/docs/concepts/clock-sync
       */
      gpu_clock_id =
         _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;

      gpu_timestamp_offset = 0;
      gpu_max_timestamp = 0;
      last_suspend_count = 0;
   }
};

PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
139 
140 static void
send_descriptors(TuRenderpassDataSource::TraceContext & ctx)141 send_descriptors(TuRenderpassDataSource::TraceContext &ctx)
142 {
143    PERFETTO_LOG("Sending renderstage descriptors");
144 
145    auto packet = ctx.NewTracePacket();
146 
147    /* This must be set before interned data is sent. */
148    packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
149 
150    packet->set_timestamp(0);
151 
152    auto event = packet->set_gpu_render_stage_event();
153    event->set_gpu_id(0);
154 
155    auto spec = event->set_specifications();
156 
157    for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
158       auto desc = spec->add_hw_queue();
159 
160       desc->set_name(queues[i].name);
161       desc->set_description(queues[i].desc);
162    }
163 
164    for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
165       auto desc = spec->add_stage();
166 
167       desc->set_name(stages[i].name);
168       if (stages[i].desc)
169          desc->set_description(stages[i].desc);
170    }
171 }
172 
173 static struct tu_perfetto_stage *
stage_push(struct tu_device * dev)174 stage_push(struct tu_device *dev)
175 {
176    struct tu_perfetto_state *p = &dev->perfetto;
177 
178    if (p->stage_depth >= ARRAY_SIZE(p->stages)) {
179       p->skipped_depth++;
180       return NULL;
181    }
182 
183    return &p->stages[p->stage_depth++];
184 }
185 
186 static struct tu_perfetto_stage *
stage_pop(struct tu_device * dev)187 stage_pop(struct tu_device *dev)
188 {
189    struct tu_perfetto_state *p = &dev->perfetto;
190 
191    if (!p->stage_depth)
192       return NULL;
193 
194    if (p->skipped_depth) {
195       p->skipped_depth--;
196       return NULL;
197    }
198 
199    return &p->stages[--p->stage_depth];
200 }
201 
202 static void
stage_start(struct tu_device * dev,uint64_t ts_ns,enum tu_stage_id stage_id,const char * app_event,const void * payload=nullptr,size_t payload_size=0,const void * indirect=nullptr,trace_payload_as_extra_func payload_as_extra=nullptr)203 stage_start(struct tu_device *dev,
204             uint64_t ts_ns,
205             enum tu_stage_id stage_id,
206             const char *app_event,
207             const void *payload = nullptr,
208             size_t payload_size = 0,
209             const void *indirect = nullptr,
210             trace_payload_as_extra_func payload_as_extra = nullptr)
211 {
212    struct tu_perfetto_stage *stage = stage_push(dev);
213 
214    if (!stage) {
215       PERFETTO_ELOG("stage %d is nested too deep", stage_id);
216       return;
217    }
218 
219    if (payload) {
220       void* new_payload = malloc(payload_size);
221       if (new_payload)
222          memcpy(new_payload, payload, payload_size);
223       else
224          PERFETTO_ELOG("Failed to allocate payload for stage %d", stage_id);
225       payload = new_payload;
226    }
227 
228    *stage = (struct tu_perfetto_stage) {
229       .stage_id = stage_id,
230       .stage_iid = 0,
231       .start_ts = ts_ns,
232       .payload = payload,
233       .start_payload_function = (void *) payload_as_extra,
234    };
235 
236    if (app_event) {
237       TuRenderpassDataSource::Trace([=](auto tctx) {
238          stage->stage_iid =
239             tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event);
240       });
241    }
242 }
243 
244 static void
stage_end(struct tu_device * dev,uint64_t ts_ns,enum tu_stage_id stage_id,const void * flush_data,const void * payload=nullptr,const void * indirect=nullptr,trace_payload_as_extra_func payload_as_extra=nullptr)245 stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id,
246           const void *flush_data,
247           const void* payload = nullptr,
248           const void *indirect = nullptr,
249           trace_payload_as_extra_func payload_as_extra = nullptr)
250 {
251    struct tu_perfetto_stage *stage = stage_pop(dev);
252    auto trace_flush_data =
253       (const struct tu_u_trace_submission_data *) flush_data;
254    uint32_t submission_id = trace_flush_data->submission_id;
255    uint64_t gpu_ts_offset = trace_flush_data->gpu_ts_offset;
256 
257    if (!stage)
258       return;
259 
260    if (stage->stage_id != stage_id) {
261       PERFETTO_ELOG("stage %d ended while stage %d is expected",
262             stage_id, stage->stage_id);
263       return;
264    }
265 
266    /* If we haven't managed to calibrate the alignment between GPU and CPU
267     * timestamps yet, then skip this trace, otherwise perfetto won't know
268     * what to do with it.
269     */
270    if (!sync_gpu_ts)
271       return;
272 
273    TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
274       if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
275          send_descriptors(tctx);
276          state->was_cleared = false;
277       }
278 
279       auto packet = tctx.NewTracePacket();
280 
281       gpu_max_timestamp = MAX2(gpu_max_timestamp, ts_ns + gpu_ts_offset);
282 
283       packet->set_timestamp(stage->start_ts + gpu_ts_offset);
284       packet->set_timestamp_clock_id(gpu_clock_id);
285 
286       auto event = packet->set_gpu_render_stage_event();
287       event->set_event_id(0); // ???
288       event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
289       event->set_duration(ts_ns - stage->start_ts);
290       if (stage->stage_iid)
291          event->set_stage_iid(stage->stage_iid);
292       else
293          event->set_stage_id(stage->stage_id);
294       event->set_context((uintptr_t) dev);
295       event->set_submission_id(submission_id);
296 
297       if (stage->payload) {
298          if (stage->start_payload_function)
299             ((trace_payload_as_extra_func) stage->start_payload_function)(
300                event, stage->payload, nullptr);
301          free((void *)stage->payload);
302       }
303 
304       if (payload && payload_as_extra)
305          payload_as_extra(event, payload, indirect);
306    });
307 }
308 
/* Data source for Vulkan memory events (buffer/image create, bind and
 * destroy); events are emitted by log_mem().
 */
class TuMemoryDataSource : public perfetto::DataSource<TuMemoryDataSource> {
 public:
   void OnSetup(const SetupArgs &) override
   {
   }

   void OnStart(const StartArgs &) override
   {
      PERFETTO_LOG("Memory tracing started");
   }

   void OnStop(const StopArgs &) override
   {
      PERFETTO_LOG("Memory tracing stopped");
   }
};

PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuMemoryDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuMemoryDataSource);
328 
329 
330 #ifdef __cplusplus
331 extern "C" {
332 #endif
333 
334 void
tu_perfetto_init(void)335 tu_perfetto_init(void)
336 {
337    util_perfetto_init();
338 
339    {
340    perfetto::DataSourceDescriptor dsd;
341 #if DETECT_OS_ANDROID
342      /* AGI requires this name */
343      dsd.set_name("gpu.renderstages");
344 #else
345       dsd.set_name("gpu.renderstages.msm");
346 #endif
347       TuRenderpassDataSource::Register(dsd);
348    }
349 
350    {
351      perfetto::DataSourceDescriptor dsd;
352      dsd.set_name("gpu.memory.msm");
353      TuMemoryDataSource::Register(dsd);
354    }
355 }
356 
357 static void
emit_sync_timestamp(uint64_t cpu_ts,uint64_t gpu_ts)358 emit_sync_timestamp(uint64_t cpu_ts, uint64_t gpu_ts)
359 {
360    TuRenderpassDataSource::Trace([=](auto tctx) {
361       MesaRenderpassDataSource<TuRenderpassDataSource,
362                                TuRenderpassTraits>::EmitClockSync(tctx, cpu_ts,
363                                                                   gpu_ts, gpu_clock_id);
364    });
365 }
366 
367 uint64_t
tu_perfetto_begin_submit()368 tu_perfetto_begin_submit()
369 {
370    return perfetto::base::GetBootTimeNs().count();
371 }
372 
/**
 * Produce a matched (cpu, gpu) timestamp pair for clock synchronization.
 *
 * Two modes:
 *  - \p gpu_clocks != NULL: GPU clock values were supplied with the
 *    submission; pair them with the current CPU time and update the
 *    monotonicity bookkeeping.
 *  - \p gpu_clocks == NULL: sample the GPU timestamp ourselves, rate
 *    limited to roughly one sync per 30 ms, and fix it up so it stays
 *    monotonic across suspend/resume cycles (which reset the GPU
 *    counter).
 *
 * Returns zero-initialized clocks (gpu_ts == 0) on failure or when the
 * sync was skipped due to rate limiting.
 */
static struct tu_perfetto_clocks
sync_clocks(struct tu_device *dev,
            const struct tu_perfetto_clocks *gpu_clocks)
{
   struct tu_perfetto_clocks clocks {};
   if (gpu_clocks) {
      clocks = *gpu_clocks;
   }

   clocks.cpu = perfetto::base::GetBootTimeNs().count();

   if (gpu_clocks) {
      /* TODO: It would be better to use CPU time that comes
       * together with GPU time from the KGSL, but it's not
       * equal to GetBootTimeNs.
       */

      clocks.gpu_ts_offset = MAX2(gpu_timestamp_offset, clocks.gpu_ts_offset);
      gpu_timestamp_offset = clocks.gpu_ts_offset;
      sync_gpu_ts = clocks.gpu_ts + clocks.gpu_ts_offset;
   } else {
      clocks.gpu_ts = 0;
      clocks.gpu_ts_offset = gpu_timestamp_offset;

      /* Rate limit: don't re-sample the GPU clock before the next
       * scheduled sync time.
       */
      if (clocks.cpu < next_clock_sync_ns)
         return clocks;

      if (tu_device_get_gpu_timestamp(dev, &clocks.gpu_ts)) {
         PERFETTO_ELOG("Could not sync CPU and GPU clocks");
         return {};
      }

      clocks.gpu_ts = tu_device_ticks_to_ns(dev, clocks.gpu_ts);

      /* get cpu timestamp again because tu_device_get_gpu_timestamp can take
       * >100us
       */
      clocks.cpu = perfetto::base::GetBootTimeNs().count();

      uint64_t current_suspend_count = 0;
      /* If we fail to get it we will use a fallback */
      tu_device_get_suspend_count(dev, &current_suspend_count);

      /* GPU timestamp is being reset after suspend-resume cycle.
       * Perfetto requires clock snapshots to be monotonic,
       * so we have to fix-up the time.
       */
      if (current_suspend_count != last_suspend_count) {
         gpu_timestamp_offset = gpu_max_timestamp;
         last_suspend_count = current_suspend_count;
      }
      clocks.gpu_ts_offset = gpu_timestamp_offset;

      uint64_t gpu_absolute_ts = clocks.gpu_ts + clocks.gpu_ts_offset;

      /* Fallback check, detect non-monotonic cases which would happen
       * if we cannot retrieve suspend count.
       */
      if (sync_gpu_ts > gpu_absolute_ts) {
         gpu_absolute_ts += (gpu_max_timestamp - gpu_timestamp_offset);
         gpu_timestamp_offset = gpu_max_timestamp;
         clocks.gpu_ts = gpu_absolute_ts - gpu_timestamp_offset;
      }

      /* Still non-monotonic after the fixup: give up on this sync. */
      if (sync_gpu_ts > gpu_absolute_ts) {
         PERFETTO_ELOG("Non-monotonic gpu timestamp detected, bailing out");
         return {};
      }

      gpu_max_timestamp = clocks.gpu_ts;
      sync_gpu_ts = clocks.gpu_ts;
      /* Schedule the next sync 30 ms from now. */
      next_clock_sync_ns = clocks.cpu + 30000000;
   }

   return clocks;
}
449 
450 struct tu_perfetto_clocks
tu_perfetto_end_submit(struct tu_queue * queue,uint32_t submission_id,uint64_t start_ts,struct tu_perfetto_clocks * gpu_clocks)451 tu_perfetto_end_submit(struct tu_queue *queue,
452                        uint32_t submission_id,
453                        uint64_t start_ts,
454                        struct tu_perfetto_clocks *gpu_clocks)
455 {
456    struct tu_device *dev = queue->device;
457    if (!u_trace_perfetto_active(tu_device_get_u_trace(dev)))
458       return {};
459 
460    struct tu_perfetto_clocks clocks = sync_clocks(dev, gpu_clocks);
461    if (clocks.gpu_ts > 0)
462       emit_sync_timestamp(clocks.cpu, clocks.gpu_ts + clocks.gpu_ts_offset);
463 
464    TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
465       auto packet = tctx.NewTracePacket();
466 
467       packet->set_timestamp(start_ts);
468 
469       auto event = packet->set_vulkan_api_event();
470       auto submit = event->set_vk_queue_submit();
471 
472       submit->set_duration_ns(clocks.cpu - start_ts);
473       submit->set_vk_queue((uintptr_t) queue);
474       submit->set_submission_id(submission_id);
475    });
476 
477    return clocks;
478 }
479 
480 /*
481  * Trace callbacks, called from u_trace once the timestamps from GPU have been
482  * collected.
483  *
484  * The default "extra" funcs are code-generated into tu_tracepoints_perfetto.h
485  * and just take the tracepoint's args and add them as name/value pairs in the
486  * perfetto events.  This file can usually just map a tu_perfetto_* to
487  * stage_start/end with a call to that codegenned "extra" func.  But you can
488  * also provide your own entrypoint and extra funcs if you want to change that
489  * mapping.
490  */
491 
/* Expands to the start/end u_trace callbacks for one tracepoint, mapping
 * it onto stage_start()/stage_end() with the codegenned "extra" payload
 * functions from tu_tracepoints_perfetto.h.
 */
#define CREATE_EVENT_CALLBACK(event_name, stage_id)                                 \
   void tu_perfetto_start_##event_name(                                             \
      struct tu_device *dev, uint64_t ts_ns, uint16_t tp_idx,                       \
      const void *flush_data, const struct trace_start_##event_name *payload,       \
      const void *indirect_data)                                                    \
   {                                                                                \
      stage_start(                                                                  \
         dev, ts_ns, stage_id, NULL, payload, sizeof(*payload), indirect_data,      \
         (trace_payload_as_extra_func) &trace_payload_as_extra_start_##event_name); \
   }                                                                                \
                                                                                    \
   void tu_perfetto_end_##event_name(                                               \
      struct tu_device *dev, uint64_t ts_ns, uint16_t tp_idx,                       \
      const void *flush_data, const struct trace_end_##event_name *payload,         \
      const void *indirect_data)                                                    \
   {                                                                                \
      stage_end(                                                                    \
         dev, ts_ns, stage_id, flush_data, payload, indirect_data,                  \
         (trace_payload_as_extra_func) &trace_payload_as_extra_end_##event_name);   \
   }

/* One start/end callback pair per tracepoint, each mapped to its
 * render-stage id.
 */
CREATE_EVENT_CALLBACK(cmd_buffer, CMD_BUFFER_STAGE_ID)
CREATE_EVENT_CALLBACK(render_pass, RENDER_PASS_STAGE_ID)
CREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
CREATE_EVENT_CALLBACK(compute_indirect, COMPUTE_STAGE_ID)
CREATE_EVENT_CALLBACK(generic_clear, GENERIC_CLEAR_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID)
528 
/* Start a command-buffer annotation stage.  The annotation string is
 * passed as the app_event, so stage_start() interns it and it shows up as
 * the stage name; no payload_as_extra func is needed.
 */
void
tu_perfetto_start_cmd_buffer_annotation(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_start_cmd_buffer_annotation *payload,
   const void *indirect_data)
{
   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, payload->str, payload,
               sizeof(*payload), NULL);
}

/* End a command-buffer annotation stage.  No payload_as_extra func: the
 * annotation string was already interned as the app_event at stage start,
 * so it appears directly on the event rather than as metadata inside.
 */
void
tu_perfetto_end_cmd_buffer_annotation(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_end_cmd_buffer_annotation *payload,
   const void *indirect_data)
{
   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, flush_data,
             payload, NULL);
}

/* Start a render-pass-scoped annotation stage; same app_event handling as
 * the command-buffer variant above.
 */
void
tu_perfetto_start_cmd_buffer_annotation_rp(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_start_cmd_buffer_annotation_rp *payload,
   const void *indirect_data)
{
   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
               payload->str, payload, sizeof(*payload), NULL);
}

/* End a render-pass-scoped annotation stage; the annotation string was
 * already interned as the app_event at stage start.
 */
void
tu_perfetto_end_cmd_buffer_annotation_rp(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_end_cmd_buffer_annotation_rp *payload,
   const void *indirect_data)
{
   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
             flush_data, payload, NULL);
}
588 
589 
590 static void
log_mem(struct tu_device * dev,struct tu_buffer * buffer,struct tu_image * image,perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::Operation op)591 log_mem(struct tu_device *dev, struct tu_buffer *buffer, struct tu_image *image,
592         perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::Operation op)
593 {
594    TuMemoryDataSource::Trace([=](TuMemoryDataSource::TraceContext tctx) {
595       auto packet = tctx.NewTracePacket();
596 
597       packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
598 
599       auto event = packet->set_vulkan_memory_event();
600 
601       event->set_timestamp(perfetto::base::GetBootTimeNs().count());
602       event->set_operation(op);
603       event->set_pid(getpid());
604 
605       if (buffer) {
606          event->set_source(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SOURCE_BUFFER);
607          event->set_memory_size(buffer->vk.size);
608          if (buffer->bo)
609             event->set_memory_address(buffer->iova);
610       } else {
611          assert(image);
612          event->set_source(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SOURCE_IMAGE);
613          event->set_memory_size(image->layout[0].size);
614          if (image->bo)
615             event->set_memory_address(image->iova);
616       }
617 
618    });
619 }
620 
/* Emit an OP_CREATE memory event for a newly created buffer. */
void
tu_perfetto_log_create_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_CREATE);
}

/* Emit an OP_BIND memory event when memory is bound to a buffer. */
void
tu_perfetto_log_bind_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_BIND);
}

/* Emit a destroy memory event for a buffer: OP_DESTROY_BOUND when a BO is
 * still bound, plain OP_DESTROY otherwise.
 */
void
tu_perfetto_log_destroy_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, buffer->bo ?
      perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY_BOUND :
      perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY);
}

/* Emit an OP_CREATE memory event for a newly created image. */
void
tu_perfetto_log_create_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_CREATE);
}

/* Emit an OP_BIND memory event when memory is bound to an image. */
void
tu_perfetto_log_bind_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_BIND);
}

/* Emit a destroy memory event for an image: OP_DESTROY_BOUND when a BO is
 * still bound, plain OP_DESTROY otherwise.
 */
void
tu_perfetto_log_destroy_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, image->bo ?
      perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY_BOUND :
      perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY);
}
660 
661 
662 
663 #ifdef __cplusplus
664 }
665 #endif
666