/*
 * Copyright © 2021 Google, Inc.
 * SPDX-License-Identifier: MIT
 */

#include <perfetto.h>

#include "tu_perfetto.h"
#include "tu_buffer.h"
#include "tu_device.h"
#include "tu_queue.h"
#include "tu_image.h"

#include "util/hash_table.h"
#include "util/perf/u_perfetto.h"
#include "util/perf/u_perfetto_renderpass.h"

#include "tu_tracepoints.h"
#include "tu_tracepoints_perfetto.h"

/* We can't include tu_knl.h here, so forward-declare the device helpers we
 * need instead.
 */

int
tu_device_get_gpu_timestamp(struct tu_device *dev,
                            uint64_t *ts);
int
tu_device_get_suspend_count(struct tu_device *dev,
                            uint64_t *suspend_count);
uint64_t
tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts);

struct u_trace_context *
tu_device_get_u_trace(struct tu_device *device);

/**
 * Queue IDs
 */
enum {
   DEFAULT_HW_QUEUE_ID,
};

/**
 * Render-stage IDs
 */
enum tu_stage_id {
   CMD_BUFFER_STAGE_ID,
   CMD_BUFFER_ANNOTATION_STAGE_ID,
   RENDER_PASS_STAGE_ID,
   CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
   BINNING_STAGE_ID,
   GMEM_STAGE_ID,
   BYPASS_STAGE_ID,
   BLIT_STAGE_ID,
   COMPUTE_STAGE_ID,
   CLEAR_SYSMEM_STAGE_ID,
   CLEAR_GMEM_STAGE_ID,
   GENERIC_CLEAR_STAGE_ID,
   GMEM_LOAD_STAGE_ID,
   GMEM_STORE_STAGE_ID,
   SYSMEM_RESOLVE_STAGE_ID,
   // TODO add the rest from fd_stage_id
};
63
64 static const struct {
65 const char *name;
66 const char *desc;
67 } queues[] = {
68 [DEFAULT_HW_QUEUE_ID] = {"GPU Queue 0", "Default Adreno Hardware Queue"},
69 };
70
71 static const struct {
72 const char *name;
73 const char *desc;
74 } stages[] = {
75 [CMD_BUFFER_STAGE_ID] = { "Command Buffer" },
76 [CMD_BUFFER_ANNOTATION_STAGE_ID] = { "Annotation", "Command Buffer Annotation" },
77 [RENDER_PASS_STAGE_ID] = { "Render Pass" },
78 [CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID] = { "Annotation", "Render Pass Command Buffer Annotation" },
79 [BINNING_STAGE_ID] = { "Binning", "Perform Visibility pass and determine target bins" },
80 [GMEM_STAGE_ID] = { "GMEM", "Rendering to GMEM" },
81 [BYPASS_STAGE_ID] = { "Bypass", "Rendering to system memory" },
82 [BLIT_STAGE_ID] = { "Blit", "Performing a Blit operation" },
83 [COMPUTE_STAGE_ID] = { "Compute", "Compute job" },
84 [CLEAR_SYSMEM_STAGE_ID] = { "Clear Sysmem", "" },
85 [CLEAR_GMEM_STAGE_ID] = { "Clear GMEM", "Per-tile (GMEM) clear" },
86 [GENERIC_CLEAR_STAGE_ID] = { "Clear Sysmem/Gmem", ""},
87 [GMEM_LOAD_STAGE_ID] = { "GMEM Load", "Per tile system memory to GMEM load" },
88 [GMEM_STORE_STAGE_ID] = { "GMEM Store", "Per tile GMEM to system memory store" },
89 [SYSMEM_RESOLVE_STAGE_ID] = { "SysMem Resolve", "System memory MSAA resolve" },
90 // TODO add the rest
91 };

static uint32_t gpu_clock_id;
static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */

/**
 * The GPU timestamp at the point where we first emitted the clock_sync.
 * This will be a *later* timestamp than the first GPU traces (since we
 * capture the first clock_sync from the CPU *after* the first GPU
 * tracepoints happen). To avoid confusing perfetto we need to drop
 * the GPU traces with timestamps before this.
 */
static uint64_t sync_gpu_ts;

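/* Suspend/resume handling: the GPU timestamp counter resets across a
 * suspend/resume cycle, so we track the largest timestamp we have emitted
 * plus an offset that is bumped on each detected suspend, keeping the clock
 * monotonic for perfetto (see sync_clocks()).
 */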
static uint64_t last_suspend_count;

static uint64_t gpu_max_timestamp;
static uint64_t gpu_timestamp_offset;

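/* When the perfetto service clears our incremental state (e.g. at the start
 * of a new tracing session), was_cleared tells us to re-send the render-stage
 * descriptors before emitting the next event (see stage_end()).
 */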
struct TuRenderpassIncrementalState {
   bool was_cleared = true;
};

struct TuRenderpassTraits : public perfetto::DefaultDataSourceTraits {
   using IncrementalStateType = TuRenderpassIncrementalState;
};

class TuRenderpassDataSource : public MesaRenderpassDataSource<TuRenderpassDataSource,
                                                               TuRenderpassTraits> {
   void OnStart(const StartArgs &args) override
   {
      MesaRenderpassDataSource<TuRenderpassDataSource, TuRenderpassTraits>::OnStart(args);

      /* Note: clock IDs below 128 are reserved. For custom clock sources,
       * using the hash of a namespaced string is the recommended approach.
       * See: https://perfetto.dev/docs/concepts/clock-sync
       */
      gpu_clock_id =
         _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;

      gpu_timestamp_offset = 0;
      gpu_max_timestamp = 0;
      last_suspend_count = 0;
   }
};

PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);

static void
send_descriptors(TuRenderpassDataSource::TraceContext &ctx)
{
   PERFETTO_LOG("Sending renderstage descriptors");

   auto packet = ctx.NewTracePacket();

   /* This must be set before interned data is sent. */
   packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);

   packet->set_timestamp(0);

   auto event = packet->set_gpu_render_stage_event();
   event->set_gpu_id(0);

   auto spec = event->set_specifications();

   for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
      auto desc = spec->add_hw_queue();

      desc->set_name(queues[i].name);
      desc->set_description(queues[i].desc);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
      auto desc = spec->add_stage();

      desc->set_name(stages[i].name);
      if (stages[i].desc)
         desc->set_description(stages[i].desc);
   }
}

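/* Stages are tracked on a small fixed-size per-device stack. Pushes beyond
 * the stack capacity are counted in skipped_depth so that the matching pops
 * can be discarded without unbalancing the stack.
 */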
static struct tu_perfetto_stage *
stage_push(struct tu_device *dev)
{
   struct tu_perfetto_state *p = &dev->perfetto;

   if (p->stage_depth >= ARRAY_SIZE(p->stages)) {
      p->skipped_depth++;
      return NULL;
   }

   return &p->stages[p->stage_depth++];
}

static struct tu_perfetto_stage *
stage_pop(struct tu_device *dev)
{
   struct tu_perfetto_state *p = &dev->perfetto;

   if (!p->stage_depth)
      return NULL;

   if (p->skipped_depth) {
      p->skipped_depth--;
      return NULL;
   }

   return &p->stages[--p->stage_depth];
}

static void
stage_start(struct tu_device *dev,
            uint64_t ts_ns,
            enum tu_stage_id stage_id,
            const char *app_event,
            const void *payload = nullptr,
            size_t payload_size = 0,
            const void *indirect = nullptr,
            trace_payload_as_extra_func payload_as_extra = nullptr)
{
   struct tu_perfetto_stage *stage = stage_push(dev);

   if (!stage) {
      PERFETTO_ELOG("stage %d is nested too deep", stage_id);
      return;
   }

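   /* Copy the payload: it is only valid for the duration of this call, but
    * it isn't consumed until the matching stage_end() runs once the GPU
    * timestamps have been collected.
    */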
   if (payload) {
      void *new_payload = malloc(payload_size);
      if (new_payload)
         memcpy(new_payload, payload, payload_size);
      else
         PERFETTO_ELOG("Failed to allocate payload for stage %d", stage_id);
      payload = new_payload;
   }

   *stage = (struct tu_perfetto_stage) {
      .stage_id = stage_id,
      .stage_iid = 0,
      .start_ts = ts_ns,
      .payload = payload,
      .start_payload_function = (void *) payload_as_extra,
   };

   if (app_event) {
      TuRenderpassDataSource::Trace([=](auto tctx) {
         stage->stage_iid =
            tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event);
      });
   }
}

static void
stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage_id,
          const void *flush_data,
          const void *payload = nullptr,
          const void *indirect = nullptr,
          trace_payload_as_extra_func payload_as_extra = nullptr)
{
   struct tu_perfetto_stage *stage = stage_pop(dev);
   auto trace_flush_data =
      (const struct tu_u_trace_submission_data *) flush_data;
   uint32_t submission_id = trace_flush_data->submission_id;
   uint64_t gpu_ts_offset = trace_flush_data->gpu_ts_offset;

   if (!stage)
      return;

   if (stage->stage_id != stage_id) {
      PERFETTO_ELOG("stage %d ended while stage %d is expected",
                    stage_id, stage->stage_id);
      return;
   }

   /* If we haven't managed to calibrate the alignment between GPU and CPU
    * timestamps yet, then skip this trace, otherwise perfetto won't know
    * what to do with it.
    */
   if (!sync_gpu_ts)
      return;

   TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
      if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
         send_descriptors(tctx);
         state->was_cleared = false;
      }

      auto packet = tctx.NewTracePacket();

      gpu_max_timestamp = MAX2(gpu_max_timestamp, ts_ns + gpu_ts_offset);

      packet->set_timestamp(stage->start_ts + gpu_ts_offset);
      packet->set_timestamp_clock_id(gpu_clock_id);

      auto event = packet->set_gpu_render_stage_event();
      event->set_event_id(0); // ???
      event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
      event->set_duration(ts_ns - stage->start_ts);
      if (stage->stage_iid)
         event->set_stage_iid(stage->stage_iid);
      else
         event->set_stage_id(stage->stage_id);
      event->set_context((uintptr_t) dev);
      event->set_submission_id(submission_id);

      if (stage->payload) {
         if (stage->start_payload_function)
            ((trace_payload_as_extra_func) stage->start_payload_function)(
               event, stage->payload, nullptr);
         free((void *) stage->payload);
      }

      if (payload && payload_as_extra)
         payload_as_extra(event, payload, indirect);
   });
}

class TuMemoryDataSource : public perfetto::DataSource<TuMemoryDataSource> {
 public:
   void OnSetup(const SetupArgs &) override
   {
   }

   void OnStart(const StartArgs &) override
   {
      PERFETTO_LOG("Memory tracing started");
   }

   void OnStop(const StopArgs &) override
   {
      PERFETTO_LOG("Memory tracing stopped");
   }
};

PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuMemoryDataSource);
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuMemoryDataSource);


#ifdef __cplusplus
extern "C" {
#endif

void
tu_perfetto_init(void)
{
   util_perfetto_init();

   {
      perfetto::DataSourceDescriptor dsd;
#if DETECT_OS_ANDROID
      /* AGI requires this name */
      dsd.set_name("gpu.renderstages");
#else
      dsd.set_name("gpu.renderstages.msm");
#endif
      TuRenderpassDataSource::Register(dsd);
   }

   {
      perfetto::DataSourceDescriptor dsd;
      dsd.set_name("gpu.memory.msm");
      TuMemoryDataSource::Register(dsd);
   }
}
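
/* A minimal sketch of enabling these data sources from a perfetto trace
 * config (textproto). The names must match the ones registered above
 * (on Android, "gpu.renderstages" instead):
 *
 *   data_sources { config { name: "gpu.renderstages.msm" } }
 *   data_sources { config { name: "gpu.memory.msm" } }
 */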

static void
emit_sync_timestamp(uint64_t cpu_ts, uint64_t gpu_ts)
{
   TuRenderpassDataSource::Trace([=](auto tctx) {
      MesaRenderpassDataSource<TuRenderpassDataSource,
                               TuRenderpassTraits>::EmitClockSync(tctx, cpu_ts,
                                                                  gpu_ts, gpu_clock_id);
   });
}

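/* tu_perfetto_begin_submit() records the CPU time at the start of a queue
 * submission; tu_perfetto_end_submit() later emits a VkQueueSubmit event
 * spanning from that timestamp to the current CPU time, plus a clock sync
 * if one is due.
 */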
uint64_t
tu_perfetto_begin_submit()
{
   return perfetto::base::GetBootTimeNs().count();
}

static struct tu_perfetto_clocks
sync_clocks(struct tu_device *dev,
            const struct tu_perfetto_clocks *gpu_clocks)
{
   struct tu_perfetto_clocks clocks {};
   if (gpu_clocks) {
      clocks = *gpu_clocks;
   }

   clocks.cpu = perfetto::base::GetBootTimeNs().count();

   if (gpu_clocks) {
      /* TODO: It would be better to use the CPU time that comes together
       * with the GPU time from KGSL, but it's not equal to GetBootTimeNs.
       */

      clocks.gpu_ts_offset = MAX2(gpu_timestamp_offset, clocks.gpu_ts_offset);
      gpu_timestamp_offset = clocks.gpu_ts_offset;
      sync_gpu_ts = clocks.gpu_ts + clocks.gpu_ts_offset;
   } else {
      clocks.gpu_ts = 0;
      clocks.gpu_ts_offset = gpu_timestamp_offset;

      if (clocks.cpu < next_clock_sync_ns)
         return clocks;

      if (tu_device_get_gpu_timestamp(dev, &clocks.gpu_ts)) {
         PERFETTO_ELOG("Could not sync CPU and GPU clocks");
         return {};
      }

      clocks.gpu_ts = tu_device_ticks_to_ns(dev, clocks.gpu_ts);

      /* Get the CPU timestamp again because tu_device_get_gpu_timestamp can
       * take >100us.
       */
      clocks.cpu = perfetto::base::GetBootTimeNs().count();

      uint64_t current_suspend_count = 0;
      /* If we fail to get it we will use a fallback */
      tu_device_get_suspend_count(dev, &current_suspend_count);

      /* The GPU timestamp is reset after a suspend/resume cycle.
       * Perfetto requires clock snapshots to be monotonic,
       * so we have to fix up the time.
       */
      if (current_suspend_count != last_suspend_count) {
         gpu_timestamp_offset = gpu_max_timestamp;
         last_suspend_count = current_suspend_count;
      }
      clocks.gpu_ts_offset = gpu_timestamp_offset;

      uint64_t gpu_absolute_ts = clocks.gpu_ts + clocks.gpu_ts_offset;

      /* Fallback check to detect non-monotonic cases, which would happen
       * if we cannot retrieve the suspend count.
       */
      if (sync_gpu_ts > gpu_absolute_ts) {
         gpu_absolute_ts += (gpu_max_timestamp - gpu_timestamp_offset);
         gpu_timestamp_offset = gpu_max_timestamp;
         clocks.gpu_ts = gpu_absolute_ts - gpu_timestamp_offset;
      }

      if (sync_gpu_ts > gpu_absolute_ts) {
         PERFETTO_ELOG("Non-monotonic gpu timestamp detected, bailing out");
         return {};
      }

      gpu_max_timestamp = clocks.gpu_ts;
      sync_gpu_ts = clocks.gpu_ts;
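      /* Rate-limit clock syncs to one every 30 ms of CPU time. */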
      next_clock_sync_ns = clocks.cpu + 30000000;
   }

   return clocks;
}
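
/* A worked example of the suspend fix-up, with hypothetical numbers: if the
 * GPU counter reached 5,000,000 ns (gpu_max_timestamp) before a suspend
 * reset it to zero, the next sync sets gpu_timestamp_offset = 5,000,000,
 * so a raw post-resume timestamp of 1,000 ns is reported to perfetto as
 * 5,001,000 ns and the GPU clock stays monotonic.
 */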

struct tu_perfetto_clocks
tu_perfetto_end_submit(struct tu_queue *queue,
                       uint32_t submission_id,
                       uint64_t start_ts,
                       struct tu_perfetto_clocks *gpu_clocks)
{
   struct tu_device *dev = queue->device;
   if (!u_trace_perfetto_active(tu_device_get_u_trace(dev)))
      return {};

   struct tu_perfetto_clocks clocks = sync_clocks(dev, gpu_clocks);
   if (clocks.gpu_ts > 0)
      emit_sync_timestamp(clocks.cpu, clocks.gpu_ts + clocks.gpu_ts_offset);

   TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(start_ts);

      auto event = packet->set_vulkan_api_event();
      auto submit = event->set_vk_queue_submit();

      submit->set_duration_ns(clocks.cpu - start_ts);
      submit->set_vk_queue((uintptr_t) queue);
      submit->set_submission_id(submission_id);
   });

   return clocks;
}

/*
 * Trace callbacks, called from u_trace once the timestamps from the GPU have
 * been collected.
 *
 * The default "extra" funcs are code-generated into tu_tracepoints_perfetto.h
 * and just take the tracepoint's args and add them as name/value pairs in the
 * perfetto events. This file can usually just map a tu_perfetto_* to
 * stage_start/end with a call to that codegenned "extra" func. But you can
 * also provide your own entrypoint and extra funcs if you want to change that
 * mapping.
 */

#define CREATE_EVENT_CALLBACK(event_name, stage_id)                           \
   void tu_perfetto_start_##event_name(                                       \
      struct tu_device *dev, uint64_t ts_ns, uint16_t tp_idx,                 \
      const void *flush_data, const struct trace_start_##event_name *payload, \
      const void *indirect_data)                                              \
   {                                                                          \
      stage_start(                                                            \
         dev, ts_ns, stage_id, NULL, payload, sizeof(*payload), indirect_data, \
         (trace_payload_as_extra_func) &trace_payload_as_extra_start_##event_name); \
   }                                                                          \
                                                                              \
   void tu_perfetto_end_##event_name(                                         \
      struct tu_device *dev, uint64_t ts_ns, uint16_t tp_idx,                 \
      const void *flush_data, const struct trace_end_##event_name *payload,   \
      const void *indirect_data)                                              \
   {                                                                          \
      stage_end(                                                              \
         dev, ts_ns, stage_id, flush_data, payload, indirect_data,            \
         (trace_payload_as_extra_func) &trace_payload_as_extra_end_##event_name); \
   }

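/* For example, CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID) below expands to
 * tu_perfetto_start_blit()/tu_perfetto_end_blit(), which forward to
 * stage_start()/stage_end() along with the codegenned
 * trace_payload_as_extra_{start,end}_blit helpers.
 */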
CREATE_EVENT_CALLBACK(cmd_buffer, CMD_BUFFER_STAGE_ID)
CREATE_EVENT_CALLBACK(render_pass, RENDER_PASS_STAGE_ID)
CREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
CREATE_EVENT_CALLBACK(compute_indirect, COMPUTE_STAGE_ID)
CREATE_EVENT_CALLBACK(generic_clear, GENERIC_CLEAR_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID)
CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID)
CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID)

void
tu_perfetto_start_cmd_buffer_annotation(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_start_cmd_buffer_annotation *payload,
   const void *indirect_data)
{
   /* No extra func necessary, the only arg is in the end payload. */
   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, payload->str, payload,
               sizeof(*payload), NULL);
}

void
tu_perfetto_end_cmd_buffer_annotation(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_end_cmd_buffer_annotation *payload,
   const void *indirect_data)
{
   /* The payload string was passed as the app_event at stage_start, so it
    * appears right on the event block, rather than as metadata inside.
    */
   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_STAGE_ID, flush_data,
             payload, NULL);
}

void
tu_perfetto_start_cmd_buffer_annotation_rp(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_start_cmd_buffer_annotation_rp *payload,
   const void *indirect_data)
{
   /* No extra func necessary, the only arg is in the end payload. */
   stage_start(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
               payload->str, payload, sizeof(*payload), NULL);
}

void
tu_perfetto_end_cmd_buffer_annotation_rp(
   struct tu_device *dev,
   uint64_t ts_ns,
   uint16_t tp_idx,
   const void *flush_data,
   const struct trace_end_cmd_buffer_annotation_rp *payload,
   const void *indirect_data)
{
   /* The payload string was passed as the app_event at stage_start, so it
    * appears right on the event block, rather than as metadata inside.
    */
   stage_end(dev, ts_ns, CMD_BUFFER_ANNOTATION_RENDER_PASS_STAGE_ID,
             flush_data, payload, NULL);
}

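/* Emit one VulkanMemoryEvent per buffer/image create/bind/destroy. The BO's
 * iova is reported as the memory address once the resource is bound.
 */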
static void
log_mem(struct tu_device *dev, struct tu_buffer *buffer, struct tu_image *image,
        perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::Operation op)
{
   TuMemoryDataSource::Trace([=](TuMemoryDataSource::TraceContext tctx) {
      auto packet = tctx.NewTracePacket();

      packet->set_timestamp(perfetto::base::GetBootTimeNs().count());

      auto event = packet->set_vulkan_memory_event();

      event->set_timestamp(perfetto::base::GetBootTimeNs().count());
      event->set_operation(op);
      event->set_pid(getpid());

      if (buffer) {
         event->set_source(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SOURCE_BUFFER);
         event->set_memory_size(buffer->vk.size);
         if (buffer->bo)
            event->set_memory_address(buffer->iova);
      } else {
         assert(image);
         event->set_source(perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::SOURCE_IMAGE);
         event->set_memory_size(image->layout[0].size);
         if (image->bo)
            event->set_memory_address(image->iova);
      }
   });
}

void
tu_perfetto_log_create_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_CREATE);
}

void
tu_perfetto_log_bind_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_BIND);
}

void
tu_perfetto_log_destroy_buffer(struct tu_device *dev, struct tu_buffer *buffer)
{
   log_mem(dev, buffer, NULL, buffer->bo ?
           perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY_BOUND :
           perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY);
}

void
tu_perfetto_log_create_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_CREATE);
}

void
tu_perfetto_log_bind_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_BIND);
}

void
tu_perfetto_log_destroy_image(struct tu_device *dev, struct tu_image *image)
{
   log_mem(dev, NULL, image, image->bo ?
           perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY_BOUND :
           perfetto::protos::pbzero::perfetto_pbzero_enum_VulkanMemoryEvent::OP_DESTROY);
}

#ifdef __cplusplus
}
#endif