1 /*
2 * Copyright 2024 Google LLC
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "panvk_utrace_perfetto.h"
7
8 #include <functional>
9 #include <perfetto.h>
10
11 #include "c11/threads.h"
12 #include "util/log.h"
13 #include "util/perf/u_perfetto.h"
14 #include "util/perf/u_perfetto_renderpass.h"
15 #include "util/timespec.h"
16 #include "util/u_process.h"
17
18 #include "panvk_device.h"
19 #include "panvk_tracepoints.h"
20 #include "panvk_tracepoints_perfetto.h"
21 #include "panvk_utrace.h"
22
/* Per-sequence incremental state for the panvk render-stage data source.
 *
 * was_cleared defaults to true.  emit_setup_packets() tests it, emits the
 * interned data when it is set, and then resets it to false.
 */
struct PanVKRenderpassIncrementalState {
   bool was_cleared{true};
};
26
27 struct PanVKRenderpassTraits : public perfetto::DefaultDataSourceTraits {
28 using IncrementalStateType = PanVKRenderpassIncrementalState;
29 };
30
31 class PanVKRenderpassDataSource
32 : public MesaRenderpassDataSource<PanVKRenderpassDataSource,
33 PanVKRenderpassTraits> {};
34
35 PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(PanVKRenderpassDataSource);
36 PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(PanVKRenderpassDataSource);
37
38 static const char *
get_stage_name(enum panvk_utrace_perfetto_stage stage)39 get_stage_name(enum panvk_utrace_perfetto_stage stage)
40 {
41 switch (stage) {
42 #define CASE(x) \
43 case PANVK_UTRACE_PERFETTO_STAGE_##x: \
44 return #x
45 CASE(CMDBUF);
46 #undef CASE
47 default:
48 unreachable("bad stage");
49 }
50 }
51
52 static void
emit_interned_data_packet(struct panvk_device * dev,PanVKRenderpassDataSource::TraceContext & ctx,uint64_t now)53 emit_interned_data_packet(struct panvk_device *dev,
54 PanVKRenderpassDataSource::TraceContext &ctx,
55 uint64_t now)
56 {
57 const struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
58
59 auto packet = ctx.NewTracePacket();
60 packet->set_timestamp(now);
61 packet->set_sequence_flags(
62 perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
63
64 auto interned_data = packet->set_interned_data();
65
66 for (uint32_t i = 0; i < ARRAY_SIZE(utp->queue_iids); i++) {
67 char name[64];
68 snprintf(name, sizeof(name), "%s-queue-%d", util_get_process_name(), i);
69
70 auto specs = interned_data->add_gpu_specifications();
71 specs->set_iid(utp->queue_iids[i]);
72 specs->set_name(name);
73 }
74
75 for (uint32_t i = 0; i < ARRAY_SIZE(utp->stage_iids); i++) {
76 auto specs = interned_data->add_gpu_specifications();
77 specs->set_iid(utp->stage_iids[i]);
78 specs->set_name(get_stage_name((enum panvk_utrace_perfetto_stage)i));
79 }
80 }
81
82 static uint64_t
get_gpu_time_ns(struct panvk_device * dev)83 get_gpu_time_ns(struct panvk_device *dev)
84 {
85 const struct panvk_physical_device *pdev =
86 to_panvk_physical_device(dev->vk.physical);
87 const struct pan_kmod_dev_props *props = &pdev->kmod.props;
88
89 const uint64_t ts = pan_kmod_query_timestamp(dev->kmod.dev);
90 return ts * NSEC_PER_SEC / props->timestamp_frequency;
91 }
92
93 static void
emit_clock_snapshot_packet(struct panvk_device * dev,PanVKRenderpassDataSource::TraceContext & ctx)94 emit_clock_snapshot_packet(struct panvk_device *dev,
95 PanVKRenderpassDataSource::TraceContext &ctx)
96 {
97 const struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
98 const uint64_t gpu_ns = get_gpu_time_ns(dev);
99 const uint64_t cpu_ns = perfetto::base::GetBootTimeNs().count();
100
101 MesaRenderpassDataSource<PanVKRenderpassDataSource, PanVKRenderpassTraits>::
102 EmitClockSync(ctx, cpu_ns, gpu_ns, utp->gpu_clock_id);
103 }
104
105 static void
emit_setup_packets(struct panvk_device * dev,PanVKRenderpassDataSource::TraceContext & ctx)106 emit_setup_packets(struct panvk_device *dev,
107 PanVKRenderpassDataSource::TraceContext &ctx)
108 {
109 struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
110
111 const uint64_t now = perfetto::base::GetBootTimeNs().count();
112
113 /* emit interned data if cleared */
114 auto state = ctx.GetIncrementalState();
115 if (state->was_cleared) {
116 emit_interned_data_packet(dev, ctx, now);
117
118 state->was_cleared = false;
119 utp->next_clock_snapshot = 0;
120 }
121
122 /* emit clock snapshots periodically */
123 if (now >= utp->next_clock_snapshot) {
124 emit_clock_snapshot_packet(dev, ctx);
125
126 utp->next_clock_snapshot = now + NSEC_PER_SEC;
127 }
128 }
129
130 static struct panvk_utrace_perfetto_event *
begin_event(struct panvk_device * dev,const struct panvk_utrace_flush_data * data,enum panvk_utrace_perfetto_stage stage)131 begin_event(struct panvk_device *dev,
132 const struct panvk_utrace_flush_data *data,
133 enum panvk_utrace_perfetto_stage stage)
134 {
135 struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
136 struct panvk_utrace_perfetto_queue *queue = &utp->queues[data->subqueue];
137 struct panvk_utrace_perfetto_event *ev = &queue->stack[queue->stack_depth++];
138
139 assert(data->subqueue < PANVK_UTRACE_PERFETTO_QUEUE_COUNT);
140
141 if (queue->stack_depth > PANVK_UTRACE_PERFETTO_STACK_DEPTH) {
142 PERFETTO_ELOG("queue %d stage %d too deep", data->subqueue, stage);
143 return NULL;
144 }
145
146 ev->stage = stage;
147 return ev;
148 }
149
150 static struct panvk_utrace_perfetto_event *
end_event(struct panvk_device * dev,const struct panvk_utrace_flush_data * data,enum panvk_utrace_perfetto_stage stage)151 end_event(struct panvk_device *dev, const struct panvk_utrace_flush_data *data,
152 enum panvk_utrace_perfetto_stage stage)
153 {
154 struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
155 struct panvk_utrace_perfetto_queue *queue = &utp->queues[data->subqueue];
156
157 assert(data->subqueue < PANVK_UTRACE_PERFETTO_QUEUE_COUNT);
158
159 if (!queue->stack_depth)
160 return NULL;
161
162 struct panvk_utrace_perfetto_event *ev = &queue->stack[--queue->stack_depth];
163 if (queue->stack_depth >= PANVK_UTRACE_PERFETTO_STACK_DEPTH)
164 return NULL;
165
166 assert(ev->stage == stage);
167 return ev;
168 }
169
170 static void
panvk_utrace_perfetto_begin_event(struct panvk_device * dev,const struct panvk_utrace_flush_data * data,enum panvk_utrace_perfetto_stage stage,uint64_t ts_ns)171 panvk_utrace_perfetto_begin_event(struct panvk_device *dev,
172 const struct panvk_utrace_flush_data *data,
173 enum panvk_utrace_perfetto_stage stage,
174 uint64_t ts_ns)
175 {
176 struct panvk_utrace_perfetto_event *ev = begin_event(dev, data, stage);
177 if (!ev)
178 return;
179
180 ev->begin_ns = ts_ns;
181 }
182
183 static void
panvk_utrace_perfetto_end_event(struct panvk_device * dev,const struct panvk_utrace_flush_data * data,enum panvk_utrace_perfetto_stage stage,uint64_t ts_ns,std::function<void (perfetto::protos::pbzero::GpuRenderStageEvent *)> emit_event_extra)184 panvk_utrace_perfetto_end_event(
185 struct panvk_device *dev, const struct panvk_utrace_flush_data *data,
186 enum panvk_utrace_perfetto_stage stage, uint64_t ts_ns,
187 std::function<void(perfetto::protos::pbzero::GpuRenderStageEvent *)>
188 emit_event_extra)
189 {
190 const struct panvk_utrace_perfetto_event *ev = end_event(dev, data, stage);
191 if (!ev)
192 return;
193
194 PanVKRenderpassDataSource::Trace(
195 [=](PanVKRenderpassDataSource::TraceContext ctx) {
196 struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
197
198 emit_setup_packets(dev, ctx);
199
200 auto packet = ctx.NewTracePacket();
201 packet->set_timestamp(ev->begin_ns);
202 packet->set_timestamp_clock_id(utp->gpu_clock_id);
203
204 auto event = packet->set_gpu_render_stage_event();
205 event->set_event_id(utp->event_id++);
206 event->set_duration(ts_ns - ev->begin_ns);
207 event->set_hw_queue_iid(utp->queue_iids[data->subqueue]);
208 event->set_stage_iid(utp->stage_iids[stage]);
209 event->set_context(utp->device_id);
210
211 emit_event_extra(event);
212 });
213 }
214
/* Define the panvk_utrace_perfetto_begin_<tp> / panvk_utrace_perfetto_end_<tp>
 * callbacks for tracepoint `tp`, reported as PANVK_UTRACE_PERFETTO_STAGE_<stage>.
 *
 * The begin callback asserts that it receives no payload or indirect data
 * and records the begin timestamp.  The end callback emits the event and
 * attaches the end payload through trace_payload_as_extra_end_<tp>().
 */
#define PANVK_UTRACE_PERFETTO_PROCESS_EVENT(tp, stage)                        \
   void panvk_utrace_perfetto_begin_##tp(                                     \
      struct panvk_device *dev, uint64_t ts_ns, uint16_t tp_idx,              \
      const void *flush_data, const struct trace_begin_##tp *payload,         \
      const void *indirect_data)                                              \
   {                                                                          \
      /* nothing is recorded from the begin payload; none is expected */      \
      assert(!payload && !indirect_data);                                     \
      const auto *data =                                                      \
         static_cast<const struct panvk_utrace_flush_data *>(flush_data);     \
      panvk_utrace_perfetto_begin_event(                                      \
         dev, data, PANVK_UTRACE_PERFETTO_STAGE_##stage, ts_ns);              \
   }                                                                          \
                                                                              \
   void panvk_utrace_perfetto_end_##tp(                                       \
      struct panvk_device *dev, uint64_t ts_ns, uint16_t tp_idx,              \
      const void *flush_data, const struct trace_end_##tp *payload,           \
      const void *indirect_data)                                              \
   {                                                                          \
      const auto *data =                                                      \
         static_cast<const struct panvk_utrace_flush_data *>(flush_data);     \
      panvk_utrace_perfetto_end_event(                                        \
         dev, data, PANVK_UTRACE_PERFETTO_STAGE_##stage, ts_ns,               \
         [=](perfetto::protos::pbzero::GpuRenderStageEvent *event) {          \
            trace_payload_as_extra_end_##tp(event, payload, indirect_data);   \
         });                                                                  \
   }
241
242 /* u_trace_context_process dispatches trace events to a background thread
243 * (traceq) for processing. These callbacks are called from traceq.
244 */
PANVK_UTRACE_PERFETTO_PROCESS_EVENT(cmdbuf,CMDBUF)245 PANVK_UTRACE_PERFETTO_PROCESS_EVENT(cmdbuf, CMDBUF)
246
247 static uint32_t
248 get_gpu_clock_id(void)
249 {
250 /* see https://perfetto.dev/docs/concepts/clock-sync */
251 return _mesa_hash_string("org.freedesktop.mesa.panfrost") | 0x80000000;
252 }
253
254 static void
register_data_source(void)255 register_data_source(void)
256 {
257 perfetto::DataSourceDescriptor dsd;
258 dsd.set_name("gpu.renderstages.panfrost");
259 PanVKRenderpassDataSource::Register(dsd);
260 }
261
262 void
panvk_utrace_perfetto_init(struct panvk_device * dev,uint32_t queue_count)263 panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count)
264 {
265 const struct panvk_physical_device *pdev =
266 to_panvk_physical_device(dev->vk.physical);
267 const struct pan_kmod_dev_props *props = &pdev->kmod.props;
268 struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
269
270 if (queue_count > PANVK_UTRACE_PERFETTO_QUEUE_COUNT) {
271 assert(!"PANVK_UTRACE_PERFETTO_QUEUE_COUNT too small");
272 return;
273 }
274
275 /* check for timestamp support */
276 if (!props->gpu_can_query_timestamp || !props->timestamp_frequency ||
277 !get_gpu_time_ns(dev))
278 return;
279
280 utp->gpu_clock_id = get_gpu_clock_id();
281 utp->device_id = (uintptr_t)dev;
282
283 uint64_t next_iid = 1;
284 for (uint32_t i = 0; i < ARRAY_SIZE(utp->queue_iids); i++)
285 utp->queue_iids[i] = next_iid++;
286 for (uint32_t i = 0; i < ARRAY_SIZE(utp->stage_iids); i++)
287 utp->stage_iids[i] = next_iid++;
288
289 util_perfetto_init();
290
291 static once_flag register_ds_once = ONCE_FLAG_INIT;
292 call_once(®ister_ds_once, register_data_source);
293 }
294