• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2024 Google LLC
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "panvk_utrace_perfetto.h"
7 
8 #include <functional>
9 #include <perfetto.h>
10 
11 #include "c11/threads.h"
12 #include "util/log.h"
13 #include "util/perf/u_perfetto.h"
14 #include "util/perf/u_perfetto_renderpass.h"
15 #include "util/timespec.h"
16 #include "util/u_process.h"
17 
18 #include "panvk_device.h"
19 #include "panvk_tracepoints.h"
20 #include "panvk_tracepoints_perfetto.h"
21 #include "panvk_utrace.h"
22 
/* Per-sequence incremental state tracked for the data source.
 *
 * was_cleared starts out true; emit_setup_packets() checks it to decide
 * whether the interned data has to be (re-)emitted and resets it afterwards.
 */
struct PanVKRenderpassIncrementalState {
   bool was_cleared{true};
};
26 
27 struct PanVKRenderpassTraits : public perfetto::DefaultDataSourceTraits {
28    using IncrementalStateType = PanVKRenderpassIncrementalState;
29 };
30 
31 class PanVKRenderpassDataSource
32     : public MesaRenderpassDataSource<PanVKRenderpassDataSource,
33                                       PanVKRenderpassTraits> {};
34 
35 PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(PanVKRenderpassDataSource);
36 PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(PanVKRenderpassDataSource);
37 
38 static const char *
get_stage_name(enum panvk_utrace_perfetto_stage stage)39 get_stage_name(enum panvk_utrace_perfetto_stage stage)
40 {
41    switch (stage) {
42 #define CASE(x)                                                                \
43    case PANVK_UTRACE_PERFETTO_STAGE_##x:                                       \
44       return #x
45       CASE(CMDBUF);
46 #undef CASE
47    default:
48       unreachable("bad stage");
49    }
50 }
51 
52 static void
emit_interned_data_packet(struct panvk_device * dev,PanVKRenderpassDataSource::TraceContext & ctx,uint64_t now)53 emit_interned_data_packet(struct panvk_device *dev,
54                           PanVKRenderpassDataSource::TraceContext &ctx,
55                           uint64_t now)
56 {
57    const struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
58 
59    auto packet = ctx.NewTracePacket();
60    packet->set_timestamp(now);
61    packet->set_sequence_flags(
62       perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
63 
64    auto interned_data = packet->set_interned_data();
65 
66    for (uint32_t i = 0; i < ARRAY_SIZE(utp->queue_iids); i++) {
67       char name[64];
68       snprintf(name, sizeof(name), "%s-queue-%d", util_get_process_name(), i);
69 
70       auto specs = interned_data->add_gpu_specifications();
71       specs->set_iid(utp->queue_iids[i]);
72       specs->set_name(name);
73    }
74 
75    for (uint32_t i = 0; i < ARRAY_SIZE(utp->stage_iids); i++) {
76       auto specs = interned_data->add_gpu_specifications();
77       specs->set_iid(utp->stage_iids[i]);
78       specs->set_name(get_stage_name((enum panvk_utrace_perfetto_stage)i));
79    }
80 }
81 
82 static uint64_t
get_gpu_time_ns(struct panvk_device * dev)83 get_gpu_time_ns(struct panvk_device *dev)
84 {
85    const struct panvk_physical_device *pdev =
86       to_panvk_physical_device(dev->vk.physical);
87    const struct pan_kmod_dev_props *props = &pdev->kmod.props;
88 
89    const uint64_t ts = pan_kmod_query_timestamp(dev->kmod.dev);
90    return ts * NSEC_PER_SEC / props->timestamp_frequency;
91 }
92 
93 static void
emit_clock_snapshot_packet(struct panvk_device * dev,PanVKRenderpassDataSource::TraceContext & ctx)94 emit_clock_snapshot_packet(struct panvk_device *dev,
95                            PanVKRenderpassDataSource::TraceContext &ctx)
96 {
97    const struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
98    const uint64_t gpu_ns = get_gpu_time_ns(dev);
99    const uint64_t cpu_ns = perfetto::base::GetBootTimeNs().count();
100 
101    MesaRenderpassDataSource<PanVKRenderpassDataSource, PanVKRenderpassTraits>::
102       EmitClockSync(ctx, cpu_ns, gpu_ns, utp->gpu_clock_id);
103 }
104 
105 static void
emit_setup_packets(struct panvk_device * dev,PanVKRenderpassDataSource::TraceContext & ctx)106 emit_setup_packets(struct panvk_device *dev,
107                    PanVKRenderpassDataSource::TraceContext &ctx)
108 {
109    struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
110 
111    const uint64_t now = perfetto::base::GetBootTimeNs().count();
112 
113    /* emit interned data if cleared */
114    auto state = ctx.GetIncrementalState();
115    if (state->was_cleared) {
116       emit_interned_data_packet(dev, ctx, now);
117 
118       state->was_cleared = false;
119       utp->next_clock_snapshot = 0;
120    }
121 
122    /* emit clock snapshots periodically */
123    if (now >= utp->next_clock_snapshot) {
124       emit_clock_snapshot_packet(dev, ctx);
125 
126       utp->next_clock_snapshot = now + NSEC_PER_SEC;
127    }
128 }
129 
130 static struct panvk_utrace_perfetto_event *
begin_event(struct panvk_device * dev,const struct panvk_utrace_flush_data * data,enum panvk_utrace_perfetto_stage stage)131 begin_event(struct panvk_device *dev,
132             const struct panvk_utrace_flush_data *data,
133             enum panvk_utrace_perfetto_stage stage)
134 {
135    struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
136    struct panvk_utrace_perfetto_queue *queue = &utp->queues[data->subqueue];
137    struct panvk_utrace_perfetto_event *ev = &queue->stack[queue->stack_depth++];
138 
139    assert(data->subqueue < PANVK_UTRACE_PERFETTO_QUEUE_COUNT);
140 
141    if (queue->stack_depth > PANVK_UTRACE_PERFETTO_STACK_DEPTH) {
142       PERFETTO_ELOG("queue %d stage %d too deep", data->subqueue, stage);
143       return NULL;
144    }
145 
146    ev->stage = stage;
147    return ev;
148 }
149 
150 static struct panvk_utrace_perfetto_event *
end_event(struct panvk_device * dev,const struct panvk_utrace_flush_data * data,enum panvk_utrace_perfetto_stage stage)151 end_event(struct panvk_device *dev, const struct panvk_utrace_flush_data *data,
152           enum panvk_utrace_perfetto_stage stage)
153 {
154    struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
155    struct panvk_utrace_perfetto_queue *queue = &utp->queues[data->subqueue];
156 
157    assert(data->subqueue < PANVK_UTRACE_PERFETTO_QUEUE_COUNT);
158 
159    if (!queue->stack_depth)
160       return NULL;
161 
162    struct panvk_utrace_perfetto_event *ev = &queue->stack[--queue->stack_depth];
163    if (queue->stack_depth >= PANVK_UTRACE_PERFETTO_STACK_DEPTH)
164       return NULL;
165 
166    assert(ev->stage == stage);
167    return ev;
168 }
169 
170 static void
panvk_utrace_perfetto_begin_event(struct panvk_device * dev,const struct panvk_utrace_flush_data * data,enum panvk_utrace_perfetto_stage stage,uint64_t ts_ns)171 panvk_utrace_perfetto_begin_event(struct panvk_device *dev,
172                                   const struct panvk_utrace_flush_data *data,
173                                   enum panvk_utrace_perfetto_stage stage,
174                                   uint64_t ts_ns)
175 {
176    struct panvk_utrace_perfetto_event *ev = begin_event(dev, data, stage);
177    if (!ev)
178       return;
179 
180    ev->begin_ns = ts_ns;
181 }
182 
183 static void
panvk_utrace_perfetto_end_event(struct panvk_device * dev,const struct panvk_utrace_flush_data * data,enum panvk_utrace_perfetto_stage stage,uint64_t ts_ns,std::function<void (perfetto::protos::pbzero::GpuRenderStageEvent *)> emit_event_extra)184 panvk_utrace_perfetto_end_event(
185    struct panvk_device *dev, const struct panvk_utrace_flush_data *data,
186    enum panvk_utrace_perfetto_stage stage, uint64_t ts_ns,
187    std::function<void(perfetto::protos::pbzero::GpuRenderStageEvent *)>
188       emit_event_extra)
189 {
190    const struct panvk_utrace_perfetto_event *ev = end_event(dev, data, stage);
191    if (!ev)
192       return;
193 
194    PanVKRenderpassDataSource::Trace(
195       [=](PanVKRenderpassDataSource::TraceContext ctx) {
196          struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
197 
198          emit_setup_packets(dev, ctx);
199 
200          auto packet = ctx.NewTracePacket();
201          packet->set_timestamp(ev->begin_ns);
202          packet->set_timestamp_clock_id(utp->gpu_clock_id);
203 
204          auto event = packet->set_gpu_render_stage_event();
205          event->set_event_id(utp->event_id++);
206          event->set_duration(ts_ns - ev->begin_ns);
207          event->set_hw_queue_iid(utp->queue_iids[data->subqueue]);
208          event->set_stage_iid(utp->stage_iids[stage]);
209          event->set_context(utp->device_id);
210 
211          emit_event_extra(event);
212       });
213 }
214 
215 #define PANVK_UTRACE_PERFETTO_PROCESS_EVENT(tp, stage)                         \
216    void panvk_utrace_perfetto_begin_##tp(                                      \
217       struct panvk_device *dev, uint64_t ts_ns, uint16_t tp_idx,               \
218       const void *flush_data, const struct trace_begin_##tp *payload,          \
219       const void *indirect_data)                                               \
220    {                                                                           \
221       /* we can ignore them or save them if we choose to */                    \
222       assert(!payload && !indirect_data);                                      \
223       panvk_utrace_perfetto_begin_event(                                       \
224          dev, (const struct panvk_utrace_flush_data *)flush_data,              \
225          PANVK_UTRACE_PERFETTO_STAGE_##stage, ts_ns);                          \
226    }                                                                           \
227                                                                                \
228    void panvk_utrace_perfetto_end_##tp(                                        \
229       struct panvk_device *dev, uint64_t ts_ns, uint16_t tp_idx,               \
230       const void *flush_data, const struct trace_end_##tp *payload,            \
231       const void *indirect_data)                                               \
232    {                                                                           \
233       auto emit_event_extra =                                                  \
234          [=](perfetto::protos::pbzero::GpuRenderStageEvent *event) {           \
235             trace_payload_as_extra_end_##tp(event, payload, indirect_data);    \
236          };                                                                    \
237       panvk_utrace_perfetto_end_event(                                         \
238          dev, (const struct panvk_utrace_flush_data *)flush_data,              \
239          PANVK_UTRACE_PERFETTO_STAGE_##stage, ts_ns, emit_event_extra);        \
240    }
241 
242 /* u_trace_context_process dispatches trace events to a background thread
243  * (traceq) for processing.  These callbacks are called from traceq.
244  */
PANVK_UTRACE_PERFETTO_PROCESS_EVENT(cmdbuf,CMDBUF)245 PANVK_UTRACE_PERFETTO_PROCESS_EVENT(cmdbuf, CMDBUF)
246 
247 static uint32_t
248 get_gpu_clock_id(void)
249 {
250    /* see https://perfetto.dev/docs/concepts/clock-sync */
251    return _mesa_hash_string("org.freedesktop.mesa.panfrost") | 0x80000000;
252 }
253 
254 static void
register_data_source(void)255 register_data_source(void)
256 {
257    perfetto::DataSourceDescriptor dsd;
258    dsd.set_name("gpu.renderstages.panfrost");
259    PanVKRenderpassDataSource::Register(dsd);
260 }
261 
262 void
panvk_utrace_perfetto_init(struct panvk_device * dev,uint32_t queue_count)263 panvk_utrace_perfetto_init(struct panvk_device *dev, uint32_t queue_count)
264 {
265    const struct panvk_physical_device *pdev =
266       to_panvk_physical_device(dev->vk.physical);
267    const struct pan_kmod_dev_props *props = &pdev->kmod.props;
268    struct panvk_utrace_perfetto *utp = &dev->utrace.utp;
269 
270    if (queue_count > PANVK_UTRACE_PERFETTO_QUEUE_COUNT) {
271       assert(!"PANVK_UTRACE_PERFETTO_QUEUE_COUNT too small");
272       return;
273    }
274 
275    /* check for timestamp support */
276    if (!props->gpu_can_query_timestamp || !props->timestamp_frequency ||
277        !get_gpu_time_ns(dev))
278       return;
279 
280    utp->gpu_clock_id = get_gpu_clock_id();
281    utp->device_id = (uintptr_t)dev;
282 
283    uint64_t next_iid = 1;
284    for (uint32_t i = 0; i < ARRAY_SIZE(utp->queue_iids); i++)
285       utp->queue_iids[i] = next_iid++;
286    for (uint32_t i = 0; i < ARRAY_SIZE(utp->stage_iids); i++)
287       utp->stage_iids[i] = next_iid++;
288 
289    util_perfetto_init();
290 
291    static once_flag register_ds_once = ONCE_FLAG_INIT;
292    call_once(&register_ds_once, register_data_source);
293 }
294