• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Google, Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <perfetto.h>
7 
8 #include "tu_perfetto.h"
9 
10 #include "util/u_perfetto.h"
11 #include "util/hash_table.h"
12 
13 #include "tu_tracepoints.h"
14 #include "tu_tracepoints_perfetto.h"
15 
/* Perfetto clock id used for GPU timestamps; assigned when tracing starts. */
static uint32_t gpu_clock_id = 0;

/* CPU (boottime) timestamp, in ns, before which no new clock sync is sent. */
static uint64_t next_clock_sync_ns = 0;

/**
 * The (offset-adjusted) GPU timestamp, in ns, of the most recent clock_sync
 * we emitted.  Zero means no clock_sync has been emitted yet.
 *
 * The first clock_sync carries a *later* timestamp than the first GPU
 * traces, since it is captured on the CPU *after* the first GPU tracepoints
 * happen.  To avoid confusing perfetto, GPU traces are skipped until a
 * clock_sync has been recorded.
 */
static uint64_t sync_gpu_ts = 0;

/* Suspend count seen at the last clock sync; a change triggers a fix-up. */
static uint64_t last_suspend_count = 0;

/* Largest offset-adjusted GPU timestamp (ns) seen so far. */
static uint64_t gpu_max_timestamp = 0;
/* Value added to GPU timestamps (ns) to keep them monotonic. */
static uint64_t gpu_timestamp_offset = 0;
32 
/* Incremental state kept by perfetto for the renderpass data source. */
struct TuRenderpassIncrementalState {
   /* Starts out true; stage_end() sends the descriptors and then clears it. */
   bool was_cleared{true};
};
36 
37 struct TuRenderpassTraits : public perfetto::DefaultDataSourceTraits {
38    using IncrementalStateType = TuRenderpassIncrementalState;
39 };
40 
41 class TuRenderpassDataSource : public perfetto::DataSource<TuRenderpassDataSource, TuRenderpassTraits> {
42 public:
OnSetup(const SetupArgs &)43    void OnSetup(const SetupArgs &) override
44    {
45       // Use this callback to apply any custom configuration to your data source
46       // based on the TraceConfig in SetupArgs.
47    }
48 
OnStart(const StartArgs &)49    void OnStart(const StartArgs &) override
50    {
51       // This notification can be used to initialize the GPU driver, enable
52       // counters, etc. StartArgs will contains the DataSourceDescriptor,
53       // which can be extended.
54       u_trace_perfetto_start();
55       PERFETTO_LOG("Tracing started");
56 
57       /* Note: clock_id's below 128 are reserved.. for custom clock sources,
58        * using the hash of a namespaced string is the recommended approach.
59        * See: https://perfetto.dev/docs/concepts/clock-sync
60        */
61       gpu_clock_id =
62          _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;
63 
64       gpu_timestamp_offset = 0;
65       gpu_max_timestamp = 0;
66       last_suspend_count = 0;
67    }
68 
OnStop(const StopArgs &)69    void OnStop(const StopArgs &) override
70    {
71       PERFETTO_LOG("Tracing stopped");
72 
73       // Undo any initialization done in OnStart.
74       u_trace_perfetto_stop();
75       // TODO we should perhaps block until queued traces are flushed?
76 
77       Trace([](TuRenderpassDataSource::TraceContext ctx) {
78          auto packet = ctx.NewTracePacket();
79          packet->Finalize();
80          ctx.Flush();
81       });
82    }
83 };
84 
85 PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
86 PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
87 
88 static void
send_descriptors(TuRenderpassDataSource::TraceContext & ctx,uint64_t ts_ns)89 send_descriptors(TuRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
90 {
91    PERFETTO_LOG("Sending renderstage descriptors");
92 
93    auto packet = ctx.NewTracePacket();
94 
95    packet->set_timestamp(0);
96 
97    auto event = packet->set_gpu_render_stage_event();
98    event->set_gpu_id(0);
99 
100    auto spec = event->set_specifications();
101 
102    for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
103       auto desc = spec->add_hw_queue();
104 
105       desc->set_name(queues[i].name);
106       desc->set_description(queues[i].desc);
107    }
108 
109    for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
110       auto desc = spec->add_stage();
111 
112       desc->set_name(stages[i].name);
113       if (stages[i].desc)
114          desc->set_description(stages[i].desc);
115    }
116 }
117 
118 static void
stage_start(struct tu_device * dev,uint64_t ts_ns,enum tu_stage_id stage)119 stage_start(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage)
120 {
121    struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev);
122 
123    p->start_ts[stage] = ts_ns;
124 }
125 
126 typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*);
127 
128 static void
stage_end(struct tu_device * dev,uint64_t ts_ns,enum tu_stage_id stage,uint32_t submission_id,const void * payload=nullptr,trace_payload_as_extra_func payload_as_extra=nullptr)129 stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage,
130           uint32_t submission_id, const void* payload = nullptr,
131           trace_payload_as_extra_func payload_as_extra = nullptr)
132 {
133    struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev);
134 
135    /* If we haven't managed to calibrate the alignment between GPU and CPU
136     * timestamps yet, then skip this trace, otherwise perfetto won't know
137     * what to do with it.
138     */
139    if (!sync_gpu_ts)
140       return;
141 
142    TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
143       if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
144          send_descriptors(tctx, p->start_ts[stage]);
145          state->was_cleared = false;
146       }
147 
148       auto packet = tctx.NewTracePacket();
149 
150       gpu_max_timestamp = MAX2(gpu_max_timestamp, ts_ns + gpu_timestamp_offset);
151 
152       packet->set_timestamp(p->start_ts[stage] + gpu_timestamp_offset);
153       packet->set_timestamp_clock_id(gpu_clock_id);
154 
155       auto event = packet->set_gpu_render_stage_event();
156       event->set_event_id(0); // ???
157       event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
158       event->set_duration(ts_ns - p->start_ts[stage]);
159       event->set_stage_id(stage);
160       event->set_context((uintptr_t)dev);
161       event->set_submission_id(submission_id);
162 
163       if (payload && payload_as_extra) {
164          payload_as_extra(event, payload);
165       }
166    });
167 }
168 
169 #ifdef __cplusplus
170 extern "C" {
171 #endif
172 
173 void
tu_perfetto_init(void)174 tu_perfetto_init(void)
175 {
176    util_perfetto_init();
177 
178    perfetto::DataSourceDescriptor dsd;
179    dsd.set_name("gpu.renderstages.msm");
180    TuRenderpassDataSource::Register(dsd);
181 }
182 
183 static void
sync_timestamp(struct tu_device * dev)184 sync_timestamp(struct tu_device *dev)
185 {
186    uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
187    uint64_t gpu_ts = 0;
188 
189    if (cpu_ts < next_clock_sync_ns)
190       return;
191 
192     if (tu_device_get_gpu_timestamp(dev, &gpu_ts)) {
193       PERFETTO_ELOG("Could not sync CPU and GPU clocks");
194       return;
195     }
196 
197    uint64_t current_suspend_count = 0;
198    /* If we fail to get it we will use a fallback */
199    tu_device_get_suspend_count(dev, &current_suspend_count);
200 
201    /* convert GPU ts into ns: */
202    gpu_ts = tu_device_ticks_to_ns(dev, gpu_ts);
203 
204    /* GPU timestamp is being reset after suspend-resume cycle.
205     * Perfetto requires clock snapshots to be monotonic,
206     * so we have to fix-up the time.
207     */
208    if (current_suspend_count != last_suspend_count) {
209       gpu_timestamp_offset = gpu_max_timestamp;
210       last_suspend_count = current_suspend_count;
211    }
212 
213    gpu_ts += gpu_timestamp_offset;
214 
215    /* Fallback check, detect non-monotonic cases which would happen
216     * if we cannot retrieve suspend count.
217     */
218    if (sync_gpu_ts > gpu_ts) {
219       gpu_ts += (gpu_max_timestamp - gpu_timestamp_offset);
220       gpu_timestamp_offset = gpu_max_timestamp;
221    }
222 
223    if (sync_gpu_ts > gpu_ts) {
224       PERFETTO_ELOG("Non-monotonic gpu timestamp detected, bailing out");
225       return;
226    }
227 
228    gpu_max_timestamp = gpu_ts;
229 
230    TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
231       auto packet = tctx.NewTracePacket();
232 
233       packet->set_timestamp(cpu_ts);
234 
235       auto event = packet->set_clock_snapshot();
236 
237       {
238          auto clock = event->add_clocks();
239 
240          clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
241          clock->set_timestamp(cpu_ts);
242       }
243 
244       {
245          auto clock = event->add_clocks();
246 
247          clock->set_clock_id(gpu_clock_id);
248          clock->set_timestamp(gpu_ts);
249       }
250 
251       sync_gpu_ts = gpu_ts;
252       next_clock_sync_ns = cpu_ts + 30000000;
253    });
254 }
255 
256 static void
emit_submit_id(uint32_t submission_id)257 emit_submit_id(uint32_t submission_id)
258 {
259    TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
260       auto packet = tctx.NewTracePacket();
261 
262       packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
263 
264       auto event = packet->set_vulkan_api_event();
265       auto submit = event->set_vk_queue_submit();
266 
267       submit->set_submission_id(submission_id);
268    });
269 }
270 
271 void
tu_perfetto_submit(struct tu_device * dev,uint32_t submission_id)272 tu_perfetto_submit(struct tu_device *dev, uint32_t submission_id)
273 {
274    /* sync_timestamp isn't free */
275    if (!ut_perfetto_enabled)
276       return;
277 
278    sync_timestamp(dev);
279    emit_submit_id(submission_id);
280 }
281 
282 /*
283  * Trace callbacks, called from u_trace once the timestamps from GPU have been
284  * collected.
285  */
286 
287 #define CREATE_EVENT_CALLBACK(event_name, stage)                              \
288 void                                                                          \
289 tu_start_##event_name(struct tu_device *dev, uint64_t ts_ns,                  \
290                    const void *flush_data,                                    \
291                    const struct trace_start_##event_name *payload)            \
292 {                                                                             \
293    stage_start(dev, ts_ns, stage);                                            \
294 }                                                                             \
295                                                                               \
296 void                                                                          \
297 tu_end_##event_name(struct tu_device *dev, uint64_t ts_ns,                    \
298                    const void *flush_data,                                    \
299                    const struct trace_end_##event_name *payload)              \
300 {                                                                             \
301    auto trace_flush_data = (const struct tu_u_trace_submission_data *) flush_data; \
302    uint32_t submission_id =                                                        \
303       tu_u_trace_submission_data_get_submit_id(trace_flush_data);                  \
304    stage_end(dev, ts_ns, stage, submission_id, payload,                            \
305       (trace_payload_as_extra_func) &trace_payload_as_extra_end_##event_name);     \
306 }
307 
308 CREATE_EVENT_CALLBACK(render_pass, SURFACE_STAGE_ID)
309 CREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID)
310 CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
311 CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
312 CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
313 CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
314 CREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID)
315 CREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID)
316 CREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID)
317 CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID)
318 CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID)
319 CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID)
320 
321 #ifdef __cplusplus
322 }
323 #endif
324