• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <stdio.h>
25 #include <stdarg.h>
26 
27 #include "common/intel_gem.h"
28 #include "perf/intel_perf.h"
29 
30 #include "util/hash_table.h"
31 #include "util/u_process.h"
32 
33 #include "intel_driver_ds.h"
34 #include "intel_pps_priv.h"
35 #include "intel_tracepoints.h"
36 
37 #ifdef HAVE_PERFETTO
38 
39 #include "util/perf/u_perfetto.h"
40 #include "util/perf/u_perfetto_renderpass.h"
41 
42 #include "intel_tracepoints_perfetto.h"
43 
44 /* Just naming stages */
45 static const struct {
46    const char *name;
47 
48    /* The perfetto UI requires that there is a parent-child relationship
49     * within a row of elements. Which means that all children elements must
50     * end within the lifespan of their parent.
51     *
52     * Some elements like stalls and command buffers follow that relationship,
53     * but not all. This tells us in which UI row the elements should live.
54     */
55    enum intel_ds_queue_stage draw_stage;
56 } intel_queue_stage_desc[INTEL_DS_QUEUE_STAGE_N_STAGES] = {
57    /* Order must match the enum! */
58    {
59       "queue",
60       INTEL_DS_QUEUE_STAGE_QUEUE,
61    },
62    {
63       "frame",
64       INTEL_DS_QUEUE_STAGE_FRAME,
65    },
66    {
67       "cmd-buffer",
68       INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
69    },
70    {
71       "internal-ops",
72       INTEL_DS_QUEUE_STAGE_INTERNAL_OPS,
73    },
74    {
75       "stall",
76       INTEL_DS_QUEUE_STAGE_STALL,
77    },
78    {
79       "compute",
80       INTEL_DS_QUEUE_STAGE_COMPUTE,
81    },
82    {
83       "as-build",
84       INTEL_DS_QUEUE_STAGE_AS,
85    },
86    {
87       "RT",
88       INTEL_DS_QUEUE_STAGE_RT,
89    },
90    {
91       "render-pass",
92       INTEL_DS_QUEUE_STAGE_RENDER_PASS,
93    },
94    {
95       "blorp",
96       INTEL_DS_QUEUE_STAGE_BLORP,
97    },
98    {
99       "draw",
100       INTEL_DS_QUEUE_STAGE_DRAW,
101    },
102    {
103       "draw_mesh",
104       INTEL_DS_QUEUE_STAGE_DRAW_MESH,
105    },
106 };
107 
/* Per-sequence incremental state of the data source.
 *
 * was_cleared starts out true; the trace callbacks in this file test it,
 * (re)send the descriptors and then reset it to false.
 */
struct IntelRenderpassIncrementalState {
   bool was_cleared{true};
};
111 
112 struct IntelRenderpassTraits : public perfetto::DefaultDataSourceTraits {
113    using IncrementalStateType = IntelRenderpassIncrementalState;
114 };
115 
116 class IntelRenderpassDataSource : public MesaRenderpassDataSource<IntelRenderpassDataSource,
117                                                                   IntelRenderpassTraits> {
118 public:
119    /* Make sure we're not losing traces due to lack of shared memory space */
120    constexpr static perfetto::BufferExhaustedPolicy kBufferExhaustedPolicy =
121       perfetto::BufferExhaustedPolicy::kDrop;
122 };
123 
124 PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource);
125 PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(IntelRenderpassDataSource);
126 
127 using perfetto::protos::pbzero::InternedGpuRenderStageSpecification_RenderStageCategory;
128 
129 static void
sync_timestamp(IntelRenderpassDataSource::TraceContext & ctx,struct intel_ds_device * device)130 sync_timestamp(IntelRenderpassDataSource::TraceContext &ctx,
131                struct intel_ds_device *device)
132 {
133    uint64_t cpu_ts, gpu_ts;
134 
135    if (!intel_gem_read_correlate_cpu_gpu_timestamp(device->fd,
136                                                    device->info.kmd_type,
137                                                    INTEL_ENGINE_CLASS_RENDER, 0,
138                                                    CLOCK_BOOTTIME,
139                                                    &cpu_ts, &gpu_ts, NULL)) {
140       cpu_ts = perfetto::base::GetBootTimeNs().count();
141       intel_gem_read_render_timestamp(device->fd, device->info.kmd_type,
142                                       &gpu_ts);
143    }
144    gpu_ts = intel_device_info_timebase_scale(&device->info, gpu_ts);
145 
146    if (cpu_ts < device->next_clock_sync_ns)
147       return;
148 
149    PERFETTO_LOG("sending clocks gpu=0x%08x", device->gpu_clock_id);
150 
151    device->sync_gpu_ts = gpu_ts;
152    device->next_clock_sync_ns = cpu_ts + 1000000000ull;
153 
154    MesaRenderpassDataSource<IntelRenderpassDataSource, IntelRenderpassTraits>::EmitClockSync(ctx,
155       cpu_ts, gpu_ts, device->gpu_clock_id);
156 }
157 
158 static void
send_descriptors(IntelRenderpassDataSource::TraceContext & ctx,struct intel_ds_device * device)159 send_descriptors(IntelRenderpassDataSource::TraceContext &ctx,
160                  struct intel_ds_device *device)
161 {
162    PERFETTO_LOG("Sending renderstage descriptors");
163 
164    device->event_id = 0;
165    list_for_each_entry_safe(struct intel_ds_queue, queue, &device->queues, link) {
166       for (uint32_t s = 0; s < ARRAY_SIZE(queue->stages); s++) {
167          queue->stages[s].start_ns[0] = 0;
168       }
169    }
170 
171    {
172       auto packet = ctx.NewTracePacket();
173 
174       packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
175       packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
176       packet->set_sequence_flags(perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
177 
178       auto interned_data = packet->set_interned_data();
179 
180       {
181          auto desc = interned_data->add_graphics_contexts();
182          desc->set_iid(device->iid);
183          desc->set_pid(getpid());
184          switch (device->api) {
185          case INTEL_DS_API_OPENGL:
186             desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api::OPEN_GL);
187             break;
188          case INTEL_DS_API_VULKAN:
189             desc->set_api(perfetto::protos::pbzero::InternedGraphicsContext_Api::VULKAN);
190             break;
191          default:
192             break;
193          }
194       }
195 
196       /* Emit all the IID picked at device/queue creation. */
197       list_for_each_entry_safe(struct intel_ds_queue, queue, &device->queues, link) {
198          for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) {
199             {
200                /* We put the stage number in there so that all rows are order
201                 * by intel_ds_queue_stage.
202                 */
203                char name[100];
204                snprintf(name, sizeof(name), "%.10s-%s-%u-%s",
205                         util_get_process_name(),
206                         queue->name, s, intel_queue_stage_desc[s].name);
207 
208                auto desc = interned_data->add_gpu_specifications();
209                desc->set_iid(queue->stages[s].queue_iid);
210                desc->set_name(name);
211             }
212          }
213       }
214 
215       for (unsigned i = 0; i < ARRAY_SIZE(intel_tracepoint_names); i++) {
216          /* Skip the begin tracepoint, the label represent the couple of
217           * begin/end tracepoints.
218           */
219          if (strstr(intel_tracepoint_names[i], "intel_begin_") != NULL)
220             continue;
221 
222          auto desc = interned_data->add_gpu_specifications();
223          desc->set_iid(device->tracepoint_iids[i]);
224          desc->set_name(intel_tracepoint_names[i] + strlen("intel_end_"));
225       }
226    }
227 
228    device->next_clock_sync_ns = 0;
229    sync_timestamp(ctx, device);
230 }
231 
232 static void
begin_event(struct intel_ds_queue * queue,uint64_t ts_ns,enum intel_ds_queue_stage stage_id)233 begin_event(struct intel_ds_queue *queue, uint64_t ts_ns,
234             enum intel_ds_queue_stage stage_id)
235 {
236    uint32_t level = queue->stages[stage_id].level;
237    /* If we haven't managed to calibrate the alignment between GPU and CPU
238     * timestamps yet, then skip this trace, otherwise perfetto won't know
239     * what to do with it.
240     */
241    if (!queue->device->sync_gpu_ts) {
242       queue->stages[stage_id].start_ns[level] = 0;
243       return;
244    }
245 
246    if (level >= (ARRAY_SIZE(queue->stages[stage_id].start_ns) - 1))
247       return;
248 
249    queue->stages[stage_id].start_ns[level] = ts_ns;
250    queue->stages[stage_id].level++;
251 }
252 
253 static void
end_event(struct intel_ds_queue * queue,uint64_t ts_ns,enum intel_ds_queue_stage stage_id,uint32_t submission_id,uint16_t tracepoint_idx,const char * app_event,const void * payload=nullptr,const void * indirect_data=nullptr,trace_payload_as_extra_func payload_as_extra=nullptr)254 end_event(struct intel_ds_queue *queue, uint64_t ts_ns,
255           enum intel_ds_queue_stage stage_id,
256           uint32_t submission_id,
257           uint16_t tracepoint_idx,
258           const char *app_event,
259           const void *payload = nullptr,
260           const void *indirect_data = nullptr,
261           trace_payload_as_extra_func payload_as_extra = nullptr)
262 {
263    struct intel_ds_device *device = queue->device;
264 
265    /* If we haven't managed to calibrate the alignment between GPU and CPU
266     * timestamps yet, then skip this trace, otherwise perfetto won't know
267     * what to do with it.
268     */
269    if (!device->sync_gpu_ts)
270       return;
271 
272    if (queue->stages[stage_id].level == 0)
273       return;
274 
275    uint32_t level = --queue->stages[stage_id].level;
276    struct intel_ds_stage *stage = &queue->stages[stage_id];
277    uint64_t start_ns = stage->start_ns[level];
278 
279    if (!start_ns)
280       return;
281 
282    IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) {
283       if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
284          send_descriptors(tctx, queue->device);
285          state->was_cleared = false;
286       }
287 
288       sync_timestamp(tctx, queue->device);
289 
290       uint64_t evt_id = device->event_id++;
291 
292       /* If this is an application event, we might need to generate a new
293        * stage_iid if not already seen. Otherwise, it's a driver event and we
294        * have use the internal stage_iid.
295        */
296       uint64_t stage_iid = app_event ?
297          tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event) :
298          device->tracepoint_iids[tracepoint_idx];
299 
300       auto packet = tctx.NewTracePacket();
301 
302       packet->set_timestamp(start_ns);
303       packet->set_timestamp_clock_id(queue->device->gpu_clock_id);
304 
305       assert(ts_ns >= start_ns);
306 
307       auto event = packet->set_gpu_render_stage_event();
308       event->set_gpu_id(queue->device->gpu_id);
309 
310       event->set_hw_queue_iid(stage->queue_iid);
311       event->set_stage_iid(stage_iid);
312       event->set_context(queue->device->iid);
313       event->set_event_id(evt_id);
314       event->set_duration(ts_ns - start_ns);
315       event->set_submission_id(submission_id);
316 
317       if ((payload || indirect_data) && payload_as_extra) {
318          payload_as_extra(event, payload, indirect_data);
319       }
320    });
321 
322    stage->start_ns[level] = 0;
323 }
324 
325 static void
custom_trace_payload_as_extra_end_stall(perfetto::protos::pbzero::GpuRenderStageEvent * event,const struct trace_intel_end_stall * payload)326 custom_trace_payload_as_extra_end_stall(perfetto::protos::pbzero::GpuRenderStageEvent *event,
327                                         const struct trace_intel_end_stall *payload)
328 {
329    char buf[256];
330 
331    {
332       auto data = event->add_extra_data();
333       data->set_name("stall_reason");
334 
335       snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s : %s%s%s%s%s%s%s",
336               (payload->flags & INTEL_DS_DEPTH_CACHE_FLUSH_BIT) ? "+depth_flush" : "",
337               (payload->flags & INTEL_DS_DATA_CACHE_FLUSH_BIT) ? "+dc_flush" : "",
338               (payload->flags & INTEL_DS_HDC_PIPELINE_FLUSH_BIT) ? "+hdc_flush" : "",
339               (payload->flags & INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT) ? "+rt_flush" : "",
340               (payload->flags & INTEL_DS_TILE_CACHE_FLUSH_BIT) ? "+tile_flush" : "",
341               (payload->flags & INTEL_DS_L3_FABRIC_FLUSH_BIT) ? "+l3_fabric_flush" : "",
342               (payload->flags & INTEL_DS_STATE_CACHE_INVALIDATE_BIT) ? "+state_inv" : "",
343               (payload->flags & INTEL_DS_CONST_CACHE_INVALIDATE_BIT) ? "+const_inv" : "",
344               (payload->flags & INTEL_DS_VF_CACHE_INVALIDATE_BIT) ? "+vf_inv" : "",
345               (payload->flags & INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT) ? "+tex_inv" : "",
346               (payload->flags & INTEL_DS_INST_CACHE_INVALIDATE_BIT) ? "+inst_inv" : "",
347               (payload->flags & INTEL_DS_STALL_AT_SCOREBOARD_BIT) ? "+pb_stall" : "",
348               (payload->flags & INTEL_DS_DEPTH_STALL_BIT) ? "+depth_stall" : "",
349               (payload->flags & INTEL_DS_CS_STALL_BIT) ? "+cs_stall" : "",
350               (payload->flags & INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) ? "+udp_flush" : "",
351               (payload->flags & INTEL_DS_END_OF_PIPE_BIT) ? "+eop" : "",
352               (payload->flags & INTEL_DS_CCS_CACHE_FLUSH_BIT) ? "+ccs_flush" : "",
353               (payload->reason1) ? payload->reason1 : "unknown",
354               (payload->reason2) ? "; " : "",
355               (payload->reason2) ? payload->reason2 : "",
356               (payload->reason3) ? "; " : "",
357               (payload->reason3) ? payload->reason3 : "",
358               (payload->reason4) ? "; " : "",
359               (payload->reason4) ? payload->reason4 : "");
360 
361       assert(strlen(buf) > 0);
362 
363       data->set_value(buf);
364    }
365 }
366 
367 #endif /* HAVE_PERFETTO */
368 
369 #ifdef __cplusplus
370 extern "C" {
371 #endif
372 
373 #ifdef HAVE_PERFETTO
374 
375 /*
376  * Trace callbacks, called from u_trace once the timestamps from GPU have been
377  * collected.
378  */
379 
/* Define the intel_ds_begin_<event_name>() / intel_ds_end_<event_name>()
 * callback pair for a tracepoint.
 *
 * The begin callback opens an event on `stage` of the flushed queue. The
 * end callback closes it, passing trace_payload_as_extra_intel_end_<event_name>
 * to end_event() as the callback that adds extra data to the event.
 */
#define CREATE_DUAL_EVENT_CALLBACK(event_name, stage)                        \
   void                                                                      \
   intel_ds_begin_##event_name(struct intel_ds_device *device,               \
                               uint64_t ts_ns,                               \
                               uint16_t tp_idx,                              \
                               const void *flush_data,                       \
                               const struct trace_intel_begin_##event_name *payload, \
                               const void *indirect_data)                    \
   {                                                                         \
      const struct intel_ds_flush_data *data =                               \
         static_cast<const struct intel_ds_flush_data *>(flush_data);        \
      begin_event(data->queue, ts_ns, stage);                                \
   }                                                                         \
                                                                             \
   void                                                                      \
   intel_ds_end_##event_name(struct intel_ds_device *device,                 \
                             uint64_t ts_ns,                                 \
                             uint16_t tp_idx,                                \
                             const void *flush_data,                         \
                             const struct trace_intel_end_##event_name *payload, \
                             const void *indirect_data)                      \
   {                                                                         \
      const struct intel_ds_flush_data *data =                               \
         static_cast<const struct intel_ds_flush_data *>(flush_data);        \
      end_event(data->queue, ts_ns, stage, data->submission_id,              \
                tp_idx, NULL, payload, indirect_data,                        \
                (trace_payload_as_extra_func)                                \
                &trace_payload_as_extra_intel_end_##event_name);             \
   }

/* Callback pairs for all the tracepoints handled generically, each mapped
 * to the stage (UI row) its events are recorded on.
 */

/* Frames, batches/command buffers and render passes. */
CREATE_DUAL_EVENT_CALLBACK(frame, INTEL_DS_QUEUE_STAGE_FRAME)
CREATE_DUAL_EVENT_CALLBACK(batch, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(cmd_buffer, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(render_pass, INTEL_DS_QUEUE_STAGE_RENDER_PASS)
CREATE_DUAL_EVENT_CALLBACK(blorp, INTEL_DS_QUEUE_STAGE_BLORP)

/* Draws. */
CREATE_DUAL_EVENT_CALLBACK(draw, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_multi, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_multi, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indirect, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indirect_byte_count, INTEL_DS_QUEUE_STAGE_DRAW)
CREATE_DUAL_EVENT_CALLBACK(draw_indexed_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW)

/* Mesh draws. */
CREATE_DUAL_EVENT_CALLBACK(draw_mesh, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW_MESH)

/* Transform feedback and compute. */
CREATE_DUAL_EVENT_CALLBACK(xfb, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE)
CREATE_DUAL_EVENT_CALLBACK(compute_indirect, INTEL_DS_QUEUE_STAGE_COMPUTE)

/* Events shown on the internal-ops row. */
CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(generate_commands, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(trace_copy, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(trace_copy_cb, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_clear_blorp, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_clear_cs, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_copy_cs, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(query_copy_shader, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(write_buffer_marker, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)

/* Events shown on the RT and as-build rows. */
CREATE_DUAL_EVENT_CALLBACK(rays, INTEL_DS_QUEUE_STAGE_RT)
CREATE_DUAL_EVENT_CALLBACK(as_build, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_build_leaves, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_morton_generate, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_morton_sort, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_lbvh_build_internal, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_ploc_build_internal, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_encode, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_copy, INTEL_DS_QUEUE_STAGE_AS)

448 
449 void
450 intel_ds_begin_cmd_buffer_annotation(struct intel_ds_device *device,
451                                      uint64_t ts_ns,
452                                      uint16_t tp_idx,
453                                      const void *flush_data,
454                                      const struct trace_intel_begin_cmd_buffer_annotation *payload,
455                                      const void *indirect_data)
456 {
457    const struct intel_ds_flush_data *flush =
458       (const struct intel_ds_flush_data *) flush_data;
459    begin_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_CMD_BUFFER);
460 }
461 
462 void
intel_ds_end_cmd_buffer_annotation(struct intel_ds_device * device,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_intel_end_cmd_buffer_annotation * payload,const void * indirect_data)463 intel_ds_end_cmd_buffer_annotation(struct intel_ds_device *device,
464                                    uint64_t ts_ns,
465                                    uint16_t tp_idx,
466                                    const void *flush_data,
467                                    const struct trace_intel_end_cmd_buffer_annotation *payload,
468                                    const void *indirect_data)
469 {
470    const struct intel_ds_flush_data *flush =
471       (const struct intel_ds_flush_data *) flush_data;
472    end_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_CMD_BUFFER,
473              flush->submission_id, tp_idx, payload->str, NULL, NULL, NULL);
474 }
475 
476 void
intel_ds_begin_queue_annotation(struct intel_ds_device * device,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_intel_begin_queue_annotation * payload,const void * indirect_data)477 intel_ds_begin_queue_annotation(struct intel_ds_device *device,
478                                 uint64_t ts_ns,
479                                 uint16_t tp_idx,
480                                 const void *flush_data,
481                                 const struct trace_intel_begin_queue_annotation *payload,
482                                 const void *indirect_data)
483 {
484    const struct intel_ds_flush_data *flush =
485       (const struct intel_ds_flush_data *) flush_data;
486    begin_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_QUEUE);
487 }
488 
489 void
intel_ds_end_queue_annotation(struct intel_ds_device * device,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_intel_end_queue_annotation * payload,const void * indirect_data)490 intel_ds_end_queue_annotation(struct intel_ds_device *device,
491                               uint64_t ts_ns,
492                               uint16_t tp_idx,
493                               const void *flush_data,
494                               const struct trace_intel_end_queue_annotation *payload,
495                               const void *indirect_data)
496 {
497    const struct intel_ds_flush_data *flush =
498       (const struct intel_ds_flush_data *) flush_data;
499    end_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_QUEUE,
500              flush->submission_id, tp_idx, payload->str, NULL, NULL, NULL);
501 }
502 
503 void
intel_ds_begin_stall(struct intel_ds_device * device,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_intel_begin_stall * payload,const void * indirect_data)504 intel_ds_begin_stall(struct intel_ds_device *device,
505                      uint64_t ts_ns,
506                      uint16_t tp_idx,
507                      const void *flush_data,
508                      const struct trace_intel_begin_stall *payload,
509                      const void *indirect_data)
510 {
511    const struct intel_ds_flush_data *flush =
512       (const struct intel_ds_flush_data *) flush_data;
513    begin_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL);
514 }
515 
516 void
intel_ds_end_stall(struct intel_ds_device * device,uint64_t ts_ns,uint16_t tp_idx,const void * flush_data,const struct trace_intel_end_stall * payload,const void * indirect_data)517 intel_ds_end_stall(struct intel_ds_device *device,
518                    uint64_t ts_ns,
519                    uint16_t tp_idx,
520                    const void *flush_data,
521                    const struct trace_intel_end_stall *payload,
522                    const void *indirect_data)
523 {
524    const struct intel_ds_flush_data *flush =
525       (const struct intel_ds_flush_data *) flush_data;
526    end_event(flush->queue, ts_ns, INTEL_DS_QUEUE_STAGE_STALL,
527              flush->submission_id, tp_idx, NULL, payload, indirect_data,
528              (trace_payload_as_extra_func)custom_trace_payload_as_extra_end_stall);
529 }
530 
531 uint64_t
intel_ds_begin_submit(struct intel_ds_queue * queue)532 intel_ds_begin_submit(struct intel_ds_queue *queue)
533 {
534    return perfetto::base::GetBootTimeNs().count();
535 }
536 
537 void
intel_ds_end_submit(struct intel_ds_queue * queue,uint64_t start_ts)538 intel_ds_end_submit(struct intel_ds_queue *queue,
539                     uint64_t start_ts)
540 {
541    if (!u_trace_should_process(&queue->device->trace_context)) {
542       queue->device->sync_gpu_ts = 0;
543       queue->device->next_clock_sync_ns = 0;
544       return;
545    }
546 
547    uint64_t end_ts = perfetto::base::GetBootTimeNs().count();
548    uint32_t submission_id = queue->submission_id++;
549 
550    IntelRenderpassDataSource::Trace([=](IntelRenderpassDataSource::TraceContext tctx) {
551       if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
552          send_descriptors(tctx, queue->device);
553          state->was_cleared = false;
554       }
555 
556       sync_timestamp(tctx, queue->device);
557 
558       auto packet = tctx.NewTracePacket();
559 
560       packet->set_timestamp(start_ts);
561 
562       auto event = packet->set_vulkan_api_event();
563       auto submit = event->set_vk_queue_submit();
564 
565       // submit->set_pid(os_get_pid());
566       // submit->set_tid(os_get_tid());
567       submit->set_duration_ns(end_ts - start_ts);
568       submit->set_vk_queue((uintptr_t) queue);
569       submit->set_submission_id(submission_id);
570    });
571 }
572 
573 #endif /* HAVE_PERFETTO */
574 
/* One-time initialization, run through call_once(): with perfetto support
 * built in, initializes perfetto and registers the
 * "gpu.renderstages.intel" data source. A no-op otherwise.
 */
static void
intel_driver_ds_init_once(void)
{
#ifdef HAVE_PERFETTO
   util_perfetto_init();

   perfetto::DataSourceDescriptor descriptor;
   descriptor.set_name("gpu.renderstages.intel");
   IntelRenderpassDataSource::Register(descriptor);
#endif
}
585 
586 static once_flag intel_driver_ds_once_flag = ONCE_FLAG_INIT;
/* Next ID handed out by get_iid(); the first one is 1. */
static uint64_t iid = 1;

/* Return a fresh ID and advance the counter. */
static uint64_t get_iid()
{
   const uint64_t assigned = iid;

   iid = assigned + 1;
   return assigned;
}
593 
594 void
intel_driver_ds_init(void)595 intel_driver_ds_init(void)
596 {
597    call_once(&intel_driver_ds_once_flag,
598              intel_driver_ds_init_once);
599    intel_gpu_tracepoint_config_variable();
600 }
601 
602 void
intel_ds_device_init(struct intel_ds_device * device,const struct intel_device_info * devinfo,int drm_fd,uint32_t gpu_id,enum intel_ds_api api)603 intel_ds_device_init(struct intel_ds_device *device,
604                      const struct intel_device_info *devinfo,
605                      int drm_fd,
606                      uint32_t gpu_id,
607                      enum intel_ds_api api)
608 {
609    memset(device, 0, sizeof(*device));
610 
611    device->gpu_id = gpu_id;
612    device->gpu_clock_id = intel_pps_clock_id(gpu_id);
613    device->fd = drm_fd;
614    device->info = *devinfo;
615    device->iid = get_iid();
616    device->api = api;
617 
618 #ifdef HAVE_PERFETTO
619    assert(ARRAY_SIZE(intel_tracepoint_names) < ARRAY_SIZE(device->tracepoint_iids));
620    for (unsigned i = 0; i < ARRAY_SIZE(intel_tracepoint_names); i++)
621       device->tracepoint_iids[i] = get_iid();
622 #endif
623 
624    list_inithead(&device->queues);
625    simple_mtx_init(&device->trace_context_mutex, mtx_plain);
626 }
627 
628 void
intel_ds_device_fini(struct intel_ds_device * device)629 intel_ds_device_fini(struct intel_ds_device *device)
630 {
631    u_trace_context_fini(&device->trace_context);
632    simple_mtx_destroy(&device->trace_context_mutex);
633 }
634 
635 struct intel_ds_queue *
intel_ds_device_init_queue(struct intel_ds_device * device,struct intel_ds_queue * queue,const char * fmt_name,...)636 intel_ds_device_init_queue(struct intel_ds_device *device,
637                            struct intel_ds_queue *queue,
638                            const char *fmt_name,
639                            ...)
640 {
641    va_list ap;
642 
643    memset(queue, 0, sizeof(*queue));
644 
645    queue->device = device;
646 
647    va_start(ap, fmt_name);
648    vsnprintf(queue->name, sizeof(queue->name), fmt_name, ap);
649    va_end(ap);
650 
651    for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) {
652       queue->stages[s].queue_iid = get_iid();
653    }
654 
655    list_add(&queue->link, &device->queues);
656 
657    return queue;
658 }
659 
intel_ds_flush_data_init(struct intel_ds_flush_data * data,struct intel_ds_queue * queue,uint64_t submission_id)660 void intel_ds_flush_data_init(struct intel_ds_flush_data *data,
661                               struct intel_ds_queue *queue,
662                               uint64_t submission_id)
663 {
664    memset(data, 0, sizeof(*data));
665 
666    data->queue = queue;
667    data->submission_id = submission_id;
668 
669    u_trace_init(&data->trace, &queue->device->trace_context);
670 }
671 
intel_ds_flush_data_fini(struct intel_ds_flush_data * data)672 void intel_ds_flush_data_fini(struct intel_ds_flush_data *data)
673 {
674    u_trace_fini(&data->trace);
675 }
676 
intel_ds_queue_flush_data(struct intel_ds_queue * queue,struct u_trace * ut,struct intel_ds_flush_data * data,uint32_t frame_nr,bool free_data)677 void intel_ds_queue_flush_data(struct intel_ds_queue *queue,
678                                struct u_trace *ut,
679                                struct intel_ds_flush_data *data,
680                                uint32_t frame_nr,
681                                bool free_data)
682 {
683    simple_mtx_lock(&queue->device->trace_context_mutex);
684    u_trace_flush(ut, data, frame_nr, free_data);
685    simple_mtx_unlock(&queue->device->trace_context_mutex);
686 }
687 
intel_ds_device_process(struct intel_ds_device * device,bool eof)688 void intel_ds_device_process(struct intel_ds_device *device,
689                              bool eof)
690 {
691    simple_mtx_lock(&device->trace_context_mutex);
692    u_trace_context_process(&device->trace_context, eof);
693    simple_mtx_unlock(&device->trace_context_mutex);
694 }
695 
696 #ifdef __cplusplus
697 }
698 #endif
699