/*
 * Copyright © 2020 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "radv_cmd_buffer.h"
#include "radv_cs.h"
#include "radv_entrypoints.h"
#include "radv_pipeline_rt.h"
#include "radv_queue.h"
#include "radv_shader.h"
#include "radv_spm.h"
#include "radv_sqtt.h"
#include "vk_common_entrypoints.h"
#include "vk_semaphore.h"

#include "ac_rgp.h"
#include "ac_sqtt.h"

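/* Re-emit the SPI_SHADER_PGM_LO_* registers of every bound graphics shader so
 * that they point at the relocated copies set up by
 * radv_sqtt_reloc_graphics_shaders() below. PGM_LO takes the 256-byte-aligned
 * VA (va >> 8); on GFX levels that still program a per-stage PGM_HI, the
 * second dword carries the high VA bits (va >> 40) in the MEM_BASE field.
 */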
void
radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
   struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
   struct radeon_cmdbuf *cs = cmd_buffer->cs;
   uint64_t va;

   radv_cs_add_buffer(device->ws, cs, reloc->bo);

   /* VS */
   if (pipeline->base.shaders[MESA_SHADER_VERTEX]) {
      struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_VERTEX];

      va = reloc->va[MESA_SHADER_VERTEX];
      if (vs->info.vs.as_ls) {
         radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
      } else if (vs->info.vs.as_es) {
         radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else if (vs->info.is_ngg) {
         radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
      } else {
         radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* TCS */
   if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]) {
      const struct radv_shader *tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL];

      va = reloc->va[MESA_SHADER_TESS_CTRL];

      if (gfx_level >= GFX9) {
         radeon_set_sh_reg(cs, tcs->info.regs.pgm_lo, va >> 8);
      } else {
         radeon_set_sh_reg_seq(cs, tcs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
      }
   }

   /* TES */
   if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]) {
      struct radv_shader *tes = pipeline->base.shaders[MESA_SHADER_TESS_EVAL];

      va = reloc->va[MESA_SHADER_TESS_EVAL];
      if (tes->info.is_ngg) {
         radeon_set_sh_reg(cs, tes->info.regs.pgm_lo, va >> 8);
      } else if (tes->info.tes.as_es) {
         radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else {
         radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* GS */
   if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) {
      struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];

      va = reloc->va[MESA_SHADER_GEOMETRY];
      if (gs->info.is_ngg) {
         radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
      } else {
         if (gfx_level >= GFX9) {
            radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
         } else {
            radeon_set_sh_reg_seq(cs, gs->info.regs.pgm_lo, 2);
            radeon_emit(cs, va >> 8);
            radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
         }
      }
   }

   /* FS */
   if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]) {
      const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];

      va = reloc->va[MESA_SHADER_FRAGMENT];

      radeon_set_sh_reg_seq(cs, ps->info.regs.pgm_lo, 2);
      radeon_emit(cs, va >> 8);
      radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
   }

   /* MS */
   if (pipeline->base.shaders[MESA_SHADER_MESH]) {
      const struct radv_shader *ms = pipeline->base.shaders[MESA_SHADER_MESH];

      va = reloc->va[MESA_SHADER_MESH];

      radeon_set_sh_reg(cs, ms->info.regs.pgm_lo, va >> 8);
   }
}

static uint64_t
radv_sqtt_shader_get_va_reloc(struct radv_pipeline *pipeline, gl_shader_stage stage)
{
   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
      struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;
      return reloc->va[stage];
   }

   return radv_shader_get_va(pipeline->shaders[stage]);
}

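/* RGP expects all shaders of a pipeline to live in one contiguous allocation,
 * so copy every shader binary of the pipeline into a fresh slab and record the
 * per-stage virtual addresses.
 */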
static VkResult
radv_sqtt_reloc_graphics_shaders(struct radv_device *device, struct radv_graphics_pipeline *pipeline)
{
   struct radv_shader_dma_submission *submission = NULL;
   struct radv_sqtt_shaders_reloc *reloc;
   uint32_t code_size = 0;
   VkResult result;

   reloc = calloc(1, sizeof(*reloc));
   if (!reloc)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Compute the total code size. */
   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      const struct radv_shader *shader = pipeline->base.shaders[i];
      if (!shader)
         continue;

      code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   /* Allocate memory for all shader binaries. */
   reloc->alloc = radv_alloc_shader_memory(device, code_size, false, pipeline);
   if (!reloc->alloc) {
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto fail;
   }

   reloc->bo = reloc->alloc->arena->bo;

   /* Relocate shader binaries to be contiguous in memory as requested by RGP. */
   uint64_t slab_va = radv_buffer_get_va(reloc->bo) + reloc->alloc->offset;
   char *slab_ptr = reloc->alloc->arena->ptr + reloc->alloc->offset;
   uint64_t offset = 0;

   if (device->shader_use_invisible_vram) {
      submission = radv_shader_dma_get_submission(device, reloc->bo, slab_va, code_size);
      if (!submission) {
         result = VK_ERROR_UNKNOWN;
         goto fail;
      }
   }

   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      const struct radv_shader *shader = pipeline->base.shaders[i];
      void *dest_ptr;
      if (!shader)
         continue;

      reloc->va[i] = slab_va + offset;

      if (device->shader_use_invisible_vram)
         dest_ptr = submission->ptr + offset;
      else
         dest_ptr = slab_ptr + offset;

      memcpy(dest_ptr, shader->code, shader->code_size);

      offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   if (device->shader_use_invisible_vram) {
      uint64_t upload_seq = 0;

      if (!radv_shader_dma_submit(device, submission, &upload_seq)) {
         result = VK_ERROR_UNKNOWN;
         goto fail;
      }

      for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
         struct radv_shader *shader = pipeline->base.shaders[i];

         if (!shader)
            continue;

         shader->upload_seq = upload_seq;
      }

      if (pipeline->base.gs_copy_shader)
         pipeline->base.gs_copy_shader->upload_seq = upload_seq;
   }

   pipeline->sqtt_shaders_reloc = reloc;

   return VK_SUCCESS;

fail:
   if (reloc->alloc)
      radv_free_shader_memory(device, reloc->alloc);
   free(reloc);
   return result;
}

static void
radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;
   marker.is_end = 1;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
                        uint32_t vertex_offset_user_data, uint32_t instance_offset_user_data,
                        uint32_t draw_index_user_data)
{
   struct rgp_sqtt_marker_event marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.api_type = api_type;
   marker.cmd_id = cmd_buffer->state.num_events++;
   marker.cb_id = cmd_buffer->sqtt_cb_id;

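   /* UINT_MAX marks a draw-parameter user-data SGPR that the pipeline doesn't
    * use; fall back to zero (and, for the draw index, to the vertex offset)
    * in that case.
    */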
   if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) {
      vertex_offset_user_data = 0;
      instance_offset_user_data = 0;
   }

   if (draw_index_user_data == UINT_MAX)
      draw_index_user_data = vertex_offset_user_data;

   marker.vertex_offset_reg_idx = vertex_offset_user_data;
   marker.instance_offset_reg_idx = instance_offset_user_data;
   marker.draw_index_reg_idx = draw_index_user_data;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
                                  uint32_t x, uint32_t y, uint32_t z)
{
   struct rgp_sqtt_marker_event_with_dims marker = {0};

   marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.event.api_type = api_type;
   marker.event.cmd_id = cmd_buffer->state.num_events++;
   marker.event.cb_id = cmd_buffer->sqtt_cb_id;
   marker.event.has_thread_dims = 1;

   marker.thread_x = x;
   marker.thread_y = y;
   marker.thread_z = z;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_user_event_type type,
                             const char *str)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   if (type == UserEventPop) {
      assert(str == NULL);
      struct rgp_sqtt_marker_user_event marker = {0};
      marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.data_type = type;

      radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
   } else {
      assert(str != NULL);
      unsigned len = strlen(str);
      struct rgp_sqtt_marker_user_event_with_length marker = {0};
      marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.user_event.data_type = type;
      marker.length = align(len, 4);

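      /* SQTT user data is emitted one dword at a time, so the string payload
       * is padded up to a multiple of 4 bytes.
       */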
      uint8_t *buffer = alloca(sizeof(marker) + marker.length);
      memset(buffer, 0, sizeof(marker) + marker.length);
      memcpy(buffer, &marker, sizeof(marker));
      memcpy(buffer + sizeof(marker), str, len);

      radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4);
   }
}

void
radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   uint64_t device_id = (uintptr_t)device;
   struct rgp_sqtt_marker_cb_start marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   /* Reserve a command buffer ID for SQTT. */
   const struct radv_physical_device *pdev = radv_device_physical(device);
   enum amd_ip_type ip_type = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
   union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&device->sqtt, ip_type);
   cmd_buffer->sqtt_cb_id = cb_id.all;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;
   marker.queue = cmd_buffer->qf;
   marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;

   if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
      marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;

   if (!radv_sparse_queue_enabled(pdev))
      marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   uint64_t device_id = (uintptr_t)device;
   struct rgp_sqtt_marker_cb_end marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
}

void
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   if (info->indirect) {
      radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
   } else {
      radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, info->blocks[0],
                                        info->blocks[1], info->blocks[2]);
   }
}

void
radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects)
{
   cmd_buffer->state.current_event_type =
      (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ? EventRenderPassColorClear : EventRenderPassDepthStencilClear;
}

void
radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

void
radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventRenderPassResolve;
}

void
radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

void
radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_barrier_end marker = {0};

   if (likely(!device->sqtt.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)
      return;

   cmd_buffer->state.pending_sqtt_barrier_end = false;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
   marker.cb_id = cmd_buffer->sqtt_cb_id;

   marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;

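   /* Translate RADV's internal flush bits into the flags RGP understands for
    * a barrier-end marker.
    */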
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
      marker.wait_on_eop_ts = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
      marker.vs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
      marker.ps_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
      marker.cs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
      marker.pfp_sync_me = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
      marker.sync_cp_dma = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
      marker.inval_tcp = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
      marker.inval_sqI = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
      marker.inval_sqK = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
      marker.flush_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
      marker.inval_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
      marker.flush_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
      marker.inval_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
      marker.flush_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
      marker.inval_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
      marker.inval_gl1 = true;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions = 0;
}

void
radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_barrier_start marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   if (cmd_buffer->state.in_barrier) {
      assert(!"attempted to start a barrier while already in a barrier");
      return;
   }

   radv_describe_barrier_end_delayed(cmd_buffer);
   cmd_buffer->state.sqtt_flush_bits = 0;
   cmd_buffer->state.in_barrier = true;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.dword02 = reason;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.in_barrier = false;
   cmd_buffer->state.pending_sqtt_barrier_end = true;
}

void
radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_layout_transition marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   if (!cmd_buffer->state.in_barrier) {
      assert(!"layout transition marker should be only emitted inside a barrier marker");
      return;
   }

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
   marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
   marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
   marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
   marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
   marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
   marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
   marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
   marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions++;
}

void
radv_describe_begin_accel_struct_build(struct radv_cmd_buffer *cmd_buffer, uint32_t count)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   char marker[64];
   snprintf(marker, sizeof(marker), "vkCmdBuildAccelerationStructuresKHR(%u)", count);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, marker);
}

void
radv_describe_end_accel_struct_build(struct radv_cmd_buffer *cmd_buffer)
{
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

static void
radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
                            struct radv_pipeline *pipeline)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_pipeline_bind marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.bind_point = pipelineBindPoint;
   marker.api_pso_hash[0] = pipeline->pipeline_hash;
   marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

/* Queue events */
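/* Queue-timing events (submits, presents, semaphore waits/signals) are
 * recorded as rgp_queue_event_record entries on a device-wide list under a
 * lock; they are presumably written out together with the rest of the RGP
 * capture.
 */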
static void
radv_describe_queue_event(struct radv_queue *queue, struct rgp_queue_event_record *record)
{
   struct radv_device *device = radv_queue_device(queue);
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_queue_event *queue_event = &sqtt->rgp_queue_event;

   simple_mtx_lock(&queue_event->lock);
   list_addtail(&record->list, &queue_event->record);
   queue_event->record_count++;
   simple_mtx_unlock(&queue_event->lock);
}

static VkResult
radv_describe_queue_present(struct radv_queue *queue, uint64_t cpu_timestamp, void *gpu_timestamp_ptr)
{
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = SQTT_QUEUE_TIMING_EVENT_PRESENT;
   record->cpu_timestamp = cpu_timestamp;
   record->gpu_timestamps[0] = gpu_timestamp_ptr;
   record->queue_info_index = queue->vk.queue_family_index;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static VkResult
radv_describe_queue_submit(struct radv_queue *queue, struct radv_cmd_buffer *cmd_buffer, uint32_t cmdbuf_idx,
                           uint64_t cpu_timestamp, void *pre_gpu_timestamp_ptr, void *post_gpu_timestamp_ptr)
{
   struct radv_device *device = radv_queue_device(queue);
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = SQTT_QUEUE_TIMING_EVENT_CMDBUF_SUBMIT;
   record->api_id = (uintptr_t)cmd_buffer;
   record->cpu_timestamp = cpu_timestamp;
   record->frame_index = device->vk.current_frame;
   record->gpu_timestamps[0] = pre_gpu_timestamp_ptr;
   record->gpu_timestamps[1] = post_gpu_timestamp_ptr;
   record->queue_info_index = queue->vk.queue_family_index;
   record->submit_sub_index = cmdbuf_idx;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static VkResult
radv_describe_queue_semaphore(struct radv_queue *queue, struct vk_semaphore *sync,
                              enum sqtt_queue_event_type event_type)
{
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = event_type;
   record->api_id = (uintptr_t)sync;
   record->cpu_timestamp = os_time_get_nano();
   record->queue_info_index = queue->vk.queue_family_index;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

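/* Called once per present: stop any capture in flight (re-arming it when the
 * trace buffer turned out to be too small) and start a new capture when one
 * was triggered.
 */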
static void
radv_handle_sqtt(VkQueue _queue)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   bool trigger = device->sqtt_triggered;
   device->sqtt_triggered = false;

   if (device->sqtt_enabled) {
      if (!radv_sqtt_stop_capturing(queue)) {
         /* Try to capture the next frame if the buffer was too small initially. */
         trigger = true;
      }
   }

   if (trigger) {
      radv_sqtt_start_capturing(queue);
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   VkResult result;

   queue->sqtt_present = true;

   result = device->layer_dispatch.rgp.QueuePresentKHR(_queue, pPresentInfo);
   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
      return result;

   queue->sqtt_present = false;

   radv_handle_sqtt(_queue);

   return VK_SUCCESS;
}

static VkResult
radv_sqtt_wsi_submit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
   struct radeon_winsys_bo *gpu_timestamp_bo;
   uint32_t gpu_timestamp_offset;
   VkCommandBuffer timed_cmdbuf;
   void *gpu_timestamp_ptr;
   uint64_t cpu_timestamp;
   VkResult result = VK_SUCCESS;

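   /* Prepend one timed command buffer that writes a TOP_OF_PIPE GPU timestamp;
    * paired with the CPU timestamp sampled below, it gives RGP the timing of
    * the present.
    */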
   assert(submitCount <= 1 && pSubmits != NULL);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];
      VkSubmitInfo2 sqtt_submit = *pSubmit;

      assert(sqtt_submit.commandBufferInfoCount <= 1);

      /* Command buffers */
      uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount + 1;

      new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
      if (!new_cmdbufs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      /* Sample the current CPU time before building the GPU timestamp cmdbuf. */
      cpu_timestamp = os_time_get_nano();

      result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamp_bo, &gpu_timestamp_offset, &gpu_timestamp_ptr);
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamp_bo, gpu_timestamp_offset,
                                          VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &timed_cmdbuf);
      if (result != VK_SUCCESS)
         goto fail;

      new_cmdbufs[0] = (VkCommandBufferSubmitInfo){
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
         .commandBuffer = timed_cmdbuf,
      };

      if (sqtt_submit.commandBufferInfoCount == 1)
         new_cmdbufs[1] = sqtt_submit.pCommandBufferInfos[0];

      sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
      sqtt_submit.pCommandBufferInfos = new_cmdbufs;

      radv_describe_queue_present(queue, cpu_timestamp, gpu_timestamp_ptr);

      result = device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
      if (result != VK_SUCCESS)
         goto fail;

      FREE(new_cmdbufs);
   }

   return result;

fail:
   FREE(new_cmdbufs);
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueueSubmit2(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   const bool is_gfx_or_ace = queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE;
   VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
   VkResult result = VK_SUCCESS;

   /* Only consider queue events on graphics/compute when enabled. */
   if (((!device->sqtt_enabled || !radv_sqtt_queue_events_enabled()) && !instance->vk.trace_per_submit) ||
       !is_gfx_or_ace)
      return device->layer_dispatch.rgp.QueueSubmit2(_queue, submitCount, pSubmits, _fence);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];

      /* Wait semaphores */
      for (uint32_t j = 0; j < pSubmit->waitSemaphoreInfoCount; j++) {
         const VkSemaphoreSubmitInfo *pWaitSemaphoreInfo = &pSubmit->pWaitSemaphoreInfos[j];
         VK_FROM_HANDLE(vk_semaphore, sem, pWaitSemaphoreInfo->semaphore);
         radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_WAIT_SEMAPHORE);
      }
   }

   if (queue->sqtt_present)
      return radv_sqtt_wsi_submit(_queue, submitCount, pSubmits, _fence);

   if (instance->vk.trace_per_submit)
      radv_sqtt_start_capturing(queue);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];
      VkSubmitInfo2 sqtt_submit = *pSubmit;

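      /* Each application command buffer gets bracketed by two timed command
       * buffers (a TOP_OF_PIPE timestamp before it, a BOTTOM_OF_PIPE timestamp
       * after it), hence three submit infos per original one.
       */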
      /* Command buffers */
      uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount * 3;
      uint32_t cmdbuf_idx = 0;

      new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
      if (!new_cmdbufs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      for (uint32_t j = 0; j < sqtt_submit.commandBufferInfoCount; j++) {
         const VkCommandBufferSubmitInfo *pCommandBufferInfo = &sqtt_submit.pCommandBufferInfos[j];
         struct radeon_winsys_bo *gpu_timestamps_bo[2];
         uint32_t gpu_timestamps_offset[2];
         VkCommandBuffer pre_timed_cmdbuf, post_timed_cmdbuf;
         void *gpu_timestamps_ptr[2];
         uint64_t cpu_timestamp;

         /* Sample the current CPU time before building the timed cmdbufs. */
         cpu_timestamp = os_time_get_nano();

         result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamps_bo[0], &gpu_timestamps_offset[0],
                                                  &gpu_timestamps_ptr[0]);
         if (result != VK_SUCCESS)
            goto fail;

         result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[0], gpu_timestamps_offset[0],
                                             VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &pre_timed_cmdbuf);
         if (result != VK_SUCCESS)
            goto fail;

         new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = pre_timed_cmdbuf,
         };

         new_cmdbufs[cmdbuf_idx++] = *pCommandBufferInfo;

         result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamps_bo[1], &gpu_timestamps_offset[1],
                                                  &gpu_timestamps_ptr[1]);
         if (result != VK_SUCCESS)
            goto fail;

         result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[1], gpu_timestamps_offset[1],
                                             VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, &post_timed_cmdbuf);
         if (result != VK_SUCCESS)
            goto fail;

         new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = post_timed_cmdbuf,
         };

         VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBufferInfo->commandBuffer);
         radv_describe_queue_submit(queue, cmd_buffer, j, cpu_timestamp, gpu_timestamps_ptr[0], gpu_timestamps_ptr[1]);
      }

      sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
      sqtt_submit.pCommandBufferInfos = new_cmdbufs;

      result = device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
      if (result != VK_SUCCESS)
         goto fail;

      /* Signal semaphores */
      for (uint32_t j = 0; j < sqtt_submit.signalSemaphoreInfoCount; j++) {
         const VkSemaphoreSubmitInfo *pSignalSemaphoreInfo = &sqtt_submit.pSignalSemaphoreInfos[j];
         VK_FROM_HANDLE(vk_semaphore, sem, pSignalSemaphoreInfo->semaphore);
         radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_SIGNAL_SEMAPHORE);
      }

      FREE(new_cmdbufs);
   }

   if (instance->vk.trace_per_submit) {
      if (!radv_sqtt_stop_capturing(queue)) {
         fprintf(stderr,
                 "radv: Failed to capture RGP for this submit because the buffer is too small and auto-resizing "
                 "is disabled. See RADV_THREAD_TRACE_BUFFER_SIZE for increasing the size.\n");
      }
   }

   return result;

fail:
   FREE(new_cmdbufs);
   return result;
}

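/* Wrap a Cmd* entrypoint: write a begin "general API" marker, tag the work
 * recorded by the wrapped command with the given event type, call into the
 * next layer, then write the matching end marker.
 */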
#define EVENT_MARKER_BASE(cmd_name, api_name, event_name, ...)                                                         \
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                                         \
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);                                                    \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                                                  \
   cmd_buffer->state.current_event_type = EventCmd##event_name;                                                        \
   device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__);                                                              \
   cmd_buffer->state.current_event_type = EventInternalUnknown;                                                        \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) EVENT_MARKER_BASE(cmd_name, api_name, api_name, __VA_ARGS__);

#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex,
             uint32_t firstInstance)
{
   EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex,
                    int32_t vertexOffset, uint32_t firstInstance)
{
   EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                     uint32_t stride)
{
   EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                            uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer,
                          VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
{
   EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                 uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount,
                stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   EVENT_MARKER_ALIAS(DispatchBase, Dispatch, commandBuffer, 0, 0, 0, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset)
{
   EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyBuffer2, CopyBuffer, commandBuffer, pCopyBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize fillSize,
                   uint32_t data)
{
   EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize,
                     const void *pData)
{
   EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyImageInfo)
{
   EVENT_MARKER_ALIAS(CopyImage2, CopyImage, commandBuffer, pCopyImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer, pCopyBufferToImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer, pCopyImageToBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBlitImage2(VkCommandBuffer commandBuffer, const VkBlitImageInfo2 *pBlitImageInfo)
{
   EVENT_MARKER_ALIAS(BlitImage2, BlitImage, commandBuffer, pBlitImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                        const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                               const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
                               const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
                         uint32_t rectCount, const VkClearRect *pRects)
{
   EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo)
{
   EVENT_MARKER_ALIAS(ResolveImage2, ResolveImage, commandBuffer, pResolveImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
                    const VkDependencyInfo *pDependencyInfos)
{
   EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents, pDependencyInfos);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo)
{
   EVENT_MARKER_ALIAS(PipelineBarrier2, PipelineBarrier, commandBuffer, pDependencyInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount)
{
   EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
                             uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride,
                             VkQueryResultFlags flags)
{
   EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer, dstOffset, stride,
                flags);
}

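/* The ray-tracing entrypoints reuse the Dispatch general-API marker (RT work
 * executes as compute dispatches); the event type still identifies the
 * specific command, optionally OR'ed with flags such as
 * ApiRayTracingSeparateCompiled.
 */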
#define EVENT_RT_MARKER(cmd_name, flags, ...) EVENT_MARKER_BASE(cmd_name, Dispatch, cmd_name | flags, __VA_ARGS__);

#define EVENT_RT_MARKER_ALIAS(cmd_name, event_name, flags, ...)                                                        \
   EVENT_MARKER_BASE(cmd_name, Dispatch, event_name | flags, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
                     uint32_t height, uint32_t depth)
{
   EVENT_RT_MARKER(TraceRaysKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
                             const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
                             VkDeviceAddress indirectDeviceAddress)
{
   EVENT_RT_MARKER(TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, VkDeviceAddress indirectDeviceAddress)
{
   EVENT_RT_MARKER_ALIAS(TraceRaysIndirect2KHR, TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer,
                         indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureToMemoryKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyMemoryToAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   EVENT_MARKER(DrawMeshTasksEXT, commandBuffer, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 uint32_t drawCount, uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectEXT, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                      VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                      uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectCountEXT, commandBuffer, buffer, offset, countBuffer, countBufferOffset,
                maxDrawCount, stride);
}

#undef EVENT_RT_MARKER_ALIAS
#undef EVENT_RT_MARKER

#undef EVENT_MARKER
#undef EVENT_MARKER_ALIAS
#undef EVENT_MARKER_BASE

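/* Like EVENT_MARKER_BASE but without setting an event type: used for state
 * and binding commands that don't generate draw or dispatch events themselves,
 * so only the begin/end general API markers are written.
 */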
#define API_MARKER_ALIAS(cmd_name, api_name, ...)                                                                      \
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                                         \
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);                                                    \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                                                  \
   device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__);                                                              \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline)
{
   VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);

   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) {
      /* RGP seems to expect a compute bind point to detect and report RT
       * pipelines, which makes sense given that RT shaders are compiled to a
       * unified compute shader.
       */
      radv_describe_pipeline_bind(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   } else {
      radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
   }
}
1174 
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
                           VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount,
                           const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
                           const uint32_t *pDynamicOffsets)
{
   API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet, descriptorSetCount,
              pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType)
{
   API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount,
                           const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
                           const VkDeviceSize *pStrides)
{
   API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers,
                    pOffsets, pSizes, pStrides);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags)
{
   API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)
{
   API_MARKER(EndQuery, commandBuffer, queryPool, query);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool,
                        uint32_t query)
{
   API_MARKER_ALIAS(WriteTimestamp2, WriteTimestamp, commandBuffer, stage, queryPool, query);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags,
                      uint32_t offset, uint32_t size, const void *pValues)
{
   API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo)
{
   API_MARKER_ALIAS(BeginRendering, BeginRenderPass, commandBuffer, pRenderingInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndRendering(VkCommandBuffer commandBuffer)
{
   API_MARKER_ALIAS(EndRendering, EndRenderPass, commandBuffer);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers)
{
   API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
                                    const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo)
{
   /* There is no ExecuteIndirect Vulkan event in RGP yet. */
   API_MARKER_ALIAS(ExecuteGeneratedCommandsEXT, ExecuteCommands, commandBuffer, isPreprocessed,
                    pGeneratedCommandsInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
                    const VkViewport *pViewports)
{
   API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
                   const VkRect2D *pScissors)
{
   API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
   API_MARKER(SetLineWidth, commandBuffer, lineWidth);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp,
                     float depthBiasSlopeFactor)
{
   API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
{
   API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
{
   API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask)
{
   API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask)
{
   API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference)
{
   API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
}

/* VK_EXT_debug_marker */
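/* Debug marker/label strings become RGP user events: Begin/End map to
 * UserEventPush/UserEventPop (they nest), Insert maps to UserEventTrigger
 * (a single point event).
 */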
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName);
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo)
{
   /* no-op */
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventPush, pLabelInfo->pLabelName);

   device->layer_dispatch.rgp.CmdBeginDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndDebugUtilsLabelEXT(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);

   device->layer_dispatch.rgp.CmdEndDebugUtilsLabelEXT(commandBuffer);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pLabelInfo->pLabelName);

   device->layer_dispatch.rgp.CmdInsertDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}

/* Pipelines */
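/* Translate a Mesa shader stage to the AMD hardware stage RGP reports:
 * LS/ES for merged vertex/tess-eval inputs, GS for NGG, geometry and mesh
 * shaders, VS for the legacy hardware vertex stage, PS for fragment, and
 * CS for compute, task and all ray tracing stages (which execute as a
 * unified compute shader).
 */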
static enum rgp_hardware_stages
radv_get_rgp_shader_stage(struct radv_shader *shader)
{
   switch (shader->info.stage) {
   case MESA_SHADER_VERTEX:
      if (shader->info.vs.as_ls)
         return RGP_HW_STAGE_LS;
      else if (shader->info.vs.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_TESS_CTRL:
      return RGP_HW_STAGE_HS;
   case MESA_SHADER_TESS_EVAL:
      if (shader->info.tes.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_MESH:
   case MESA_SHADER_GEOMETRY:
      return RGP_HW_STAGE_GS;
   case MESA_SHADER_FRAGMENT:
      return RGP_HW_STAGE_PS;
   case MESA_SHADER_TASK:
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_RAYGEN:
   case MESA_SHADER_CLOSEST_HIT:
   case MESA_SHADER_ANY_HIT:
   case MESA_SHADER_INTERSECTION:
   case MESA_SHADER_MISS:
   case MESA_SHADER_CALLABLE:
      return RGP_HW_STAGE_CS;
   default:
      unreachable("invalid mesa shader stage");
   }
}

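/* Fill one RGP shader record. The shader hash is synthesized from the CPU
 * pointer of the radv_shader (low/high 32 bits), base_address keeps the low
 * 48 bits of the GPU VA, and lds_size is converted to bytes using the LDS
 * granularity (1024 for fragment shaders on GFX11+, otherwise the per-chip
 * encode granularity).
 */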
static void
radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data,
                             struct radv_shader *shader, uint64_t va)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   unsigned lds_increment = pdev->info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT
                               ? 1024
                               : pdev->info.lds_encode_granularity;

   memset(shader_data->rt_shader_name, 0, sizeof(shader_data->rt_shader_name));
   shader_data->hash[0] = (uint64_t)(uintptr_t)shader;
   shader_data->hash[1] = (uint64_t)(uintptr_t)shader >> 32;
   shader_data->code_size = shader->code_size;
   shader_data->code = shader->code;
   shader_data->vgpr_count = shader->config.num_vgprs;
   shader_data->sgpr_count = shader->config.num_sgprs;
   shader_data->scratch_memory_size = shader->config.scratch_bytes_per_wave;
   shader_data->lds_size = shader->config.lds_size * lds_increment;
   shader_data->wavefront_size = shader->info.wave_size;
   shader_data->base_address = va & 0xffffffffffff;
   shader_data->elf_symbol_offset = 0;
   shader_data->hw_stage = radv_get_rgp_shader_stage(shader);
   shader_data->is_combined = false;
}

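/* Build a code object record covering every shader of the pipeline and
 * append it to the SQTT code object list; the list is guarded by a lock
 * because pipelines can be created concurrently from multiple threads.
 */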
static VkResult
radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)
{
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_code_object *code_object = &sqtt->rgp_code_object;
   struct rgp_code_object_record *record;

   record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = pipeline->pipeline_hash;
   record->pipeline_hash[1] = pipeline->pipeline_hash;
   record->is_rt = false;

   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      struct radv_shader *shader = pipeline->shaders[i];

      if (!shader)
         continue;

      radv_fill_code_object_record(device, &record->shader_data[i], shader, radv_sqtt_shader_get_va_reloc(pipeline, i));

      record->shader_stages_mask |= (1 << i);
      record->num_shaders_combined++;
   }

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}

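/* Ray tracing pipelines get one record per shader so that each shows up
 * individually in RGP, with a synthesized name: rgen_N, chit_N, miss_N,
 * call_N, "traversal" for the combined traversal/intersection shader and
 * "_amdgpu_cs_main" for the compute prolog.
 */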
static VkResult
radv_add_rt_record(struct radv_device *device, struct rgp_code_object *code_object,
                   struct radv_ray_tracing_pipeline *pipeline, struct radv_shader *shader, uint32_t stack_size,
                   uint32_t index, uint64_t hash)
{
   struct rgp_code_object_record *record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   struct rgp_shader_data *shader_data = &record->shader_data[shader->info.stage];

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = hash;
   record->pipeline_hash[1] = hash;

   radv_fill_code_object_record(device, shader_data, shader, shader->va);
   shader_data->rt_stack_size = stack_size;

   record->shader_stages_mask |= (1 << shader->info.stage);
   record->is_rt = true;
   switch (shader->info.stage) {
   case MESA_SHADER_RAYGEN:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "rgen_%d", index);
      break;
   case MESA_SHADER_CLOSEST_HIT:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "chit_%d", index);
      break;
   case MESA_SHADER_MISS:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "miss_%d", index);
      break;
   case MESA_SHADER_INTERSECTION:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "traversal");
      break;
   case MESA_SHADER_CALLABLE:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "call_%d", index);
      break;
   case MESA_SHADER_COMPUTE:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "_amdgpu_cs_main");
      break;
   default:
      unreachable("invalid rt stage");
   }
   record->num_shaders_combined = 1;

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}

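/* Derive a unique per-record hash for one RT stage from the pipeline hash
 * and the stage index; callers use the low 64 bits of the SHA-1 digest as
 * the record hash.
 */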
static void
compute_unique_rt_sha(uint64_t pipeline_hash, unsigned index, unsigned char sha1[SHA1_DIGEST_LENGTH])
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, &pipeline_hash, sizeof(pipeline_hash));
   _mesa_sha1_update(&ctx, &index, sizeof(index));
   _mesa_sha1_final(&ctx, sha1);
}

static VkResult
radv_register_rt_stage(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, uint32_t index,
                       uint32_t stack_size, struct radv_shader *shader)
{
   unsigned char sha1[SHA1_DIGEST_LENGTH];

   compute_unique_rt_sha(pipeline->base.base.pipeline_hash, index, sha1);

   /* The ac_sqtt_add_* helpers return a bool, not a VkResult. */
   if (!ac_sqtt_add_pso_correlation(&device->sqtt, *(uint64_t *)sha1, pipeline->base.base.pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, *(uint64_t *)sha1, shader->va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   return radv_add_rt_record(device, &device->sqtt.rgp_code_object, pipeline, shader, stack_size, index,
                             *(uint64_t *)sha1);
}

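/* Register every stage of a ray tracing pipeline, then the combined
 * traversal shader (whose stack size is the worst-case any-hit plus
 * worst-case intersection stack) and finally the prolog.
 */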
static VkResult
radv_register_rt_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
   VkResult result = VK_SUCCESS;

   uint32_t max_any_hit_stack_size = 0;
   uint32_t max_intersection_stack_size = 0;

   for (unsigned i = 0; i < pipeline->stage_count; i++) {
      struct radv_ray_tracing_stage *stage = &pipeline->stages[i];
      if (stage->stage == MESA_SHADER_ANY_HIT)
         max_any_hit_stack_size = MAX2(max_any_hit_stack_size, stage->stack_size);
      else if (stage->stage == MESA_SHADER_INTERSECTION)
         max_intersection_stack_size = MAX2(max_intersection_stack_size, stage->stack_size);

      if (!stage->shader)
         continue;

      result = radv_register_rt_stage(device, pipeline, i, stage->stack_size, stage->shader);
      if (result != VK_SUCCESS)
         return result;
   }

   uint32_t idx = pipeline->stage_count;

   /* Combined traversal shader */
   if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]) {
      result = radv_register_rt_stage(device, pipeline, idx++, max_any_hit_stack_size + max_intersection_stack_size,
                                      pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Prolog */
   result = radv_register_rt_stage(device, pipeline, idx++, 0, pipeline->prolog);

   return result;
}

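/* Register a graphics or compute pipeline: add a PSO correlation entry,
 * report a code object load event at the lowest shader VA and record the
 * per-shader data.
 */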
static VkResult
radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
{
   uint64_t base_va = ~0ull;

   /* The ac_sqtt_add_* helpers return a bool, not a VkResult. */
   if (!ac_sqtt_add_pso_correlation(&device->sqtt, pipeline->pipeline_hash, pipeline->pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Find the lowest shader BO VA. */
   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      struct radv_shader *shader = pipeline->shaders[i];
      uint64_t va;

      if (!shader)
         continue;

      va = radv_sqtt_shader_get_va_reloc(pipeline, i);
      base_va = MIN2(base_va, va);
   }

   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, pipeline->pipeline_hash, base_va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   return radv_add_code_object(device, pipeline);
}

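/* Drop the PSO correlation, loader event and code object records that were
 * created for the given pipeline (or per-RT-stage) hash.
 */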
static void
radv_unregister_records(struct radv_device *device, uint64_t hash)
{
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_pso_correlation *pso_correlation = &sqtt->rgp_pso_correlation;
   struct rgp_loader_events *loader_events = &sqtt->rgp_loader_events;
   struct rgp_code_object *code_object = &sqtt->rgp_code_object;

   /* Destroy the PSO correlation record. */
   simple_mtx_lock(&pso_correlation->lock);
   list_for_each_entry_safe (struct rgp_pso_correlation_record, record, &pso_correlation->record, list) {
      if (record->pipeline_hash[0] == hash) {
         pso_correlation->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&pso_correlation->lock);

   /* Destroy the code object loader record. */
   simple_mtx_lock(&loader_events->lock);
   list_for_each_entry_safe (struct rgp_loader_events_record, record, &loader_events->record, list) {
      if (record->code_object_hash[0] == hash) {
         loader_events->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&loader_events->lock);

   /* Destroy the code object record. */
   simple_mtx_lock(&code_object->lock);
   list_for_each_entry_safe (struct rgp_code_object_record, record, &code_object->record, list) {
      if (record->pipeline_hash[0] == hash) {
         code_object->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&code_object->lock);
}

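/* Pipeline creation wrappers: forward the call to the driver, then register
 * SQTT records for each successfully created pipeline. Graphics shaders are
 * first relocated into a dedicated BO so their VAs match what the captured
 * command stream references. Pipeline libraries are skipped since they are
 * never bound directly.
 */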
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                             const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                             VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                               pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      const VkPipelineCreateFlagBits2 create_flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

      result = radv_sqtt_reloc_graphics_shaders(device, radv_pipeline_to_graphics(pipeline));
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                            const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                              pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                  VkPipelineCache pipelineCache, uint32_t count,
                                  const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateRayTracingPipelinesKHR(_device, deferredOperation, pipelineCache, count,
                                                                    pCreateInfos, pAllocator, pPipelines);
   if (result != VK_SUCCESS && result != VK_OPERATION_DEFERRED_KHR)
      return result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      const VkPipelineCreateFlagBits2 create_flags = vk_rt_pipeline_create_flags(&pCreateInfos[i]);
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

      result = radv_register_rt_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR void VKAPI_CALL
sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   /* Ray tracing pipelines have multiple records, each with its own hash. */
   if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
      /* There is one record for each stage, plus one for the traversal shader and one for the prolog. */
      uint32_t record_count = radv_pipeline_to_ray_tracing(pipeline)->stage_count + 2;
      unsigned char sha1[SHA1_DIGEST_LENGTH];
      for (uint32_t i = 0; i < record_count; ++i) {
         compute_unique_rt_sha(pipeline->pipeline_hash, i, sha1);
         radv_unregister_records(device, *(uint64_t *)sha1);
      }
   } else {
      radv_unregister_records(device, pipeline->pipeline_hash);
   }

   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
      struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;

      radv_free_shader_memory(device, reloc->alloc);
      free(reloc);
   }

   device->layer_dispatch.rgp.DestroyPipeline(_device, _pipeline, pAllocator);
}

#undef API_MARKER_ALIAS
#undef API_MARKER