• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "radv_cs.h"
25 #include "radv_private.h"
26 #include "radv_shader.h"
27 #include "vk_common_entrypoints.h"
28 #include "vk_semaphore.h"
29 #include "wsi_common_entrypoints.h"
30 
31 #include "ac_rgp.h"
32 #include "ac_sqtt.h"
33 
34 #include "vk_pipeline.h"
35 
/* Re-emits the SPI_SHADER_PGM_LO_* registers for every active stage of the
 * pipeline so they point at the relocated (contiguous) copies of the shader
 * binaries that RGP requires for instruction-level tracing.
 *
 * The register chosen per API stage depends on the hardware stage mapping
 * (merged shaders on GFX9+, NGG, legacy VS/GS) — presumably mirroring the
 * regular pipeline emission paths; verify against radv_pipeline emission.
 */
void
radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline)
{
   const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
   struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
   struct radeon_cmdbuf *cs = cmd_buffer->cs;
   uint64_t va;

   /* Keep the relocated shader BO resident for this command buffer. */
   radv_cs_add_buffer(cmd_buffer->device->ws, cs, reloc->bo);

   /* VS */
   if (pipeline->base.shaders[MESA_SHADER_VERTEX]) {
      struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_VERTEX];

      va = reloc->va[MESA_SHADER_VERTEX];
      if (vs->info.vs.as_ls) {
         /* VS compiled to run on the LS hw stage (tessellation enabled). */
         radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
      } else if (vs->info.vs.as_es) {
         /* VS on the ES hw stage: PGM_LO + high bits (MEM_BASE) pair. */
         radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else if (vs->info.is_ngg) {
         /* NGG path uses the ES register, single dword. */
         radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
      } else {
         /* Legacy hardware VS. */
         radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* TCS */
   if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]) {
      va = reloc->va[MESA_SHADER_TESS_CTRL];

      if (gfx_level >= GFX9) {
         /* GFX10+ and GFX9 use different LS register offsets. */
         if (gfx_level >= GFX10) {
            radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
         } else {
            radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
         }
      } else {
         /* Pre-GFX9: dedicated HS stage, PGM_LO/MEM_BASE pair. */
         radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
      }
   }

   /* TES */
   if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]) {
      struct radv_shader *tes = pipeline->base.shaders[MESA_SHADER_TESS_EVAL];

      va = reloc->va[MESA_SHADER_TESS_EVAL];
      if (tes->info.is_ngg) {
         radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
      } else if (tes->info.tes.as_es) {
         /* TES feeding a geometry shader runs on the ES hw stage. */
         radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else {
         /* TES as the last VS-like stage: legacy hardware VS registers. */
         radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* GS */
   if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) {
      struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];

      va = reloc->va[MESA_SHADER_GEOMETRY];
      if (gs->info.is_ngg) {
         radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
      } else {
         if (gfx_level >= GFX9) {
            if (gfx_level >= GFX10) {
               radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
            } else {
               radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
            }
         } else {
            /* Pre-GFX9: dedicated GS stage registers. */
            radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 2);
            radeon_emit(cs, va >> 8);
            radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
         }
      }
   }

   /* FS */
   if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]) {
      va = reloc->va[MESA_SHADER_FRAGMENT];

      radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 2);
      radeon_emit(cs, va >> 8);
      radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
   }

   /* MS */
   if (pipeline->base.shaders[MESA_SHADER_MESH]) {
      va = reloc->va[MESA_SHADER_MESH];

      /* Mesh shaders always run on the NGG (ES register) path. */
      radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
   }
}
139 
140 static uint64_t
radv_sqtt_shader_get_va_reloc(struct radv_pipeline * pipeline,gl_shader_stage stage)141 radv_sqtt_shader_get_va_reloc(struct radv_pipeline *pipeline, gl_shader_stage stage)
142 {
143    if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
144       struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
145       struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;
146       return reloc->va[stage];
147    }
148 
149    return radv_shader_get_va(pipeline->shaders[stage]);
150 }
151 
152 static VkResult
radv_sqtt_reloc_graphics_shaders(struct radv_device * device,struct radv_graphics_pipeline * pipeline)153 radv_sqtt_reloc_graphics_shaders(struct radv_device *device, struct radv_graphics_pipeline *pipeline)
154 {
155    struct radv_shader_dma_submission *submission = NULL;
156    struct radv_sqtt_shaders_reloc *reloc;
157    uint32_t code_size = 0;
158 
159    reloc = calloc(1, sizeof(*reloc));
160    if (!reloc)
161       return VK_ERROR_OUT_OF_HOST_MEMORY;
162 
163    /* Compute the total code size. */
164    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
165       const struct radv_shader *shader = pipeline->base.shaders[i];
166       if (!shader)
167          continue;
168 
169       code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
170    }
171 
172    /* Allocate memory for all shader binaries. */
173    reloc->alloc = radv_alloc_shader_memory(device, code_size, false, pipeline);
174    if (!reloc->alloc) {
175       free(reloc);
176       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
177    }
178 
179    reloc->bo = reloc->alloc->arena->bo;
180 
181    /* Relocate shader binaries to be contiguous in memory as requested by RGP. */
182    uint64_t slab_va = radv_buffer_get_va(reloc->bo) + reloc->alloc->offset;
183    char *slab_ptr = reloc->alloc->arena->ptr + reloc->alloc->offset;
184    uint64_t offset = 0;
185 
186    if (device->shader_use_invisible_vram) {
187       submission = radv_shader_dma_get_submission(device, reloc->bo, slab_va, code_size);
188       if (!submission)
189          return VK_ERROR_UNKNOWN;
190    }
191 
192    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
193       const struct radv_shader *shader = pipeline->base.shaders[i];
194       void *dest_ptr;
195       if (!shader)
196          continue;
197 
198       reloc->va[i] = slab_va + offset;
199 
200       if (device->shader_use_invisible_vram)
201          dest_ptr = submission->ptr + offset;
202       else
203          dest_ptr = slab_ptr + offset;
204 
205       memcpy(dest_ptr, shader->code, shader->code_size);
206 
207       offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
208    }
209 
210    if (device->shader_use_invisible_vram) {
211       if (!radv_shader_dma_submit(device, submission, &pipeline->base.shader_upload_seq))
212          return VK_ERROR_UNKNOWN;
213    }
214 
215    pipeline->sqtt_shaders_reloc = reloc;
216 
217    return VK_SUCCESS;
218 }
219 
220 static void
radv_write_begin_general_api_marker(struct radv_cmd_buffer * cmd_buffer,enum rgp_sqtt_marker_general_api_type api_type)221 radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
222 {
223    struct rgp_sqtt_marker_general_api marker = {0};
224 
225    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
226    marker.api_type = api_type;
227 
228    radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
229 }
230 
231 static void
radv_write_end_general_api_marker(struct radv_cmd_buffer * cmd_buffer,enum rgp_sqtt_marker_general_api_type api_type)232 radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
233 {
234    struct rgp_sqtt_marker_general_api marker = {0};
235 
236    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
237    marker.api_type = api_type;
238    marker.is_end = 1;
239 
240    radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
241 }
242 
243 static void
radv_write_event_marker(struct radv_cmd_buffer * cmd_buffer,enum rgp_sqtt_marker_event_type api_type,uint32_t vertex_offset_user_data,uint32_t instance_offset_user_data,uint32_t draw_index_user_data)244 radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
245                         uint32_t vertex_offset_user_data, uint32_t instance_offset_user_data,
246                         uint32_t draw_index_user_data)
247 {
248    struct rgp_sqtt_marker_event marker = {0};
249 
250    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
251    marker.api_type = api_type;
252    marker.cmd_id = cmd_buffer->state.num_events++;
253    marker.cb_id = cmd_buffer->sqtt_cb_id;
254 
255    if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) {
256       vertex_offset_user_data = 0;
257       instance_offset_user_data = 0;
258    }
259 
260    if (draw_index_user_data == UINT_MAX)
261       draw_index_user_data = vertex_offset_user_data;
262 
263    marker.vertex_offset_reg_idx = vertex_offset_user_data;
264    marker.instance_offset_reg_idx = instance_offset_user_data;
265    marker.draw_index_reg_idx = draw_index_user_data;
266 
267    radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
268 }
269 
270 static void
radv_write_event_with_dims_marker(struct radv_cmd_buffer * cmd_buffer,enum rgp_sqtt_marker_event_type api_type,uint32_t x,uint32_t y,uint32_t z)271 radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
272                                   uint32_t x, uint32_t y, uint32_t z)
273 {
274    struct rgp_sqtt_marker_event_with_dims marker = {0};
275 
276    marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
277    marker.event.api_type = api_type;
278    marker.event.cmd_id = cmd_buffer->state.num_events++;
279    marker.event.cb_id = cmd_buffer->sqtt_cb_id;
280    marker.event.has_thread_dims = 1;
281 
282    marker.thread_x = x;
283    marker.thread_y = y;
284    marker.thread_z = z;
285 
286    radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
287 }
288 
289 void
radv_write_user_event_marker(struct radv_cmd_buffer * cmd_buffer,enum rgp_sqtt_marker_user_event_type type,const char * str)290 radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_user_event_type type,
291                              const char *str)
292 {
293    if (likely(!cmd_buffer->device->sqtt.bo))
294       return;
295 
296    if (type == UserEventPop) {
297       assert(str == NULL);
298       struct rgp_sqtt_marker_user_event marker = {0};
299       marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
300       marker.data_type = type;
301 
302       radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
303    } else {
304       assert(str != NULL);
305       unsigned len = strlen(str);
306       struct rgp_sqtt_marker_user_event_with_length marker = {0};
307       marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
308       marker.user_event.data_type = type;
309       marker.length = align(len, 4);
310 
311       uint8_t *buffer = alloca(sizeof(marker) + marker.length);
312       memset(buffer, 0, sizeof(marker) + marker.length);
313       memcpy(buffer, &marker, sizeof(marker));
314       memcpy(buffer + sizeof(marker), str, len);
315 
316       radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4);
317    }
318 }
319 
320 void
radv_describe_begin_cmd_buffer(struct radv_cmd_buffer * cmd_buffer)321 radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
322 {
323    uint64_t device_id = (uintptr_t)cmd_buffer->device;
324    struct rgp_sqtt_marker_cb_start marker = {0};
325 
326    if (likely(!cmd_buffer->device->sqtt.bo))
327       return;
328 
329    /* Reserve a command buffer ID for SQTT. */
330    enum amd_ip_type ip_type = radv_queue_family_to_ring(cmd_buffer->device->physical_device, cmd_buffer->qf);
331    union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type);
332    cmd_buffer->sqtt_cb_id = cb_id.all;
333 
334    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
335    marker.cb_id = cmd_buffer->sqtt_cb_id;
336    marker.device_id_low = device_id;
337    marker.device_id_high = device_id >> 32;
338    marker.queue = cmd_buffer->qf;
339    marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
340 
341    if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
342       marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;
343 
344    if (cmd_buffer->device->instance->drirc.legacy_sparse_binding)
345       marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
346 
347    radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
348 }
349 
350 void
radv_describe_end_cmd_buffer(struct radv_cmd_buffer * cmd_buffer)351 radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
352 {
353    uint64_t device_id = (uintptr_t)cmd_buffer->device;
354    struct rgp_sqtt_marker_cb_end marker = {0};
355 
356    if (likely(!cmd_buffer->device->sqtt.bo))
357       return;
358 
359    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
360    marker.cb_id = cmd_buffer->sqtt_cb_id;
361    marker.device_id_low = device_id;
362    marker.device_id_high = device_id >> 32;
363 
364    radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
365 }
366 
367 void
radv_describe_draw(struct radv_cmd_buffer * cmd_buffer)368 radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
369 {
370    if (likely(!cmd_buffer->device->sqtt.bo))
371       return;
372 
373    radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
374 }
375 
376 void
radv_describe_dispatch(struct radv_cmd_buffer * cmd_buffer,const struct radv_dispatch_info * info)377 radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info)
378 {
379    if (likely(!cmd_buffer->device->sqtt.bo))
380       return;
381 
382    if (info->indirect) {
383       radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
384    } else {
385       radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, info->blocks[0],
386                                         info->blocks[1], info->blocks[2]);
387    }
388 }
389 
390 void
radv_describe_begin_render_pass_clear(struct radv_cmd_buffer * cmd_buffer,VkImageAspectFlagBits aspects)391 radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects)
392 {
393    cmd_buffer->state.current_event_type =
394       (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ? EventRenderPassColorClear : EventRenderPassDepthStencilClear;
395 }
396 
397 void
radv_describe_end_render_pass_clear(struct radv_cmd_buffer * cmd_buffer)398 radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)
399 {
400    cmd_buffer->state.current_event_type = EventInternalUnknown;
401 }
402 
403 void
radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer * cmd_buffer)404 radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
405 {
406    cmd_buffer->state.current_event_type = EventRenderPassResolve;
407 }
408 
409 void
radv_describe_end_render_pass_resolve(struct radv_cmd_buffer * cmd_buffer)410 radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
411 {
412    cmd_buffer->state.current_event_type = EventInternalUnknown;
413 }
414 
415 void
radv_describe_barrier_end_delayed(struct radv_cmd_buffer * cmd_buffer)416 radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
417 {
418    struct rgp_sqtt_marker_barrier_end marker = {0};
419 
420    if (likely(!cmd_buffer->device->sqtt.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)
421       return;
422 
423    cmd_buffer->state.pending_sqtt_barrier_end = false;
424 
425    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
426    marker.cb_id = cmd_buffer->sqtt_cb_id;
427 
428    marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;
429 
430    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
431       marker.wait_on_eop_ts = true;
432    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
433       marker.vs_partial_flush = true;
434    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
435       marker.ps_partial_flush = true;
436    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
437       marker.cs_partial_flush = true;
438    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
439       marker.pfp_sync_me = true;
440    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
441       marker.sync_cp_dma = true;
442    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
443       marker.inval_tcp = true;
444    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
445       marker.inval_sqI = true;
446    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
447       marker.inval_sqK = true;
448    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
449       marker.flush_tcc = true;
450    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
451       marker.inval_tcc = true;
452    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
453       marker.flush_cb = true;
454    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
455       marker.inval_cb = true;
456    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
457       marker.flush_db = true;
458    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
459       marker.inval_db = true;
460    if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
461       marker.inval_gl1 = true;
462 
463    radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
464 
465    cmd_buffer->state.num_layout_transitions = 0;
466 }
467 
468 void
radv_describe_barrier_start(struct radv_cmd_buffer * cmd_buffer,enum rgp_barrier_reason reason)469 radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason)
470 {
471    struct rgp_sqtt_marker_barrier_start marker = {0};
472 
473    if (likely(!cmd_buffer->device->sqtt.bo))
474       return;
475 
476    if (cmd_buffer->state.in_barrier) {
477       assert(!"attempted to start a barrier while already in a barrier");
478       return;
479    }
480 
481    radv_describe_barrier_end_delayed(cmd_buffer);
482    cmd_buffer->state.sqtt_flush_bits = 0;
483    cmd_buffer->state.in_barrier = true;
484 
485    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
486    marker.cb_id = cmd_buffer->sqtt_cb_id;
487    marker.dword02 = reason;
488 
489    radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
490 }
491 
492 void
radv_describe_barrier_end(struct radv_cmd_buffer * cmd_buffer)493 radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
494 {
495    cmd_buffer->state.in_barrier = false;
496    cmd_buffer->state.pending_sqtt_barrier_end = true;
497 }
498 
499 void
radv_describe_layout_transition(struct radv_cmd_buffer * cmd_buffer,const struct radv_barrier_data * barrier)500 radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier)
501 {
502    struct rgp_sqtt_marker_layout_transition marker = {0};
503 
504    if (likely(!cmd_buffer->device->sqtt.bo))
505       return;
506 
507    if (!cmd_buffer->state.in_barrier) {
508       assert(!"layout transition marker should be only emitted inside a barrier marker");
509       return;
510    }
511 
512    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
513    marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
514    marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
515    marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
516    marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
517    marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
518    marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
519    marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
520    marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;
521 
522    radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
523 
524    cmd_buffer->state.num_layout_transitions++;
525 }
526 
527 void
radv_describe_begin_accel_struct_build(struct radv_cmd_buffer * cmd_buffer,uint32_t count)528 radv_describe_begin_accel_struct_build(struct radv_cmd_buffer *cmd_buffer, uint32_t count)
529 {
530    if (likely(!cmd_buffer->device->sqtt.bo))
531       return;
532 
533    char marker[64];
534    snprintf(marker, sizeof(marker), "vkCmdBuildAccelerationStructuresKHR(%u)", count);
535    radv_write_user_event_marker(cmd_buffer, UserEventPush, marker);
536 }
537 
538 void
radv_describe_end_accel_struct_build(struct radv_cmd_buffer * cmd_buffer)539 radv_describe_end_accel_struct_build(struct radv_cmd_buffer *cmd_buffer)
540 {
541    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
542 }
543 
544 static void
radv_describe_pipeline_bind(struct radv_cmd_buffer * cmd_buffer,VkPipelineBindPoint pipelineBindPoint,struct radv_pipeline * pipeline)545 radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
546                             struct radv_pipeline *pipeline)
547 {
548    struct rgp_sqtt_marker_pipeline_bind marker = {0};
549 
550    if (likely(!cmd_buffer->device->sqtt.bo))
551       return;
552 
553    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
554    marker.cb_id = cmd_buffer->sqtt_cb_id;
555    marker.bind_point = pipelineBindPoint;
556    marker.api_pso_hash[0] = pipeline->pipeline_hash;
557    marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;
558 
559    radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
560 }
561 
562 /* Queue events */
563 static void
radv_describe_queue_event(struct radv_queue * queue,struct rgp_queue_event_record * record)564 radv_describe_queue_event(struct radv_queue *queue, struct rgp_queue_event_record *record)
565 {
566    struct radv_device *device = queue->device;
567    struct ac_sqtt *sqtt = &device->sqtt;
568    struct rgp_queue_event *queue_event = &sqtt->rgp_queue_event;
569 
570    simple_mtx_lock(&queue_event->lock);
571    list_addtail(&record->list, &queue_event->record);
572    queue_event->record_count++;
573    simple_mtx_unlock(&queue_event->lock);
574 }
575 
576 static VkResult
radv_describe_queue_present(struct radv_queue * queue,uint64_t cpu_timestamp,void * gpu_timestamp_ptr)577 radv_describe_queue_present(struct radv_queue *queue, uint64_t cpu_timestamp, void *gpu_timestamp_ptr)
578 {
579    struct rgp_queue_event_record *record;
580 
581    record = calloc(1, sizeof(struct rgp_queue_event_record));
582    if (!record)
583       return VK_ERROR_OUT_OF_HOST_MEMORY;
584 
585    record->event_type = SQTT_QUEUE_TIMING_EVENT_PRESENT;
586    record->cpu_timestamp = cpu_timestamp;
587    record->gpu_timestamps[0] = gpu_timestamp_ptr;
588    record->queue_info_index = queue->vk.queue_family_index;
589 
590    radv_describe_queue_event(queue, record);
591 
592    return VK_SUCCESS;
593 }
594 
595 static VkResult
radv_describe_queue_submit(struct radv_queue * queue,struct radv_cmd_buffer * cmd_buffer,uint32_t cmdbuf_idx,uint64_t cpu_timestamp,void * pre_gpu_timestamp_ptr,void * post_gpu_timestamp_ptr)596 radv_describe_queue_submit(struct radv_queue *queue, struct radv_cmd_buffer *cmd_buffer, uint32_t cmdbuf_idx,
597                            uint64_t cpu_timestamp, void *pre_gpu_timestamp_ptr, void *post_gpu_timestamp_ptr)
598 {
599    struct radv_device *device = queue->device;
600    struct rgp_queue_event_record *record;
601 
602    record = calloc(1, sizeof(struct rgp_queue_event_record));
603    if (!record)
604       return VK_ERROR_OUT_OF_HOST_MEMORY;
605 
606    record->event_type = SQTT_QUEUE_TIMING_EVENT_CMDBUF_SUBMIT;
607    record->api_id = (uintptr_t)cmd_buffer;
608    record->cpu_timestamp = cpu_timestamp;
609    record->frame_index = device->vk.current_frame;
610    record->gpu_timestamps[0] = pre_gpu_timestamp_ptr;
611    record->gpu_timestamps[1] = post_gpu_timestamp_ptr;
612    record->queue_info_index = queue->vk.queue_family_index;
613    record->submit_sub_index = cmdbuf_idx;
614 
615    radv_describe_queue_event(queue, record);
616 
617    return VK_SUCCESS;
618 }
619 
620 static VkResult
radv_describe_queue_semaphore(struct radv_queue * queue,struct vk_semaphore * sync,enum sqtt_queue_event_type event_type)621 radv_describe_queue_semaphore(struct radv_queue *queue, struct vk_semaphore *sync,
622                               enum sqtt_queue_event_type event_type)
623 {
624    struct rgp_queue_event_record *record;
625 
626    record = calloc(1, sizeof(struct rgp_queue_event_record));
627    if (!record)
628       return VK_ERROR_OUT_OF_HOST_MEMORY;
629 
630    record->event_type = event_type;
631    record->api_id = (uintptr_t)sync;
632    record->cpu_timestamp = os_time_get_nano();
633    record->queue_info_index = queue->vk.queue_family_index;
634 
635    radv_describe_queue_event(queue, record);
636 
637    return VK_SUCCESS;
638 }
639 
/* Per-present SQTT bookkeeping: finishes an in-flight capture (dumping it as
 * an RGP file), and starts a new capture when one was requested via the
 * sqtt_triggered flag.
 */
static void
radv_handle_sqtt(VkQueue _queue)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);

   /* Consume the trigger; it may be re-armed below if the capture failed. */
   bool trigger = queue->device->sqtt_triggered;
   queue->device->sqtt_triggered = false;

   if (queue->device->sqtt_enabled) {
      struct ac_sqtt_trace sqtt_trace = {0};

      /* Stop the thread trace before reading it back. */
      radv_end_sqtt(queue);
      queue->device->sqtt_enabled = false;

      /* TODO: Do something better than this whole sync. */
      queue->device->vk.dispatch_table.QueueWaitIdle(_queue);

      if (radv_get_sqtt_trace(queue, &sqtt_trace)) {
         struct ac_spm_trace spm_trace;

         /* Include SPM counter data when an SPM buffer exists. */
         if (queue->device->spm.bo)
            ac_spm_get_trace(&queue->device->spm, &spm_trace);

         ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &sqtt_trace,
                             queue->device->spm.bo ? &spm_trace : NULL);
      } else {
         /* Trigger a new capture if the driver failed to get
          * the trace because the buffer was too small.
          */
         trigger = true;
      }

      /* Clear resources used for this capture. */
      radv_reset_sqtt_trace(queue->device);
   }

   if (trigger) {
      /* Refuse to capture while the GPU is not in a stable profiling state. */
      if (ac_check_profile_state(&queue->device->physical_device->rad_info)) {
         fprintf(stderr, "radv: Canceling RGP trace request as a hang condition has been "
                         "detected. Force the GPU into a profiling mode with e.g. "
                         "\"echo profile_peak  > "
                         "/sys/class/drm/card0/device/power_dpm_force_performance_level\"\n");
         return;
      }

      /* Sample CPU/GPU clocks before starting the trace. */
      if (!radv_sqtt_sample_clocks(queue->device)) {
         fprintf(stderr, "radv: Failed to sample clocks\n");
      }

      radv_begin_sqtt(queue);
      assert(!queue->device->sqtt_enabled);
      queue->device->sqtt_enabled = true;
   }
}
695 
696 VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueuePresentKHR(VkQueue _queue,const VkPresentInfoKHR * pPresentInfo)697 sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
698 {
699    RADV_FROM_HANDLE(radv_queue, queue, _queue);
700    VkResult result;
701 
702    queue->sqtt_present = true;
703 
704    result = queue->device->layer_dispatch.rgp.QueuePresentKHR(_queue, pPresentInfo);
705    if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
706       return result;
707 
708    queue->sqtt_present = false;
709 
710    radv_handle_sqtt(_queue);
711 
712    return VK_SUCCESS;
713 }
714 
/* Present-path submit wrapper: prepends a small "timed" command buffer that
 * writes a GPU timestamp, so the present can be correlated in the RGP
 * queue-event stream, then forwards the augmented submit.
 */
static VkResult
radv_sqtt_wsi_submit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = queue->device;
   VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
   struct radeon_winsys_bo *gpu_timestamp_bo;
   uint32_t gpu_timestamp_offset;
   VkCommandBuffer timed_cmdbuf;
   void *gpu_timestamp_ptr;
   uint64_t cpu_timestamp;
   VkResult result = VK_SUCCESS;

   /* This path only handles the simple WSI present shape. */
   assert(submitCount <= 1 && pSubmits != NULL);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];
      VkSubmitInfo2 sqtt_submit = *pSubmit;

      assert(sqtt_submit.commandBufferInfoCount <= 1);

      /* Command buffers: original cmdbuf (if any) plus the timestamp one. */
      uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount + 1;

      new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
      if (!new_cmdbufs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      /* Sample the current CPU time before building the GPU timestamp cmdbuf. */
      cpu_timestamp = os_time_get_nano();

      result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamp_bo, &gpu_timestamp_offset, &gpu_timestamp_ptr);
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamp_bo, gpu_timestamp_offset,
                                          VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &timed_cmdbuf);
      if (result != VK_SUCCESS)
         goto fail;

      /* The timed cmdbuf runs first so the timestamp marks submission start. */
      new_cmdbufs[0] = (VkCommandBufferSubmitInfo){
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
         .commandBuffer = timed_cmdbuf,
      };

      if (sqtt_submit.commandBufferInfoCount == 1)
         new_cmdbufs[1] = sqtt_submit.pCommandBufferInfos[0];

      sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
      sqtt_submit.pCommandBufferInfos = new_cmdbufs;

      /* NOTE(review): the VkResult of this call (possible OOM) is ignored;
       * the present event record would then be silently missing — confirm
       * this best-effort behavior is intended.
       */
      radv_describe_queue_present(queue, cpu_timestamp, gpu_timestamp_ptr);

      result = queue->device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
      if (result != VK_SUCCESS)
         goto fail;

      FREE(new_cmdbufs);
   }

   return result;

fail:
   FREE(new_cmdbufs);
   return result;
}
781 
782 VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueueSubmit2(VkQueue _queue,uint32_t submitCount,const VkSubmitInfo2 * pSubmits,VkFence _fence)783 sqtt_QueueSubmit2(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
784 {
785    RADV_FROM_HANDLE(radv_queue, queue, _queue);
786    const bool is_gfx_or_ace = queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE;
787    struct radv_device *device = queue->device;
788    VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
789    VkResult result = VK_SUCCESS;
790 
791    /* Only consider queue events on graphics/compute when enabled. */
792    if (!device->sqtt_enabled || !radv_sqtt_queue_events_enabled() || !is_gfx_or_ace)
793       return queue->device->layer_dispatch.rgp.QueueSubmit2(_queue, submitCount, pSubmits, _fence);
794 
795    for (uint32_t i = 0; i < submitCount; i++) {
796       const VkSubmitInfo2 *pSubmit = &pSubmits[i];
797 
798       /* Wait semaphores */
799       for (uint32_t j = 0; j < pSubmit->waitSemaphoreInfoCount; j++) {
800          const VkSemaphoreSubmitInfo *pWaitSemaphoreInfo = &pSubmit->pWaitSemaphoreInfos[j];
801          VK_FROM_HANDLE(vk_semaphore, sem, pWaitSemaphoreInfo->semaphore);
802          radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_WAIT_SEMAPHORE);
803       }
804    }
805 
806    if (queue->sqtt_present)
807       return radv_sqtt_wsi_submit(_queue, submitCount, pSubmits, _fence);
808 
809    for (uint32_t i = 0; i < submitCount; i++) {
810       const VkSubmitInfo2 *pSubmit = &pSubmits[i];
811       VkSubmitInfo2 sqtt_submit = *pSubmit;
812 
813       /* Command buffers */
814       uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount * 3;
815       uint32_t cmdbuf_idx = 0;
816 
817       new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
818       if (!new_cmdbufs)
819          return VK_ERROR_OUT_OF_HOST_MEMORY;
820 
821       for (uint32_t j = 0; j < sqtt_submit.commandBufferInfoCount; j++) {
822          const VkCommandBufferSubmitInfo *pCommandBufferInfo = &sqtt_submit.pCommandBufferInfos[j];
823          struct radeon_winsys_bo *gpu_timestamps_bo[2];
824          uint32_t gpu_timestamps_offset[2];
825          VkCommandBuffer pre_timed_cmdbuf, post_timed_cmdbuf;
826          void *gpu_timestamps_ptr[2];
827          uint64_t cpu_timestamp;
828 
829          /* Sample the current CPU time before building the timed cmdbufs. */
830          cpu_timestamp = os_time_get_nano();
831 
832          result = radv_sqtt_acquire_gpu_timestamp(queue->device, &gpu_timestamps_bo[0], &gpu_timestamps_offset[0],
833                                                   &gpu_timestamps_ptr[0]);
834          if (result != VK_SUCCESS)
835             goto fail;
836 
837          result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[0], gpu_timestamps_offset[0],
838                                              VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &pre_timed_cmdbuf);
839          if (result != VK_SUCCESS)
840             goto fail;
841 
842          new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
843             .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
844             .commandBuffer = pre_timed_cmdbuf,
845          };
846 
847          new_cmdbufs[cmdbuf_idx++] = *pCommandBufferInfo;
848 
849          result = radv_sqtt_acquire_gpu_timestamp(queue->device, &gpu_timestamps_bo[1], &gpu_timestamps_offset[1],
850                                                   &gpu_timestamps_ptr[1]);
851          if (result != VK_SUCCESS)
852             goto fail;
853 
854          result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[1], gpu_timestamps_offset[1],
855                                              VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, &post_timed_cmdbuf);
856          if (result != VK_SUCCESS)
857             goto fail;
858 
859          new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
860             .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
861             .commandBuffer = post_timed_cmdbuf,
862          };
863 
864          RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBufferInfo->commandBuffer);
865          radv_describe_queue_submit(queue, cmd_buffer, j, cpu_timestamp, gpu_timestamps_ptr[0], gpu_timestamps_ptr[1]);
866       }
867 
868       sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
869       sqtt_submit.pCommandBufferInfos = new_cmdbufs;
870 
871       result = queue->device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
872       if (result != VK_SUCCESS)
873          goto fail;
874 
875       /* Signal semaphores */
876       for (uint32_t j = 0; j < sqtt_submit.signalSemaphoreInfoCount; j++) {
877          const VkSemaphoreSubmitInfo *pSignalSemaphoreInfo = &sqtt_submit.pSignalSemaphoreInfos[j];
878          VK_FROM_HANDLE(vk_semaphore, sem, pSignalSemaphoreInfo->semaphore);
879          radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_SIGNAL_SEMAPHORE);
880       }
881 
882       FREE(new_cmdbufs);
883    }
884 
885    return result;
886 
887 fail:
888    FREE(new_cmdbufs);
889    return result;
890 }
891 
/* Wraps a dispatched driver entrypoint call with begin/end API markers and
 * tags everything recorded in between with the given RGP event type:
 * cmd_name selects the Cmd* entrypoint to forward to, api_name the ApiCmd*
 * marker, event_name the EventCmd* type. Declares `cmd_buffer` in the
 * enclosing function scope.
 */
#define EVENT_MARKER_BASE(cmd_name, api_name, event_name, ...)                                                         \
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                                       \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                                                  \
   cmd_buffer->state.current_event_type = EventCmd##event_name;                                                        \
   cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__);                                                  \
   cmd_buffer->state.current_event_type = EventInternalUnknown;                                                        \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

/* API marker and event type share a name; the dispatched entrypoint differs. */
#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) EVENT_MARKER_BASE(cmd_name, api_name, api_name, __VA_ARGS__);

/* Entrypoint, API marker and event type all share the same name. */
#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
903 
/* Thin SQTT wrappers for the action (draw/dispatch/transfer/clear/barrier)
 * entrypoints: each brackets the real driver call with begin/end API markers
 * and tags the recorded commands with its RGP event type (EVENT_MARKER_BASE).
 * The *2 entrypoints reuse the legacy marker/event names via the ALIAS form.
 */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex,
             uint32_t firstInstance)
{
   EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex,
                    int32_t vertexOffset, uint32_t firstInstance)
{
   EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                     uint32_t stride)
{
   EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                            uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer,
                          VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
{
   EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                 uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount,
                stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   /* Forwarded as DispatchBase with a zero base (equivalent to Dispatch) so a
    * single driver path is used; RGP still reports it as Dispatch.
    */
   EVENT_MARKER_ALIAS(DispatchBase, Dispatch, commandBuffer, 0, 0, 0, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset)
{
   EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyBuffer2, CopyBuffer, commandBuffer, pCopyBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize fillSize,
                   uint32_t data)
{
   EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize,
                     const void *pData)
{
   EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyImageInfo)
{
   EVENT_MARKER_ALIAS(CopyImage2, CopyImage, commandBuffer, pCopyImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer, pCopyBufferToImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer, pCopyImageToBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBlitImage2(VkCommandBuffer commandBuffer, const VkBlitImageInfo2 *pBlitImageInfo)
{
   EVENT_MARKER_ALIAS(BlitImage2, BlitImage, commandBuffer, pBlitImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                        const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                               const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
                               const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
                         uint32_t rectCount, const VkClearRect *pRects)
{
   EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo)
{
   EVENT_MARKER_ALIAS(ResolveImage2, ResolveImage, commandBuffer, pResolveImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
                    const VkDependencyInfo *pDependencyInfos)
{
   EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents, pDependencyInfos);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo)
{
   EVENT_MARKER_ALIAS(PipelineBarrier2, PipelineBarrier, commandBuffer, pDependencyInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount)
{
   EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
                             uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride,
                             VkQueryResultFlags flags)
{
   EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer, dstOffset, stride,
                flags);
}
1059 
/* Ray-tracing variants of EVENT_MARKER: the event type may carry extra flags
 * OR'ed into the EventCmd* value, and the API marker is always Dispatch
 * (RT work is reported under the compute/dispatch umbrella).
 */
#define EVENT_RT_MARKER(cmd_name, flags, ...) EVENT_MARKER_BASE(cmd_name, Dispatch, cmd_name | flags, __VA_ARGS__);

#define EVENT_RT_MARKER_ALIAS(cmd_name, event_name, flags, ...)                                                        \
   EVENT_MARKER_BASE(cmd_name, Dispatch, event_name | flags, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
                     uint32_t height, uint32_t depth)
{
   EVENT_RT_MARKER(TraceRaysKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
                             const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
                             VkDeviceAddress indirectDeviceAddress)
{
   EVENT_RT_MARKER(TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, VkDeviceAddress indirectDeviceAddress)
{
   /* Reported under the TraceRaysIndirectKHR event type. */
   EVENT_RT_MARKER_ALIAS(TraceRaysIndirect2KHR, TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer,
                         indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureToMemoryKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyMemoryToAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   EVENT_MARKER(DrawMeshTasksEXT, commandBuffer, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 uint32_t drawCount, uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectEXT, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                      VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                      uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectCountEXT, commandBuffer, buffer, offset, countBuffer, countBufferOffset,
                maxDrawCount, stride);
}
1136 
#undef EVENT_RT_MARKER_ALIAS
#undef EVENT_RT_MARKER

#undef EVENT_MARKER
#undef EVENT_MARKER_ALIAS
#undef EVENT_MARKER_BASE

/* Brackets a dispatched driver call with begin/end API markers only; unlike
 * EVENT_MARKER_BASE it does not touch cmd_buffer->state.current_event_type.
 * Used for state/bind/query entrypoints below. Declares `cmd_buffer` in the
 * enclosing function scope.
 */
#define API_MARKER_ALIAS(cmd_name, api_name, ...)                                                                      \
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                                       \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                                                  \
   cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__);                                                  \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

/* Entrypoint and API marker share the same name. */
#define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
1151 
1152 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipeline _pipeline)1153 sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline)
1154 {
1155    RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
1156 
1157    API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);
1158 
1159    if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) {
1160       /* RGP seems to expect a compute bind point to detect and report RT pipelines, which makes
1161        * sense somehow given that RT shaders are compiled to an unified compute shader.
1162        */
1163       radv_describe_pipeline_bind(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1164    } else {
1165       radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
1166    }
1167 }
1168 
1169 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipelineLayout layout,uint32_t firstSet,uint32_t descriptorSetCount,const VkDescriptorSet * pDescriptorSets,uint32_t dynamicOffsetCount,const uint32_t * pDynamicOffsets)1170 sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
1171                            VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount,
1172                            const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
1173                            const uint32_t *pDynamicOffsets)
1174 {
1175    API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet, descriptorSetCount,
1176               pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
1177 }
1178 
1179 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkIndexType indexType)1180 sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType)
1181 {
1182    API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
1183 }
1184 
1185 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer,uint32_t firstBinding,uint32_t bindingCount,const VkBuffer * pBuffers,const VkDeviceSize * pOffsets,const VkDeviceSize * pSizes,const VkDeviceSize * pStrides)1186 sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount,
1187                            const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
1188                            const VkDeviceSize *pStrides)
1189 {
1190    API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers,
1191                     pOffsets, pSizes, pStrides);
1192 }
1193 
1194 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query,VkQueryControlFlags flags)1195 sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags)
1196 {
1197    API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
1198 }
1199 
1200 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query)1201 sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)
1202 {
1203    API_MARKER(EndQuery, commandBuffer, queryPool, query);
1204 }
1205 
1206 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,VkPipelineStageFlags2 stage,VkQueryPool queryPool,uint32_t query)1207 sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool,
1208                         uint32_t query)
1209 {
1210    API_MARKER_ALIAS(WriteTimestamp2, WriteTimestamp, commandBuffer, stage, queryPool, query);
1211 }
1212 
1213 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPushConstants(VkCommandBuffer commandBuffer,VkPipelineLayout layout,VkShaderStageFlags stageFlags,uint32_t offset,uint32_t size,const void * pValues)1214 sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags,
1215                       uint32_t offset, uint32_t size, const void *pValues)
1216 {
1217    API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);
1218 }
1219 
1220 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer,const VkRenderingInfo * pRenderingInfo)1221 sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo)
1222 {
1223    API_MARKER_ALIAS(BeginRendering, BeginRenderPass, commandBuffer, pRenderingInfo);
1224 }
1225 
1226 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndRendering(VkCommandBuffer commandBuffer)1227 sqtt_CmdEndRendering(VkCommandBuffer commandBuffer)
1228 {
1229    API_MARKER_ALIAS(EndRendering, EndRenderPass, commandBuffer);
1230 }
1231 
1232 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer,uint32_t commandBufferCount,const VkCommandBuffer * pCmdBuffers)1233 sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers)
1234 {
1235    API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers);
1236 }
1237 
1238 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer,VkBool32 isPreprocessed,const VkGeneratedCommandsInfoNV * pGeneratedCommandsInfo)1239 sqtt_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
1240                                    const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
1241 {
1242    /* There is no ExecuteIndirect Vulkan event in RGP yet. */
1243    API_MARKER_ALIAS(ExecuteGeneratedCommandsNV, ExecuteCommands, commandBuffer, isPreprocessed, pGeneratedCommandsInfo);
1244 }
1245 
1246 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetViewport(VkCommandBuffer commandBuffer,uint32_t firstViewport,uint32_t viewportCount,const VkViewport * pViewports)1247 sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
1248                     const VkViewport *pViewports)
1249 {
1250    API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports);
1251 }
1252 
1253 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetScissor(VkCommandBuffer commandBuffer,uint32_t firstScissor,uint32_t scissorCount,const VkRect2D * pScissors)1254 sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
1255                    const VkRect2D *pScissors)
1256 {
1257    API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors);
1258 }
1259 
1260 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer,float lineWidth)1261 sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
1262 {
1263    API_MARKER(SetLineWidth, commandBuffer, lineWidth);
1264 }
1265 
1266 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer,float depthBiasConstantFactor,float depthBiasClamp,float depthBiasSlopeFactor)1267 sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp,
1268                      float depthBiasSlopeFactor)
1269 {
1270    API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor);
1271 }
1272 
1273 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer,const float blendConstants[4])1274 sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
1275 {
1276    API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
1277 }
1278 
1279 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer,float minDepthBounds,float maxDepthBounds)1280 sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
1281 {
1282    API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds);
1283 }
1284 
/* SQTT layer wrapper for vkCmdSetStencilCompareMask: RGP marker + forward. */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask)
{
   API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
}
1290 
/* SQTT layer wrapper for vkCmdSetStencilWriteMask: RGP marker + forward. */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask)
{
   API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
}
1296 
/* SQTT layer wrapper for vkCmdSetStencilReference: RGP marker + forward. */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference)
{
   API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
}
1302 
/* VK_EXT_debug_marker */

/* Opens a debug marker region by writing a "push" user-event marker (with
 * the marker name) into the SQTT stream. Note: unlike the debug_utils
 * variants below, this is not forwarded to the next layer — the marker only
 * exists for the trace. */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName);
}
1310 
/* Closes the current debug marker region with a "pop" user-event marker
 * (no name payload). Not forwarded to the next layer. */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}
1317 
/* Inserts a standalone "trigger" user-event marker carrying the marker
 * name. Not forwarded to the next layer. */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName);
}
1324 
/* Deliberate no-op: the SQTT layer accepts object names but does not record
 * them; returning VK_SUCCESS keeps VK_EXT_debug_marker usable. */
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_DebugMarkerSetObjectNameEXT(VkDevice device, const VkDebugMarkerObjectNameInfoEXT *pNameInfo)
{
   /* no-op */
   return VK_SUCCESS;
}
1331 
/* Deliberate no-op: object tags are not recorded by the SQTT layer. */
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo)
{
   /* no-op */
   return VK_SUCCESS;
}
1338 
/* VK_EXT_debug_utils label begin: writes a "push" user-event marker into
 * the SQTT stream, then forwards to the next layer so the label is also
 * processed by the driver. */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, pLabelInfo->pLabelName);

   cmd_buffer->device->layer_dispatch.rgp.CmdBeginDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}
1347 
/* VK_EXT_debug_utils label end: writes a "pop" user-event marker, then
 * forwards to the next layer. */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndDebugUtilsLabelEXT(VkCommandBuffer commandBuffer)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);

   cmd_buffer->device->layer_dispatch.rgp.CmdEndDebugUtilsLabelEXT(commandBuffer);
}
1356 
/* VK_EXT_debug_utils label insert: writes a standalone "trigger"
 * user-event marker, then forwards to the next layer. */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pLabelInfo->pLabelName);

   cmd_buffer->device->layer_dispatch.rgp.CmdInsertDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}
1365 
1366 /* Pipelines */
1367 static enum rgp_hardware_stages
radv_get_rgp_shader_stage(struct radv_shader * shader)1368 radv_get_rgp_shader_stage(struct radv_shader *shader)
1369 {
1370    switch (shader->info.stage) {
1371    case MESA_SHADER_VERTEX:
1372       if (shader->info.vs.as_ls)
1373          return RGP_HW_STAGE_LS;
1374       else if (shader->info.vs.as_es)
1375          return RGP_HW_STAGE_ES;
1376       else if (shader->info.is_ngg)
1377          return RGP_HW_STAGE_GS;
1378       else
1379          return RGP_HW_STAGE_VS;
1380    case MESA_SHADER_TESS_CTRL:
1381       return RGP_HW_STAGE_HS;
1382    case MESA_SHADER_TESS_EVAL:
1383       if (shader->info.tes.as_es)
1384          return RGP_HW_STAGE_ES;
1385       else if (shader->info.is_ngg)
1386          return RGP_HW_STAGE_GS;
1387       else
1388          return RGP_HW_STAGE_VS;
1389    case MESA_SHADER_MESH:
1390    case MESA_SHADER_GEOMETRY:
1391       return RGP_HW_STAGE_GS;
1392    case MESA_SHADER_FRAGMENT:
1393       return RGP_HW_STAGE_PS;
1394    case MESA_SHADER_TASK:
1395    case MESA_SHADER_COMPUTE:
1396    case MESA_SHADER_RAYGEN:
1397    case MESA_SHADER_CLOSEST_HIT:
1398    case MESA_SHADER_ANY_HIT:
1399    case MESA_SHADER_INTERSECTION:
1400    case MESA_SHADER_MISS:
1401    case MESA_SHADER_CALLABLE:
1402       return RGP_HW_STAGE_CS;
1403    default:
1404       unreachable("invalid mesa shader stage");
1405    }
1406 }
1407 
1408 static void
radv_fill_code_object_record(struct radv_device * device,struct rgp_shader_data * shader_data,struct radv_shader * shader,uint64_t va)1409 radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data,
1410                              struct radv_shader *shader, uint64_t va)
1411 {
1412    struct radv_physical_device *pdevice = device->physical_device;
1413    unsigned lds_increment = pdevice->rad_info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT
1414                                ? 1024
1415                                : pdevice->rad_info.lds_encode_granularity;
1416 
1417    memset(shader_data->rt_shader_name, 0, sizeof(shader_data->rt_shader_name));
1418    shader_data->hash[0] = (uint64_t)(uintptr_t)shader;
1419    shader_data->hash[1] = (uint64_t)(uintptr_t)shader >> 32;
1420    shader_data->code_size = shader->code_size;
1421    shader_data->code = shader->code;
1422    shader_data->vgpr_count = shader->config.num_vgprs;
1423    shader_data->sgpr_count = shader->config.num_sgprs;
1424    shader_data->scratch_memory_size = shader->config.scratch_bytes_per_wave;
1425    shader_data->lds_size = shader->config.lds_size * lds_increment;
1426    shader_data->wavefront_size = shader->info.wave_size;
1427    shader_data->base_address = va & 0xffffffffffff;
1428    shader_data->elf_symbol_offset = 0;
1429    shader_data->hw_stage = radv_get_rgp_shader_stage(shader);
1430    shader_data->is_combined = false;
1431 }
1432 
1433 static VkResult
radv_add_code_object(struct radv_device * device,struct radv_pipeline * pipeline)1434 radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)
1435 {
1436    struct ac_sqtt *sqtt = &device->sqtt;
1437    struct rgp_code_object *code_object = &sqtt->rgp_code_object;
1438    struct rgp_code_object_record *record;
1439 
1440    record = malloc(sizeof(struct rgp_code_object_record));
1441    if (!record)
1442       return VK_ERROR_OUT_OF_HOST_MEMORY;
1443 
1444    record->shader_stages_mask = 0;
1445    record->num_shaders_combined = 0;
1446    record->pipeline_hash[0] = pipeline->pipeline_hash;
1447    record->pipeline_hash[1] = pipeline->pipeline_hash;
1448    record->is_rt = false;
1449 
1450    for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
1451       struct radv_shader *shader = pipeline->shaders[i];
1452 
1453       if (!shader)
1454          continue;
1455 
1456       radv_fill_code_object_record(device, &record->shader_data[i], shader, radv_sqtt_shader_get_va_reloc(pipeline, i));
1457 
1458       record->shader_stages_mask |= (1 << i);
1459       record->num_shaders_combined++;
1460    }
1461 
1462    simple_mtx_lock(&code_object->lock);
1463    list_addtail(&record->list, &code_object->record);
1464    code_object->record_count++;
1465    simple_mtx_unlock(&code_object->lock);
1466 
1467    return VK_SUCCESS;
1468 }
1469 
1470 static VkResult
radv_add_rt_record(struct radv_device * device,struct rgp_code_object * code_object,struct radv_ray_tracing_pipeline * pipeline,struct radv_shader * shader,uint32_t stack_size,uint32_t index,uint64_t hash)1471 radv_add_rt_record(struct radv_device *device, struct rgp_code_object *code_object,
1472                    struct radv_ray_tracing_pipeline *pipeline, struct radv_shader *shader, uint32_t stack_size,
1473                    uint32_t index, uint64_t hash)
1474 {
1475    struct rgp_code_object_record *record = malloc(sizeof(struct rgp_code_object_record));
1476    if (!record)
1477       return VK_ERROR_OUT_OF_HOST_MEMORY;
1478 
1479    struct rgp_shader_data *shader_data = &record->shader_data[shader->info.stage];
1480 
1481    record->shader_stages_mask = 0;
1482    record->num_shaders_combined = 0;
1483    record->pipeline_hash[0] = hash;
1484    record->pipeline_hash[1] = hash;
1485 
1486    radv_fill_code_object_record(device, shader_data, shader, shader->va);
1487    shader_data->rt_stack_size = stack_size;
1488 
1489    record->shader_stages_mask |= (1 << shader->info.stage);
1490    record->is_rt = true;
1491    switch (shader->info.stage) {
1492    case MESA_SHADER_RAYGEN:
1493       snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "rgen_%d", index);
1494       break;
1495    case MESA_SHADER_CLOSEST_HIT:
1496       snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "chit_%d", index);
1497       break;
1498    case MESA_SHADER_MISS:
1499       snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "miss_%d", index);
1500       break;
1501    case MESA_SHADER_INTERSECTION:
1502       snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "traversal");
1503       break;
1504    case MESA_SHADER_CALLABLE:
1505       snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "call_%d", index);
1506       break;
1507    case MESA_SHADER_COMPUTE:
1508       snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "_amdgpu_cs_main");
1509       break;
1510    default:
1511       unreachable("invalid rt stage");
1512    }
1513    record->num_shaders_combined = 1;
1514 
1515    simple_mtx_lock(&code_object->lock);
1516    list_addtail(&record->list, &code_object->record);
1517    code_object->record_count++;
1518    simple_mtx_unlock(&code_object->lock);
1519 
1520    return VK_SUCCESS;
1521 }
1522 
/* Derive a unique hash for one record of a ray-tracing pipeline by hashing
 * the pipeline hash together with the record index. The first 8 bytes of
 * the resulting SHA-1 are used as the record's 64-bit hash (see
 * radv_register_rt_stage / sqtt_DestroyPipeline). */
static void
compute_unique_rt_sha(uint64_t pipeline_hash, unsigned index, unsigned char sha1[SHA1_DIGEST_LENGTH])
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, &pipeline_hash, sizeof(pipeline_hash));
   _mesa_sha1_update(&ctx, &index, sizeof(index));
   _mesa_sha1_final(&ctx, sha1);
}
1532 
1533 static VkResult
radv_register_rt_stage(struct radv_device * device,struct radv_ray_tracing_pipeline * pipeline,uint32_t index,uint32_t stack_size,struct radv_shader * shader)1534 radv_register_rt_stage(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, uint32_t index,
1535                        uint32_t stack_size, struct radv_shader *shader)
1536 {
1537    unsigned char sha1[SHA1_DIGEST_LENGTH];
1538    VkResult result;
1539 
1540    compute_unique_rt_sha(pipeline->base.base.pipeline_hash, index, sha1);
1541 
1542    result = ac_sqtt_add_pso_correlation(&device->sqtt, *(uint64_t *)sha1, pipeline->base.base.pipeline_hash);
1543    if (!result)
1544       return VK_ERROR_OUT_OF_HOST_MEMORY;
1545    result = ac_sqtt_add_code_object_loader_event(&device->sqtt, *(uint64_t *)sha1, shader->va);
1546    if (!result)
1547       return VK_ERROR_OUT_OF_HOST_MEMORY;
1548    result =
1549       radv_add_rt_record(device, &device->sqtt.rgp_code_object, pipeline, shader, stack_size, index, *(uint64_t *)sha1);
1550    return result;
1551 }
1552 
/* Register RGP records for every part of a ray-tracing pipeline: one per
 * application stage, one for the combined traversal shader (if present) and
 * one for the prolog. sqtt_DestroyPipeline mirrors this by unregistering
 * stage_count + 2 hashes. */
static VkResult
radv_register_rt_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
   VkResult result = VK_SUCCESS;

   uint32_t max_any_hit_stack_size = 0;
   uint32_t max_intersection_stack_size = 0;

   /* Walk all stages: accumulate the worst-case any-hit/intersection stack
    * sizes (needed below for the traversal record even for stages with no
    * compiled shader) and register each stage that has a shader. */
   for (unsigned i = 0; i < pipeline->stage_count; i++) {
      struct radv_ray_tracing_stage *stage = &pipeline->stages[i];
      if (stage->stage == MESA_SHADER_ANY_HIT)
         max_any_hit_stack_size = MAX2(max_any_hit_stack_size, stage->stack_size);
      else if (stage->stage == MESA_SHADER_INTERSECTION)
         max_intersection_stack_size = MAX2(max_intersection_stack_size, stage->stack_size);

      /* Stages without their own shader still consume an index 'i' so the
       * per-record hashes stay stable. */
      if (!pipeline->stages[i].shader)
         continue;

      result = radv_register_rt_stage(device, pipeline, i, stage->stack_size, stage->shader);
      if (result != VK_SUCCESS)
         return result;
   }

   uint32_t idx = pipeline->stage_count;

   /* Combined traversal shader */
   if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]) {
      result = radv_register_rt_stage(device, pipeline, idx++, max_any_hit_stack_size + max_intersection_stack_size,
                                      pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Prolog */
   result = radv_register_rt_stage(device, pipeline, idx++, 0, pipeline->prolog);

   return result;
}
1591 
1592 static VkResult
radv_register_pipeline(struct radv_device * device,struct radv_pipeline * pipeline)1593 radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
1594 {
1595    bool result;
1596    uint64_t base_va = ~0;
1597 
1598    result = ac_sqtt_add_pso_correlation(&device->sqtt, pipeline->pipeline_hash, pipeline->pipeline_hash);
1599    if (!result)
1600       return VK_ERROR_OUT_OF_HOST_MEMORY;
1601 
1602    /* Find the lowest shader BO VA. */
1603    for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
1604       struct radv_shader *shader = pipeline->shaders[i];
1605       uint64_t va;
1606 
1607       if (!shader)
1608          continue;
1609 
1610       va = radv_sqtt_shader_get_va_reloc(pipeline, i);
1611       base_va = MIN2(base_va, va);
1612    }
1613 
1614    result = ac_sqtt_add_code_object_loader_event(&device->sqtt, pipeline->pipeline_hash, base_va);
1615    if (!result)
1616       return VK_ERROR_OUT_OF_HOST_MEMORY;
1617 
1618    result = radv_add_code_object(device, pipeline);
1619    if (result != VK_SUCCESS)
1620       return result;
1621 
1622    return VK_SUCCESS;
1623 }
1624 
/* Remove (and free) the PSO correlation, loader event and code object
 * records that were registered under 'hash'. Each list holds at most one
 * record per hash, so every walk stops at the first match. Each list is
 * protected by its own lock. Silently does nothing for unknown hashes
 * (relied on by sqtt_DestroyPipeline, which may probe more hashes than
 * were registered). */
static void
radv_unregister_records(struct radv_device *device, uint64_t hash)
{
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_pso_correlation *pso_correlation = &sqtt->rgp_pso_correlation;
   struct rgp_loader_events *loader_events = &sqtt->rgp_loader_events;
   struct rgp_code_object *code_object = &sqtt->rgp_code_object;

   /* Destroy the PSO correlation record. */
   simple_mtx_lock(&pso_correlation->lock);
   list_for_each_entry_safe (struct rgp_pso_correlation_record, record, &pso_correlation->record, list) {
      if (record->pipeline_hash[0] == hash) {
         pso_correlation->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&pso_correlation->lock);

   /* Destroy the code object loader record. */
   simple_mtx_lock(&loader_events->lock);
   list_for_each_entry_safe (struct rgp_loader_events_record, record, &loader_events->record, list) {
      if (record->code_object_hash[0] == hash) {
         loader_events->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&loader_events->lock);

   /* Destroy the code object record. */
   simple_mtx_lock(&code_object->lock);
   list_for_each_entry_safe (struct rgp_code_object_record, record, &code_object->record, list) {
      if (record->pipeline_hash[0] == hash) {
         code_object->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&code_object->lock);
}
1669 
/* SQTT layer entry point for vkCreateGraphicsPipelines: creates the
 * pipelines through the next layer, then relocates the shaders of each
 * non-library pipeline (presumably so every pipeline gets its own shader
 * VAs for the trace — see radv_sqtt_reloc_graphics_shaders) and registers
 * the RGP records. On failure, every returned pipeline is destroyed and its
 * handle cleared. */
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                             const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                             VkPipeline *pPipelines)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                               pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      /* Pipeline libraries are not executable; skip them. */
      const VkPipelineCreateFlagBits2KHR create_flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

      result = radv_sqtt_reloc_graphics_shaders(device, radv_pipeline_to_graphics(pipeline));
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   /* Undo everything: destroy all created pipelines (registered or not)
    * and null out the handles returned to the application. */
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}
1711 
/* SQTT layer entry point for vkCreateComputePipelines: creates the
 * pipelines through the next layer, then registers the RGP records for each
 * one. On failure, all created pipelines are destroyed and their handles
 * cleared. */
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                            const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                              pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}
1745 
/* SQTT layer entry point for vkCreateRayTracingPipelinesKHR: creates the
 * pipelines through the next layer, then registers per-stage RGP records
 * for each non-library pipeline (see radv_register_rt_pipeline). On
 * failure, all created pipelines are destroyed and their handles cleared. */
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                  VkPipelineCache pipelineCache, uint32_t count,
                                  const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateRayTracingPipelinesKHR(_device, deferredOperation, pipelineCache, count,
                                                                    pCreateInfos, pAllocator, pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      /* Pipeline libraries are not executable; skip them. */
      const VkPipelineCreateFlagBits2KHR create_flags = vk_rt_pipeline_create_flags(&pCreateInfos[i]);
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

      result = radv_register_rt_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}
1784 
1785 VKAPI_ATTR void VKAPI_CALL
sqtt_DestroyPipeline(VkDevice _device,VkPipeline _pipeline,const VkAllocationCallbacks * pAllocator)1786 sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator)
1787 {
1788    RADV_FROM_HANDLE(radv_device, device, _device);
1789    RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
1790 
1791    if (!_pipeline)
1792       return;
1793 
1794    /* Ray tracing pipelines have multiple records, each with their own hash */
1795    if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
1796       /* We have one record for each stage, plus one for the traversal shader and one for the prolog */
1797       uint32_t record_count = radv_pipeline_to_ray_tracing(pipeline)->stage_count + 2;
1798       unsigned char sha1[SHA1_DIGEST_LENGTH];
1799       for (uint32_t i = 0; i < record_count; ++i) {
1800          compute_unique_rt_sha(pipeline->pipeline_hash, i, sha1);
1801          radv_unregister_records(device, *(uint64_t *)sha1);
1802       }
1803    } else
1804       radv_unregister_records(device, pipeline->pipeline_hash);
1805 
1806    if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
1807       struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
1808       struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;
1809 
1810       radv_free_shader_memory(device, reloc->alloc);
1811       free(reloc);
1812    }
1813 
1814    device->layer_dispatch.rgp.DestroyPipeline(_device, _pipeline, pAllocator);
1815 }
1816 
1817 #undef API_MARKER
1818