/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_cs.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_common_entrypoints.h"
#include "vk_semaphore.h"
#include "wsi_common_entrypoints.h"

#include "ac_rgp.h"
#include "ac_sqtt.h"

#include "vk_pipeline.h"

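/* Emit the SPI_SHADER_PGM_LO_* registers for every active graphics stage so
 * that they point at the relocated shader copies instead of the originals.
 * The registers take the 256-byte-aligned VA (hence va >> 8); the older
 * two-register paths also program a MEM_BASE field with the upper address
 * bits (va >> 40).
 */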
void
radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline)
{
   const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
   struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
   struct radeon_cmdbuf *cs = cmd_buffer->cs;
   uint64_t va;

   radv_cs_add_buffer(cmd_buffer->device->ws, cs, reloc->bo);

   /* VS */
   if (pipeline->base.shaders[MESA_SHADER_VERTEX]) {
      struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_VERTEX];

      va = reloc->va[MESA_SHADER_VERTEX];
      if (vs->info.vs.as_ls) {
         radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
      } else if (vs->info.vs.as_es) {
         radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else if (vs->info.is_ngg) {
         radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
      } else {
         radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* TCS */
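   /* Note: on GFX9+ the TCS runs merged with the VS as an LS-HS shader, which
    * is why the LS PGM_LO register is programmed here; the register offset
    * changed again between GFX9 and GFX10.
    */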
   if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]) {
      va = reloc->va[MESA_SHADER_TESS_CTRL];

      if (gfx_level >= GFX9) {
         if (gfx_level >= GFX10) {
            radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
         } else {
            radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
         }
      } else {
         radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
      }
   }

   /* TES */
   if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]) {
      struct radv_shader *tes = pipeline->base.shaders[MESA_SHADER_TESS_EVAL];

      va = reloc->va[MESA_SHADER_TESS_EVAL];
      if (tes->info.is_ngg) {
         radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
      } else if (tes->info.tes.as_es) {
         radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else {
         radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* GS */
   if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) {
      struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];

      va = reloc->va[MESA_SHADER_GEOMETRY];
      if (gs->info.is_ngg) {
         radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
      } else {
         if (gfx_level >= GFX9) {
            if (gfx_level >= GFX10) {
               radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
            } else {
               radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
            }
         } else {
            radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 2);
            radeon_emit(cs, va >> 8);
            radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
         }
      }
   }

   /* FS */
   if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]) {
      va = reloc->va[MESA_SHADER_FRAGMENT];

      radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 2);
      radeon_emit(cs, va >> 8);
      radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
   }

   /* MS */
   if (pipeline->base.shaders[MESA_SHADER_MESH]) {
      va = reloc->va[MESA_SHADER_MESH];

      radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
   }
}

static uint64_t
radv_sqtt_shader_get_va_reloc(struct radv_pipeline *pipeline, gl_shader_stage stage)
{
   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
      struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;
      return reloc->va[stage];
   }

   return radv_shader_get_va(pipeline->shaders[stage]);
}

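/* Copy all shader binaries of a graphics pipeline into one contiguous slab of
 * GPU memory, since RGP expects the shaders of a pipeline to be contiguous.
 * When shaders live in CPU-invisible VRAM, the copies go through the shader
 * DMA queue instead of a direct memcpy into the mapped arena.
 */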
static VkResult
radv_sqtt_reloc_graphics_shaders(struct radv_device *device, struct radv_graphics_pipeline *pipeline)
{
   struct radv_shader_dma_submission *submission = NULL;
   struct radv_sqtt_shaders_reloc *reloc;
   uint32_t code_size = 0;

   reloc = calloc(1, sizeof(*reloc));
   if (!reloc)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Compute the total code size. */
   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      const struct radv_shader *shader = pipeline->base.shaders[i];
      if (!shader)
         continue;

      code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   /* Allocate memory for all shader binaries. */
   reloc->alloc = radv_alloc_shader_memory(device, code_size, false, pipeline);
   if (!reloc->alloc) {
      free(reloc);
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   reloc->bo = reloc->alloc->arena->bo;

   /* Relocate shader binaries to be contiguous in memory as requested by RGP. */
   uint64_t slab_va = radv_buffer_get_va(reloc->bo) + reloc->alloc->offset;
   char *slab_ptr = reloc->alloc->arena->ptr + reloc->alloc->offset;
   uint64_t offset = 0;

   if (device->shader_use_invisible_vram) {
      submission = radv_shader_dma_get_submission(device, reloc->bo, slab_va, code_size);
      if (!submission)
         return VK_ERROR_UNKNOWN;
   }

   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      const struct radv_shader *shader = pipeline->base.shaders[i];
      void *dest_ptr;
      if (!shader)
         continue;

      reloc->va[i] = slab_va + offset;

      if (device->shader_use_invisible_vram)
         dest_ptr = submission->ptr + offset;
      else
         dest_ptr = slab_ptr + offset;

      memcpy(dest_ptr, shader->code, shader->code_size);

      offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   if (device->shader_use_invisible_vram) {
      if (!radv_shader_dma_submit(device, submission, &pipeline->base.shader_upload_seq))
         return VK_ERROR_UNKNOWN;
   }

   pipeline->sqtt_shaders_reloc = reloc;

   return VK_SUCCESS;
}

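/* SQTT markers are written into the thread trace by emitting their raw dwords
 * through radv_emit_sqtt_userdata(); sizeof(marker) / 4 below is the marker
 * size in dwords.
 */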
static void
radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;
   marker.is_end = 1;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
                        uint32_t vertex_offset_user_data, uint32_t instance_offset_user_data,
                        uint32_t draw_index_user_data)
{
   struct rgp_sqtt_marker_event marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.api_type = api_type;
   marker.cmd_id = cmd_buffer->state.num_events++;
   marker.cb_id = cmd_buffer->sqtt_cb_id;

   if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) {
      vertex_offset_user_data = 0;
      instance_offset_user_data = 0;
   }

   if (draw_index_user_data == UINT_MAX)
      draw_index_user_data = vertex_offset_user_data;

   marker.vertex_offset_reg_idx = vertex_offset_user_data;
   marker.instance_offset_reg_idx = instance_offset_user_data;
   marker.draw_index_reg_idx = draw_index_user_data;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
                                  uint32_t x, uint32_t y, uint32_t z)
{
   struct rgp_sqtt_marker_event_with_dims marker = {0};

   marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.event.api_type = api_type;
   marker.event.cmd_id = cmd_buffer->state.num_events++;
   marker.event.cb_id = cmd_buffer->sqtt_cb_id;
   marker.event.has_thread_dims = 1;

   marker.thread_x = x;
   marker.thread_y = y;
   marker.thread_z = z;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_user_event_type type,
                             const char *str)
{
   if (likely(!cmd_buffer->device->sqtt.bo))
      return;

   if (type == UserEventPop) {
      assert(str == NULL);
      struct rgp_sqtt_marker_user_event marker = {0};
      marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.data_type = type;

      radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
   } else {
      assert(str != NULL);
      unsigned len = strlen(str);
      struct rgp_sqtt_marker_user_event_with_length marker = {0};
      marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.user_event.data_type = type;
      marker.length = align(len, 4);

      uint8_t *buffer = alloca(sizeof(marker) + marker.length);
      memset(buffer, 0, sizeof(marker) + marker.length);
      memcpy(buffer, &marker, sizeof(marker));
      memcpy(buffer + sizeof(marker), str, len);

      radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4);
   }
}

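/* CB_START/CB_END markers bracket everything recorded into a command buffer
 * so RGP can attribute events to it; the device pointer doubles as a stable
 * device identifier in the markers.
 */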
void
radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   uint64_t device_id = (uintptr_t)cmd_buffer->device;
   struct rgp_sqtt_marker_cb_start marker = {0};

   if (likely(!cmd_buffer->device->sqtt.bo))
      return;

   /* Reserve a command buffer ID for SQTT. */
   enum amd_ip_type ip_type = radv_queue_family_to_ring(cmd_buffer->device->physical_device, cmd_buffer->qf);
   union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type);
   cmd_buffer->sqtt_cb_id = cb_id.all;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;
   marker.queue = cmd_buffer->qf;
   marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;

   if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
      marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;

   if (cmd_buffer->device->instance->drirc.legacy_sparse_binding)
      marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   uint64_t device_id = (uintptr_t)cmd_buffer->device;
   struct rgp_sqtt_marker_cb_end marker = {0};

   if (likely(!cmd_buffer->device->sqtt.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
{
   if (likely(!cmd_buffer->device->sqtt.bo))
      return;

   radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
}

void
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info)
{
   if (likely(!cmd_buffer->device->sqtt.bo))
      return;

   if (info->indirect) {
      radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
   } else {
      radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, info->blocks[0],
                                        info->blocks[1], info->blocks[2]);
   }
}

void
radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects)
{
   cmd_buffer->state.current_event_type =
      (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ? EventRenderPassColorClear : EventRenderPassDepthStencilClear;
}

void
radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

void
radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventRenderPassResolve;
}

void
radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

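/* Barrier-end markers are emitted lazily: radv_describe_barrier_end() only
 * sets pending_sqtt_barrier_end, and the marker is written here once all
 * sqtt_flush_bits accumulated since the barrier started are known.
 */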
void
radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
{
   struct rgp_sqtt_marker_barrier_end marker = {0};

   if (likely(!cmd_buffer->device->sqtt.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)
      return;

   cmd_buffer->state.pending_sqtt_barrier_end = false;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
   marker.cb_id = cmd_buffer->sqtt_cb_id;

   marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;

   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
      marker.wait_on_eop_ts = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
      marker.vs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
      marker.ps_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
      marker.cs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
      marker.pfp_sync_me = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
      marker.sync_cp_dma = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
      marker.inval_tcp = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
      marker.inval_sqI = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
      marker.inval_sqK = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
      marker.flush_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
      marker.inval_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
      marker.flush_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
      marker.inval_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
      marker.flush_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
      marker.inval_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
      marker.inval_gl1 = true;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions = 0;
}

void
radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason)
{
   struct rgp_sqtt_marker_barrier_start marker = {0};

   if (likely(!cmd_buffer->device->sqtt.bo))
      return;

   if (cmd_buffer->state.in_barrier) {
      assert(!"attempted to start a barrier while already in a barrier");
      return;
   }

   radv_describe_barrier_end_delayed(cmd_buffer);
   cmd_buffer->state.sqtt_flush_bits = 0;
   cmd_buffer->state.in_barrier = true;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.dword02 = reason;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.in_barrier = false;
   cmd_buffer->state.pending_sqtt_barrier_end = true;
}

void
radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier)
{
   struct rgp_sqtt_marker_layout_transition marker = {0};

   if (likely(!cmd_buffer->device->sqtt.bo))
      return;

   if (!cmd_buffer->state.in_barrier) {
      assert(!"layout transition markers should only be emitted inside a barrier marker");
      return;
   }

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
   marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
   marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
   marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
   marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
   marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
   marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
   marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
   marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions++;
}

void
radv_describe_begin_accel_struct_build(struct radv_cmd_buffer *cmd_buffer, uint32_t count)
{
   if (likely(!cmd_buffer->device->sqtt.bo))
      return;

   char marker[64];
   snprintf(marker, sizeof(marker), "vkCmdBuildAccelerationStructuresKHR(%u)", count);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, marker);
}

void
radv_describe_end_accel_struct_build(struct radv_cmd_buffer *cmd_buffer)
{
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

static void
radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
                            struct radv_pipeline *pipeline)
{
   struct rgp_sqtt_marker_pipeline_bind marker = {0};

   if (likely(!cmd_buffer->device->sqtt.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.bind_point = pipelineBindPoint;
   marker.api_pso_hash[0] = pipeline->pipeline_hash;
   marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

/* Queue events */
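/* Queue timing event records are accumulated on a per-device list under a
 * mutex; they are presumably consumed later when the RGP capture is written,
 * so that submit/present/semaphore timing can be shown next to the trace.
 */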
static void
radv_describe_queue_event(struct radv_queue *queue, struct rgp_queue_event_record *record)
{
   struct radv_device *device = queue->device;
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_queue_event *queue_event = &sqtt->rgp_queue_event;

   simple_mtx_lock(&queue_event->lock);
   list_addtail(&record->list, &queue_event->record);
   queue_event->record_count++;
   simple_mtx_unlock(&queue_event->lock);
}

static VkResult
radv_describe_queue_present(struct radv_queue *queue, uint64_t cpu_timestamp, void *gpu_timestamp_ptr)
{
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = SQTT_QUEUE_TIMING_EVENT_PRESENT;
   record->cpu_timestamp = cpu_timestamp;
   record->gpu_timestamps[0] = gpu_timestamp_ptr;
   record->queue_info_index = queue->vk.queue_family_index;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static VkResult
radv_describe_queue_submit(struct radv_queue *queue, struct radv_cmd_buffer *cmd_buffer, uint32_t cmdbuf_idx,
                           uint64_t cpu_timestamp, void *pre_gpu_timestamp_ptr, void *post_gpu_timestamp_ptr)
{
   struct radv_device *device = queue->device;
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = SQTT_QUEUE_TIMING_EVENT_CMDBUF_SUBMIT;
   record->api_id = (uintptr_t)cmd_buffer;
   record->cpu_timestamp = cpu_timestamp;
   record->frame_index = device->vk.current_frame;
   record->gpu_timestamps[0] = pre_gpu_timestamp_ptr;
   record->gpu_timestamps[1] = post_gpu_timestamp_ptr;
   record->queue_info_index = queue->vk.queue_family_index;
   record->submit_sub_index = cmdbuf_idx;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static VkResult
radv_describe_queue_semaphore(struct radv_queue *queue, struct vk_semaphore *sync,
                              enum sqtt_queue_event_type event_type)
{
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = event_type;
   record->api_id = (uintptr_t)sync;
   record->cpu_timestamp = os_time_get_nano();
   record->queue_info_index = queue->vk.queue_family_index;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

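/* Called once per present: if a capture is in progress, stop it, wait for the
 * queue to go idle and dump the RGP file (together with SPM counters when
 * available); if a capture was requested, start a new one.
 */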
static void
radv_handle_sqtt(VkQueue _queue)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);

   bool trigger = queue->device->sqtt_triggered;
   queue->device->sqtt_triggered = false;

   if (queue->device->sqtt_enabled) {
      struct ac_sqtt_trace sqtt_trace = {0};

      radv_end_sqtt(queue);
      queue->device->sqtt_enabled = false;

      /* TODO: Do something better than this whole sync. */
      queue->device->vk.dispatch_table.QueueWaitIdle(_queue);

      if (radv_get_sqtt_trace(queue, &sqtt_trace)) {
         struct ac_spm_trace spm_trace;

         if (queue->device->spm.bo)
            ac_spm_get_trace(&queue->device->spm, &spm_trace);

         ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &sqtt_trace,
                             queue->device->spm.bo ? &spm_trace : NULL);
      } else {
         /* Trigger a new capture if the driver failed to get
          * the trace because the buffer was too small.
          */
         trigger = true;
      }

      /* Clear resources used for this capture. */
      radv_reset_sqtt_trace(queue->device);
   }

   if (trigger) {
      if (ac_check_profile_state(&queue->device->physical_device->rad_info)) {
         fprintf(stderr, "radv: Canceling RGP trace request as a hang condition has been "
                         "detected. Force the GPU into a profiling mode with e.g. "
                         "\"echo profile_peak > "
                         "/sys/class/drm/card0/device/power_dpm_force_performance_level\"\n");
         return;
      }

      /* Sample CPU/GPU clocks before starting the trace. */
      if (!radv_sqtt_sample_clocks(queue->device)) {
         fprintf(stderr, "radv: Failed to sample clocks\n");
      }

      radv_begin_sqtt(queue);
      assert(!queue->device->sqtt_enabled);
      queue->device->sqtt_enabled = true;
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   VkResult result;

   queue->sqtt_present = true;

   result = queue->device->layer_dispatch.rgp.QueuePresentKHR(_queue, pPresentInfo);
   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
      return result;

   queue->sqtt_present = false;

   radv_handle_sqtt(_queue);

   return VK_SUCCESS;
}

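/* Present-time submission path: prepend a single command buffer that writes a
 * TOP_OF_PIPE GPU timestamp, and record a present queue event pointing at the
 * timestamp location so the capture can time the present.
 */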
static VkResult
radv_sqtt_wsi_submit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = queue->device;
   VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
   struct radeon_winsys_bo *gpu_timestamp_bo;
   uint32_t gpu_timestamp_offset;
   VkCommandBuffer timed_cmdbuf;
   void *gpu_timestamp_ptr;
   uint64_t cpu_timestamp;
   VkResult result = VK_SUCCESS;

   assert(submitCount <= 1 && pSubmits != NULL);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];
      VkSubmitInfo2 sqtt_submit = *pSubmit;

      assert(sqtt_submit.commandBufferInfoCount <= 1);

      /* Command buffers */
      uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount + 1;

      new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
      if (!new_cmdbufs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      /* Sample the current CPU time before building the GPU timestamp cmdbuf. */
      cpu_timestamp = os_time_get_nano();

      result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamp_bo, &gpu_timestamp_offset, &gpu_timestamp_ptr);
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamp_bo, gpu_timestamp_offset,
                                          VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &timed_cmdbuf);
      if (result != VK_SUCCESS)
         goto fail;

      new_cmdbufs[0] = (VkCommandBufferSubmitInfo){
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
         .commandBuffer = timed_cmdbuf,
      };

      if (sqtt_submit.commandBufferInfoCount == 1)
         new_cmdbufs[1] = sqtt_submit.pCommandBufferInfos[0];

      sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
      sqtt_submit.pCommandBufferInfos = new_cmdbufs;

      radv_describe_queue_present(queue, cpu_timestamp, gpu_timestamp_ptr);

      result = queue->device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
      if (result != VK_SUCCESS)
         goto fail;

      FREE(new_cmdbufs);
   }

   return result;

fail:
   FREE(new_cmdbufs);
   return result;
}

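/* Timed submission path: every application command buffer is bracketed by a
 * pre (TOP_OF_PIPE) and a post (BOTTOM_OF_PIPE) timestamp command buffer,
 * which is why the rewritten array is three times the original count. Wait
 * and signal semaphores are recorded as queue events around the real submit.
 */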
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueueSubmit2(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   const bool is_gfx_or_ace = queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE;
   struct radv_device *device = queue->device;
   VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
   VkResult result = VK_SUCCESS;

   /* Only consider queue events on graphics/compute when enabled. */
   if (!device->sqtt_enabled || !radv_sqtt_queue_events_enabled() || !is_gfx_or_ace)
      return queue->device->layer_dispatch.rgp.QueueSubmit2(_queue, submitCount, pSubmits, _fence);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];

      /* Wait semaphores */
      for (uint32_t j = 0; j < pSubmit->waitSemaphoreInfoCount; j++) {
         const VkSemaphoreSubmitInfo *pWaitSemaphoreInfo = &pSubmit->pWaitSemaphoreInfos[j];
         VK_FROM_HANDLE(vk_semaphore, sem, pWaitSemaphoreInfo->semaphore);
         radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_WAIT_SEMAPHORE);
      }
   }

   if (queue->sqtt_present)
      return radv_sqtt_wsi_submit(_queue, submitCount, pSubmits, _fence);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];
      VkSubmitInfo2 sqtt_submit = *pSubmit;

      /* Command buffers */
      uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount * 3;
      uint32_t cmdbuf_idx = 0;

      new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
      if (!new_cmdbufs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      for (uint32_t j = 0; j < sqtt_submit.commandBufferInfoCount; j++) {
         const VkCommandBufferSubmitInfo *pCommandBufferInfo = &sqtt_submit.pCommandBufferInfos[j];
         struct radeon_winsys_bo *gpu_timestamps_bo[2];
         uint32_t gpu_timestamps_offset[2];
         VkCommandBuffer pre_timed_cmdbuf, post_timed_cmdbuf;
         void *gpu_timestamps_ptr[2];
         uint64_t cpu_timestamp;

         /* Sample the current CPU time before building the timed cmdbufs. */
         cpu_timestamp = os_time_get_nano();

         result = radv_sqtt_acquire_gpu_timestamp(queue->device, &gpu_timestamps_bo[0], &gpu_timestamps_offset[0],
                                                  &gpu_timestamps_ptr[0]);
         if (result != VK_SUCCESS)
            goto fail;

         result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[0], gpu_timestamps_offset[0],
                                             VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &pre_timed_cmdbuf);
         if (result != VK_SUCCESS)
            goto fail;

         new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = pre_timed_cmdbuf,
         };

         new_cmdbufs[cmdbuf_idx++] = *pCommandBufferInfo;

         result = radv_sqtt_acquire_gpu_timestamp(queue->device, &gpu_timestamps_bo[1], &gpu_timestamps_offset[1],
                                                  &gpu_timestamps_ptr[1]);
         if (result != VK_SUCCESS)
            goto fail;

         result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[1], gpu_timestamps_offset[1],
                                             VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, &post_timed_cmdbuf);
         if (result != VK_SUCCESS)
            goto fail;

         new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = post_timed_cmdbuf,
         };

         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBufferInfo->commandBuffer);
         radv_describe_queue_submit(queue, cmd_buffer, j, cpu_timestamp, gpu_timestamps_ptr[0], gpu_timestamps_ptr[1]);
      }

      sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
      sqtt_submit.pCommandBufferInfos = new_cmdbufs;

      result = queue->device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
      if (result != VK_SUCCESS)
         goto fail;

      /* Signal semaphores */
      for (uint32_t j = 0; j < sqtt_submit.signalSemaphoreInfoCount; j++) {
         const VkSemaphoreSubmitInfo *pSignalSemaphoreInfo = &sqtt_submit.pSignalSemaphoreInfos[j];
         VK_FROM_HANDLE(vk_semaphore, sem, pSignalSemaphoreInfo->semaphore);
         radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_SIGNAL_SEMAPHORE);
      }

      FREE(new_cmdbufs);
   }

   return result;

fail:
   FREE(new_cmdbufs);
   return result;
}

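/* Each intercepted command below is wrapped in a begin/end general API marker
 * pair, and current_event_type is set for the duration of the call so that
 * internal draw/dispatch markers get attributed to the right API event.
 */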
#define EVENT_MARKER_BASE(cmd_name, api_name, event_name, ...)                                                        \
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                                      \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                                                 \
   cmd_buffer->state.current_event_type = EventCmd##event_name;                                                       \
   cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__);                                                 \
   cmd_buffer->state.current_event_type = EventInternalUnknown;                                                       \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) EVENT_MARKER_BASE(cmd_name, api_name, api_name, __VA_ARGS__);

#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);

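/* For illustration, EVENT_MARKER(Draw, commandBuffer, ...) in sqtt_CmdDraw()
 * below expands roughly to:
 *
 *    radv_write_begin_general_api_marker(cmd_buffer, ApiCmdDraw);
 *    cmd_buffer->state.current_event_type = EventCmdDraw;
 *    cmd_buffer->device->layer_dispatch.rgp.CmdDraw(commandBuffer, ...);
 *    cmd_buffer->state.current_event_type = EventInternalUnknown;
 *    radv_write_end_general_api_marker(cmd_buffer, ApiCmdDraw);
 */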
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex,
             uint32_t firstInstance)
{
   EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex,
                    int32_t vertexOffset, uint32_t firstInstance)
{
   EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                     uint32_t stride)
{
   EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                            uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer,
                          VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
{
   EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                 uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount,
                stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   EVENT_MARKER_ALIAS(DispatchBase, Dispatch, commandBuffer, 0, 0, 0, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset)
{
   EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyBuffer2, CopyBuffer, commandBuffer, pCopyBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize fillSize,
                   uint32_t data)
{
   EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize,
                     const void *pData)
{
   EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyImageInfo)
{
   EVENT_MARKER_ALIAS(CopyImage2, CopyImage, commandBuffer, pCopyImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer, pCopyBufferToImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer, pCopyImageToBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBlitImage2(VkCommandBuffer commandBuffer, const VkBlitImageInfo2 *pBlitImageInfo)
{
   EVENT_MARKER_ALIAS(BlitImage2, BlitImage, commandBuffer, pBlitImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                        const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                               const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
                               const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
                         uint32_t rectCount, const VkClearRect *pRects)
{
   EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo)
{
   EVENT_MARKER_ALIAS(ResolveImage2, ResolveImage, commandBuffer, pResolveImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
                    const VkDependencyInfo *pDependencyInfos)
{
   EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents, pDependencyInfos);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo)
{
   EVENT_MARKER_ALIAS(PipelineBarrier2, PipelineBarrier, commandBuffer, pDependencyInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount)
{
   EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
                             uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride,
                             VkQueryResultFlags flags)
{
   EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer, dstOffset, stride,
                flags);
}

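/* Ray tracing commands reuse the Dispatch API marker; the
 * ApiRayTracingSeparateCompiled flag is ORed into the event type, presumably
 * so RGP can tell separately-compiled RT dispatches apart from regular
 * compute dispatches.
 */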
#define EVENT_RT_MARKER(cmd_name, flags, ...) EVENT_MARKER_BASE(cmd_name, Dispatch, cmd_name | flags, __VA_ARGS__);

#define EVENT_RT_MARKER_ALIAS(cmd_name, event_name, flags, ...)                                                       \
   EVENT_MARKER_BASE(cmd_name, Dispatch, event_name | flags, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
                     uint32_t height, uint32_t depth)
{
   EVENT_RT_MARKER(TraceRaysKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
                             const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
                             VkDeviceAddress indirectDeviceAddress)
{
   EVENT_RT_MARKER(TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, VkDeviceAddress indirectDeviceAddress)
{
   EVENT_RT_MARKER_ALIAS(TraceRaysIndirect2KHR, TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer,
                         indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureToMemoryKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyMemoryToAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   EVENT_MARKER(DrawMeshTasksEXT, commandBuffer, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 uint32_t drawCount, uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectEXT, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                      VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                      uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectCountEXT, commandBuffer, buffer, offset, countBuffer, countBufferOffset,
                maxDrawCount, stride);
}

#undef EVENT_RT_MARKER_ALIAS
#undef EVENT_RT_MARKER

#undef EVENT_MARKER
#undef EVENT_MARKER_ALIAS
#undef EVENT_MARKER_BASE

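/* State-setting and binding commands are not events: API_MARKER only wraps
 * them in begin/end general API markers without touching current_event_type.
 */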
#define API_MARKER_ALIAS(cmd_name, api_name, ...)                                                                     \
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                                      \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                                                 \
   cmd_buffer->device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__);                                                 \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline)
{
   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);

   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) {
      /* RGP seems to expect a compute bind point to detect and report RT
       * pipelines, which makes sense given that RT shaders are compiled to a
       * unified compute shader.
       */
      radv_describe_pipeline_bind(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   } else {
      radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
   }
}
1168
1169 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipelineLayout layout,uint32_t firstSet,uint32_t descriptorSetCount,const VkDescriptorSet * pDescriptorSets,uint32_t dynamicOffsetCount,const uint32_t * pDynamicOffsets)1170 sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
1171 VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount,
1172 const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
1173 const uint32_t *pDynamicOffsets)
1174 {
1175 API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet, descriptorSetCount,
1176 pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
1177 }
1178
1179 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkIndexType indexType)1180 sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType)
1181 {
1182 API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
1183 }
1184
1185 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer,uint32_t firstBinding,uint32_t bindingCount,const VkBuffer * pBuffers,const VkDeviceSize * pOffsets,const VkDeviceSize * pSizes,const VkDeviceSize * pStrides)1186 sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount,
1187 const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
1188 const VkDeviceSize *pStrides)
1189 {
1190 API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers,
1191 pOffsets, pSizes, pStrides);
1192 }
1193
1194 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query,VkQueryControlFlags flags)1195 sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags)
1196 {
1197 API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
1198 }
1199
1200 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)
{
   API_MARKER(EndQuery, commandBuffer, queryPool, query);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool,
                        uint32_t query)
{
   API_MARKER_ALIAS(WriteTimestamp2, WriteTimestamp, commandBuffer, stage, queryPool, query);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags,
                      uint32_t offset, uint32_t size, const void *pValues)
{
   API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo)
{
   API_MARKER_ALIAS(BeginRendering, BeginRenderPass, commandBuffer, pRenderingInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndRendering(VkCommandBuffer commandBuffer)
{
   API_MARKER_ALIAS(EndRendering, EndRenderPass, commandBuffer);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers)
{
   API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
                                   const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
{
   /* There is no ExecuteIndirect Vulkan event in RGP yet. */
   API_MARKER_ALIAS(ExecuteGeneratedCommandsNV, ExecuteCommands, commandBuffer, isPreprocessed, pGeneratedCommandsInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
                    const VkViewport *pViewports)
{
   API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
                   const VkRect2D *pScissors)
{
   API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
   API_MARKER(SetLineWidth, commandBuffer, lineWidth);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp,
                     float depthBiasSlopeFactor)
{
   API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
{
   API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
{
   API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask)
{
   API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask)
{
   API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference)
{
   API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
}

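/* Debug marker/label entrypoints below are forwarded to SQTT as "user
 * event" markers (push/pop for nested regions, trigger for one-shot
 * labels), so application annotations show up on the RGP timeline.
 */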
/* VK_EXT_debug_marker */
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName);
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_DebugMarkerSetObjectNameEXT(VkDevice device, const VkDebugMarkerObjectNameInfoEXT *pNameInfo)
{
   /* no-op */
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo)
{
   /* no-op */
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, pLabelInfo->pLabelName);

   cmd_buffer->device->layer_dispatch.rgp.CmdBeginDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndDebugUtilsLabelEXT(VkCommandBuffer commandBuffer)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);

   cmd_buffer->device->layer_dispatch.rgp.CmdEndDebugUtilsLabelEXT(commandBuffer);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pLabelInfo->pLabelName);

   cmd_buffer->device->layer_dispatch.rgp.CmdInsertDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}

/* Pipelines */
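
/* Map a Mesa shader stage to the RGP hardware stage it executes as:
 * VS can run as HW LS, ES, GS (NGG) or VS; TES as HW ES, GS (NGG) or VS;
 * mesh and geometry shaders as HW GS; fragment as HW PS; and task,
 * compute and all ray tracing stages as HW CS.
 */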
static enum rgp_hardware_stages
radv_get_rgp_shader_stage(struct radv_shader *shader)
{
   switch (shader->info.stage) {
   case MESA_SHADER_VERTEX:
      if (shader->info.vs.as_ls)
         return RGP_HW_STAGE_LS;
      else if (shader->info.vs.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_TESS_CTRL:
      return RGP_HW_STAGE_HS;
   case MESA_SHADER_TESS_EVAL:
      if (shader->info.tes.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_MESH:
   case MESA_SHADER_GEOMETRY:
      return RGP_HW_STAGE_GS;
   case MESA_SHADER_FRAGMENT:
      return RGP_HW_STAGE_PS;
   case MESA_SHADER_TASK:
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_RAYGEN:
   case MESA_SHADER_CLOSEST_HIT:
   case MESA_SHADER_ANY_HIT:
   case MESA_SHADER_INTERSECTION:
   case MESA_SHADER_MISS:
   case MESA_SHADER_CALLABLE:
      return RGP_HW_STAGE_CS;
   default:
      unreachable("invalid mesa shader stage");
   }
}

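/* Fill one RGP shader-data record from a compiled shader. The 64-bit
 * "hash" is derived from the shader pointer, which is enough to keep
 * records unique within a capture. On GFX11 the fragment stage uses a
 * 1024-byte LDS allocation granularity; every other case uses the
 * device's LDS encode granularity.
 */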
static void
radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data,
                             struct radv_shader *shader, uint64_t va)
{
   struct radv_physical_device *pdevice = device->physical_device;
   unsigned lds_increment = pdevice->rad_info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT
                               ? 1024
                               : pdevice->rad_info.lds_encode_granularity;

   memset(shader_data->rt_shader_name, 0, sizeof(shader_data->rt_shader_name));
   shader_data->hash[0] = (uint64_t)(uintptr_t)shader;
   shader_data->hash[1] = (uint64_t)(uintptr_t)shader >> 32;
   shader_data->code_size = shader->code_size;
   shader_data->code = shader->code;
   shader_data->vgpr_count = shader->config.num_vgprs;
   shader_data->sgpr_count = shader->config.num_sgprs;
   shader_data->scratch_memory_size = shader->config.scratch_bytes_per_wave;
   shader_data->lds_size = shader->config.lds_size * lds_increment;
   shader_data->wavefront_size = shader->info.wave_size;
   shader_data->base_address = va & 0xffffffffffff;
   shader_data->elf_symbol_offset = 0;
   shader_data->hw_stage = radv_get_rgp_shader_stage(shader);
   shader_data->is_combined = false;
}

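/* Build an RGP code object record describing all shaders of a monolithic
 * (graphics or compute) pipeline and append it to the device-wide
 * rgp_code_object list under its lock.
 */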
static VkResult
radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)
{
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_code_object *code_object = &sqtt->rgp_code_object;
   struct rgp_code_object_record *record;

   record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = pipeline->pipeline_hash;
   record->pipeline_hash[1] = pipeline->pipeline_hash;
   record->is_rt = false;

   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      struct radv_shader *shader = pipeline->shaders[i];

      if (!shader)
         continue;

      radv_fill_code_object_record(device, &record->shader_data[i], shader, radv_sqtt_shader_get_va_reloc(pipeline, i));

      record->shader_stages_mask |= (1 << i);
      record->num_shaders_combined++;
   }

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}

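/* Ray tracing pipelines are reported to RGP as one code object record per
 * shader: each record holds a single stage, a synthetic name such as
 * "rgen_0" or "chit_2", and the scratch stack size that stage requires.
 */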
static VkResult
radv_add_rt_record(struct radv_device *device, struct rgp_code_object *code_object,
                   struct radv_ray_tracing_pipeline *pipeline, struct radv_shader *shader, uint32_t stack_size,
                   uint32_t index, uint64_t hash)
{
   struct rgp_code_object_record *record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   struct rgp_shader_data *shader_data = &record->shader_data[shader->info.stage];

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = hash;
   record->pipeline_hash[1] = hash;

   radv_fill_code_object_record(device, shader_data, shader, shader->va);
   shader_data->rt_stack_size = stack_size;

   record->shader_stages_mask |= (1 << shader->info.stage);
   record->is_rt = true;
   switch (shader->info.stage) {
   case MESA_SHADER_RAYGEN:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "rgen_%u", index);
      break;
   case MESA_SHADER_CLOSEST_HIT:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "chit_%u", index);
      break;
   case MESA_SHADER_MISS:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "miss_%u", index);
      break;
   case MESA_SHADER_INTERSECTION:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "traversal");
      break;
   case MESA_SHADER_CALLABLE:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "call_%u", index);
      break;
   case MESA_SHADER_COMPUTE:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "_amdgpu_cs_main");
      break;
   default:
      unreachable("invalid rt stage");
   }
   record->num_shaders_combined = 1;

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}

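/* Derive a unique hash for one stage of an RT pipeline by hashing the
 * pipeline hash together with the stage index; callers use the first
 * 8 bytes of the SHA-1 as the per-record 64-bit hash.
 */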
static void
compute_unique_rt_sha(uint64_t pipeline_hash, unsigned index, unsigned char sha1[SHA1_DIGEST_LENGTH])
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, &pipeline_hash, sizeof(pipeline_hash));
   _mesa_sha1_update(&ctx, &index, sizeof(index));
   _mesa_sha1_final(&ctx, sha1);
}

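/* Register a single RT stage: add the PSO correlation (per-stage hash to
 * API-level pipeline hash), the loader event carrying the shader VA, and
 * the code object record itself.
 */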
static VkResult
radv_register_rt_stage(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, uint32_t index,
                       uint32_t stack_size, struct radv_shader *shader)
{
   unsigned char sha1[SHA1_DIGEST_LENGTH];

   compute_unique_rt_sha(pipeline->base.base.pipeline_hash, index, sha1);

   if (!ac_sqtt_add_pso_correlation(&device->sqtt, *(uint64_t *)sha1, pipeline->base.base.pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, *(uint64_t *)sha1, shader->va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   return radv_add_rt_record(device, &device->sqtt.rgp_code_object, pipeline, shader, stack_size, index,
                             *(uint64_t *)sha1);
}

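/* Register every stage of a ray tracing pipeline, then the combined
 * traversal shader (reported with the worst-case any-hit stack plus the
 * worst-case intersection stack) and finally the prolog shader.
 */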
static VkResult
radv_register_rt_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
   VkResult result = VK_SUCCESS;

   uint32_t max_any_hit_stack_size = 0;
   uint32_t max_intersection_stack_size = 0;

   for (unsigned i = 0; i < pipeline->stage_count; i++) {
      struct radv_ray_tracing_stage *stage = &pipeline->stages[i];
      if (stage->stage == MESA_SHADER_ANY_HIT)
         max_any_hit_stack_size = MAX2(max_any_hit_stack_size, stage->stack_size);
      else if (stage->stage == MESA_SHADER_INTERSECTION)
         max_intersection_stack_size = MAX2(max_intersection_stack_size, stage->stack_size);

      if (!stage->shader)
         continue;

      result = radv_register_rt_stage(device, pipeline, i, stage->stack_size, stage->shader);
      if (result != VK_SUCCESS)
         return result;
   }

   uint32_t idx = pipeline->stage_count;

   /* Combined traversal shader */
   if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]) {
      result = radv_register_rt_stage(device, pipeline, idx++, max_any_hit_stack_size + max_intersection_stack_size,
                                      pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Prolog */
   result = radv_register_rt_stage(device, pipeline, idx++, 0, pipeline->prolog);

   return result;
}

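/* Register a monolithic pipeline with SQTT: add the PSO correlation and a
 * loader event keyed on the lowest shader VA, so RGP can relate traced
 * waves back to this pipeline, then the code object record.
 */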
static VkResult
radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
{
   uint64_t base_va = ~0ull;
   VkResult result;

   if (!ac_sqtt_add_pso_correlation(&device->sqtt, pipeline->pipeline_hash, pipeline->pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Find the lowest shader BO VA. */
   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      struct radv_shader *shader = pipeline->shaders[i];
      uint64_t va;

      if (!shader)
         continue;

      va = radv_sqtt_shader_get_va_reloc(pipeline, i);
      base_va = MIN2(base_va, va);
   }

   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, pipeline->pipeline_hash, base_va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Keep the VkResult separate from the boolean ac_sqtt results above:
    * squeezing a VkResult through a bool would corrupt the error code. */
   result = radv_add_code_object(device, pipeline);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

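/* Remove the PSO correlation, loader event and code object records that
 * match the given hash; used when a pipeline is destroyed.
 */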
static void
radv_unregister_records(struct radv_device *device, uint64_t hash)
{
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_pso_correlation *pso_correlation = &sqtt->rgp_pso_correlation;
   struct rgp_loader_events *loader_events = &sqtt->rgp_loader_events;
   struct rgp_code_object *code_object = &sqtt->rgp_code_object;

   /* Destroy the PSO correlation record. */
   simple_mtx_lock(&pso_correlation->lock);
   list_for_each_entry_safe (struct rgp_pso_correlation_record, record, &pso_correlation->record, list) {
      if (record->pipeline_hash[0] == hash) {
         pso_correlation->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&pso_correlation->lock);

   /* Destroy the code object loader record. */
   simple_mtx_lock(&loader_events->lock);
   list_for_each_entry_safe (struct rgp_loader_events_record, record, &loader_events->record, list) {
      if (record->code_object_hash[0] == hash) {
         loader_events->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&loader_events->lock);

   /* Destroy the code object record. */
   simple_mtx_lock(&code_object->lock);
   list_for_each_entry_safe (struct rgp_code_object_record, record, &code_object->record, list) {
      if (record->pipeline_hash[0] == hash) {
         code_object->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&code_object->lock);
}

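/* Pipeline creation entrypoints: call down the layer chain first, then
 * register each successfully created pipeline with SQTT. Pipeline
 * libraries are skipped since they are never bound directly. Graphics
 * shaders are additionally relocated into a single buffer (see
 * radv_sqtt_reloc_graphics_shaders), presumably so the whole code object
 * can be reported at one contiguous base address.
 */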
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                             const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                             VkPipeline *pPipelines)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                               pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      const VkPipelineCreateFlagBits2KHR create_flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

      result = radv_sqtt_reloc_graphics_shaders(device, radv_pipeline_to_graphics(pipeline));
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                            const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                              pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                  VkPipelineCache pipelineCache, uint32_t count,
                                  const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateRayTracingPipelinesKHR(_device, deferredOperation, pipelineCache, count,
                                                                    pCreateInfos, pAllocator, pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      const VkPipelineCreateFlagBits2KHR create_flags = vk_rt_pipeline_create_flags(&pCreateInfos[i]);
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

      result = radv_register_rt_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

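/* Pipeline destruction: unregister every SQTT record before the pipeline
 * memory goes away. RT pipelines registered one record per stage hash, so
 * each per-stage hash has to be recomputed here.
 */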
VKAPI_ATTR void VKAPI_CALL
sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   /* Ray tracing pipelines have multiple records, each with their own hash. */
   if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
      /* One record for each stage, plus one for the traversal shader and one for the prolog. */
      uint32_t record_count = radv_pipeline_to_ray_tracing(pipeline)->stage_count + 2;
      unsigned char sha1[SHA1_DIGEST_LENGTH];
      for (uint32_t i = 0; i < record_count; ++i) {
         compute_unique_rt_sha(pipeline->pipeline_hash, i, sha1);
         radv_unregister_records(device, *(uint64_t *)sha1);
      }
   } else {
      radv_unregister_records(device, pipeline->pipeline_hash);
   }

   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
      struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;

      radv_free_shader_memory(device, reloc->alloc);
      free(reloc);
   }

   device->layer_dispatch.rgp.DestroyPipeline(_device, _pipeline, pAllocator);
}

#undef API_MARKER