/*
 * Copyright © 2020 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "radv_cmd_buffer.h"
#include "radv_cs.h"
#include "radv_entrypoints.h"
#include "radv_pipeline_rt.h"
#include "radv_queue.h"
#include "radv_shader.h"
#include "radv_spm.h"
#include "radv_sqtt.h"
#include "vk_common_entrypoints.h"
#include "vk_semaphore.h"

#include "ac_rgp.h"
#include "ac_sqtt.h"

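/* RGP/SQTT layer: this file emits the RGP markers (command buffer start/end,
 * events, barriers, pipeline binds, user events) that the Radeon GPU Profiler
 * uses to correlate API-level work with SQ thread traces, and wraps the
 * relevant Vulkan entrypoints (sqtt_*) so that draws, dispatches, submits and
 * presents are annotated and timestamped.
 */
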
void
radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
   struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
   struct radeon_cmdbuf *cs = cmd_buffer->cs;
   uint64_t va;

   radv_cs_add_buffer(device->ws, cs, reloc->bo);

   /* VS */
   if (pipeline->base.shaders[MESA_SHADER_VERTEX]) {
      struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_VERTEX];

      va = reloc->va[MESA_SHADER_VERTEX];
      if (vs->info.vs.as_ls) {
         radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
      } else if (vs->info.vs.as_es) {
         radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else if (vs->info.is_ngg) {
         radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
      } else {
         radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* TCS */
   if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]) {
      const struct radv_shader *tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL];

      va = reloc->va[MESA_SHADER_TESS_CTRL];

      if (gfx_level >= GFX9) {
         radeon_set_sh_reg(cs, tcs->info.regs.pgm_lo, va >> 8);
      } else {
         radeon_set_sh_reg_seq(cs, tcs->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
      }
   }

   /* TES */
   if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]) {
      struct radv_shader *tes = pipeline->base.shaders[MESA_SHADER_TESS_EVAL];

      va = reloc->va[MESA_SHADER_TESS_EVAL];
      if (tes->info.is_ngg) {
         radeon_set_sh_reg(cs, tes->info.regs.pgm_lo, va >> 8);
      } else if (tes->info.tes.as_es) {
         radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
      } else {
         radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
         radeon_emit(cs, va >> 8);
         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
      }
   }

   /* GS */
   if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) {
      struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];

      va = reloc->va[MESA_SHADER_GEOMETRY];
      if (gs->info.is_ngg) {
         radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
      } else {
         if (gfx_level >= GFX9) {
            radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
         } else {
            radeon_set_sh_reg_seq(cs, gs->info.regs.pgm_lo, 2);
            radeon_emit(cs, va >> 8);
            radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
         }
      }
   }

   /* FS */
   if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]) {
      const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];

      va = reloc->va[MESA_SHADER_FRAGMENT];

      radeon_set_sh_reg_seq(cs, ps->info.regs.pgm_lo, 2);
      radeon_emit(cs, va >> 8);
      radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
   }

   /* MS */
   if (pipeline->base.shaders[MESA_SHADER_MESH]) {
      const struct radv_shader *ms = pipeline->base.shaders[MESA_SHADER_MESH];

      va = reloc->va[MESA_SHADER_MESH];

      radeon_set_sh_reg(cs, ms->info.regs.pgm_lo, va >> 8);
   }
}

static uint64_t
radv_sqtt_shader_get_va_reloc(struct radv_pipeline *pipeline, gl_shader_stage stage)
{
   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
      struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;
      return reloc->va[stage];
   }

   return radv_shader_get_va(pipeline->shaders[stage]);
}

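/* RGP wants all shaders of a pipeline to be contiguous in memory. The helper
 * below builds such a relocated copy: it sums the aligned code sizes,
 * allocates one slab, copies each stage at a RADV_SHADER_ALLOC_ALIGNMENT-
 * aligned offset, and goes through the shader DMA queue when shaders live in
 * CPU-invisible VRAM.
 */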
static VkResult
radv_sqtt_reloc_graphics_shaders(struct radv_device *device, struct radv_graphics_pipeline *pipeline)
{
   struct radv_shader_dma_submission *submission = NULL;
   struct radv_sqtt_shaders_reloc *reloc;
   uint32_t code_size = 0;
   VkResult result;

   reloc = calloc(1, sizeof(*reloc));
   if (!reloc)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Compute the total code size. */
   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      const struct radv_shader *shader = pipeline->base.shaders[i];
      if (!shader)
         continue;

      code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   /* Allocate memory for all shader binaries. */
   reloc->alloc = radv_alloc_shader_memory(device, code_size, false, pipeline);
   if (!reloc->alloc) {
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto fail;
   }

   reloc->bo = reloc->alloc->arena->bo;

   /* Relocate shader binaries to be contiguous in memory as requested by RGP. */
   uint64_t slab_va = radv_buffer_get_va(reloc->bo) + reloc->alloc->offset;
   char *slab_ptr = reloc->alloc->arena->ptr + reloc->alloc->offset;
   uint64_t offset = 0;

   if (device->shader_use_invisible_vram) {
      submission = radv_shader_dma_get_submission(device, reloc->bo, slab_va, code_size);
      if (!submission) {
         result = VK_ERROR_UNKNOWN;
         goto fail;
      }
   }

   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      const struct radv_shader *shader = pipeline->base.shaders[i];
      void *dest_ptr;
      if (!shader)
         continue;

      reloc->va[i] = slab_va + offset;

      if (device->shader_use_invisible_vram)
         dest_ptr = submission->ptr + offset;
      else
         dest_ptr = slab_ptr + offset;

      memcpy(dest_ptr, shader->code, shader->code_size);

      offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   if (device->shader_use_invisible_vram) {
      uint64_t upload_seq = 0;

      if (!radv_shader_dma_submit(device, submission, &upload_seq)) {
         result = VK_ERROR_UNKNOWN;
         goto fail;
      }

      for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
         struct radv_shader *shader = pipeline->base.shaders[i];

         if (!shader)
            continue;

         shader->upload_seq = upload_seq;
      }

      if (pipeline->base.gs_copy_shader)
         pipeline->base.gs_copy_shader->upload_seq = upload_seq;
   }

   pipeline->sqtt_shaders_reloc = reloc;

   return VK_SUCCESS;

fail:
   if (reloc->alloc)
      radv_free_shader_memory(device, reloc->alloc);
   free(reloc);
   return result;
}

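/* All markers below follow the same pattern: fill one of the packed
 * rgp_sqtt_marker_* structs and emit it through radv_emit_sqtt_userdata(),
 * which takes the payload size in dwords (hence the sizeof(marker) / 4).
 */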
static void
radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type)
{
   struct rgp_sqtt_marker_general_api marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
   marker.api_type = api_type;
   marker.is_end = 1;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
                        uint32_t vertex_offset_user_data, uint32_t instance_offset_user_data,
                        uint32_t draw_index_user_data)
{
   struct rgp_sqtt_marker_event marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.api_type = api_type;
   marker.cmd_id = cmd_buffer->state.num_events++;
   marker.cb_id = cmd_buffer->sqtt_cb_id;

   if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) {
      vertex_offset_user_data = 0;
      instance_offset_user_data = 0;
   }

   if (draw_index_user_data == UINT_MAX)
      draw_index_user_data = vertex_offset_user_data;

   marker.vertex_offset_reg_idx = vertex_offset_user_data;
   marker.instance_offset_reg_idx = instance_offset_user_data;
   marker.draw_index_reg_idx = draw_index_user_data;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

static void
radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_event_type api_type,
                                  uint32_t x, uint32_t y, uint32_t z)
{
   struct rgp_sqtt_marker_event_with_dims marker = {0};

   marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.event.api_type = api_type;
   marker.event.cmd_id = cmd_buffer->state.num_events++;
   marker.event.cb_id = cmd_buffer->sqtt_cb_id;
   marker.event.has_thread_dims = 1;

   marker.thread_x = x;
   marker.thread_y = y;
   marker.thread_z = z;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_user_event_type type,
                             const char *str)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   if (type == UserEventPop) {
      assert(str == NULL);
      struct rgp_sqtt_marker_user_event marker = {0};
      marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.data_type = type;

      radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
   } else {
      assert(str != NULL);
      unsigned len = strlen(str);
      struct rgp_sqtt_marker_user_event_with_length marker = {0};
      marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.user_event.data_type = type;
      marker.length = align(len, 4);

      uint8_t *buffer = alloca(sizeof(marker) + marker.length);
      memset(buffer, 0, sizeof(marker) + marker.length);
      memcpy(buffer, &marker, sizeof(marker));
      memcpy(buffer + sizeof(marker), str, len);

      radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4);
   }
}

void
radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   uint64_t device_id = (uintptr_t)device;
   struct rgp_sqtt_marker_cb_start marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   /* Reserve a command buffer ID for SQTT. */
   const struct radv_physical_device *pdev = radv_device_physical(device);
   enum amd_ip_type ip_type = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
   union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&device->sqtt, ip_type);
   cmd_buffer->sqtt_cb_id = cb_id.all;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;
   marker.queue = cmd_buffer->qf;
   marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;

   if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
      marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;

   if (!radv_sparse_queue_enabled(pdev))
      marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   uint64_t device_id = (uintptr_t)device;
   struct rgp_sqtt_marker_cb_end marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.device_id_low = device_id;
   marker.device_id_high = device_id >> 32;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
}

void
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   if (info->indirect) {
      radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, UINT_MAX);
   } else {
      radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, info->blocks[0],
                                        info->blocks[1], info->blocks[2]);
   }
}

void
radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlagBits aspects)
{
   cmd_buffer->state.current_event_type =
      (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ? EventRenderPassColorClear : EventRenderPassDepthStencilClear;
}

void
radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

void
radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventRenderPassResolve;
}

void
radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.current_event_type = EventInternalUnknown;
}

void
radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_barrier_end marker = {0};

   if (likely(!device->sqtt.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)
      return;

   cmd_buffer->state.pending_sqtt_barrier_end = false;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
   marker.cb_id = cmd_buffer->sqtt_cb_id;

   marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;

   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
      marker.wait_on_eop_ts = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
      marker.vs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
      marker.ps_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
      marker.cs_partial_flush = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
      marker.pfp_sync_me = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
      marker.sync_cp_dma = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
      marker.inval_tcp = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
      marker.inval_sqI = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
      marker.inval_sqK = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
      marker.flush_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
      marker.inval_tcc = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
      marker.flush_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
      marker.inval_cb = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
      marker.flush_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
      marker.inval_db = true;
   if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
      marker.inval_gl1 = true;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions = 0;
}

void
radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_barrier_start marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   if (cmd_buffer->state.in_barrier) {
      assert(!"attempted to start a barrier while already in a barrier");
      return;
   }

   radv_describe_barrier_end_delayed(cmd_buffer);
   cmd_buffer->state.sqtt_flush_bits = 0;
   cmd_buffer->state.in_barrier = true;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.dword02 = reason;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

void
radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.in_barrier = false;
   cmd_buffer->state.pending_sqtt_barrier_end = true;
}

void
radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_layout_transition marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   if (!cmd_buffer->state.in_barrier) {
      assert(!"layout transition marker should be only emitted inside a barrier marker");
      return;
   }

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
   marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
   marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
   marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
   marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
   marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
   marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
   marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
   marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);

   cmd_buffer->state.num_layout_transitions++;
}

void
radv_describe_begin_accel_struct_build(struct radv_cmd_buffer *cmd_buffer, uint32_t count)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (likely(!device->sqtt.bo))
      return;

   char marker[64];
   snprintf(marker, sizeof(marker), "vkCmdBuildAccelerationStructuresKHR(%u)", count);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, marker);
}

void
radv_describe_end_accel_struct_build(struct radv_cmd_buffer *cmd_buffer)
{
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

static void
radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint pipelineBindPoint,
                            struct radv_pipeline *pipeline)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct rgp_sqtt_marker_pipeline_bind marker = {0};

   if (likely(!device->sqtt.bo))
      return;

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
   marker.cb_id = cmd_buffer->sqtt_cb_id;
   marker.bind_point = pipelineBindPoint;
   marker.api_pso_hash[0] = pipeline->pipeline_hash;
   marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;

   radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}

/* Queue events */
static void
radv_describe_queue_event(struct radv_queue *queue, struct rgp_queue_event_record *record)
{
   struct radv_device *device = radv_queue_device(queue);
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_queue_event *queue_event = &sqtt->rgp_queue_event;

   simple_mtx_lock(&queue_event->lock);
   list_addtail(&record->list, &queue_event->record);
   queue_event->record_count++;
   simple_mtx_unlock(&queue_event->lock);
}
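
/* The describers below heap-allocate a record that pairs a CPU timestamp with
 * pointers to GPU timestamps which the timed command buffers (built in
 * sqtt_QueueSubmit2()/radv_sqtt_wsi_submit() below) write at execution time,
 * which is what lets the capture correlate the CPU and GPU timelines.
 */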

static VkResult
radv_describe_queue_present(struct radv_queue *queue, uint64_t cpu_timestamp, void *gpu_timestamp_ptr)
{
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = SQTT_QUEUE_TIMING_EVENT_PRESENT;
   record->cpu_timestamp = cpu_timestamp;
   record->gpu_timestamps[0] = gpu_timestamp_ptr;
   record->queue_info_index = queue->vk.queue_family_index;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static VkResult
radv_describe_queue_submit(struct radv_queue *queue, struct radv_cmd_buffer *cmd_buffer, uint32_t cmdbuf_idx,
                           uint64_t cpu_timestamp, void *pre_gpu_timestamp_ptr, void *post_gpu_timestamp_ptr)
{
   struct radv_device *device = radv_queue_device(queue);
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = SQTT_QUEUE_TIMING_EVENT_CMDBUF_SUBMIT;
   record->api_id = (uintptr_t)cmd_buffer;
   record->cpu_timestamp = cpu_timestamp;
   record->frame_index = device->vk.current_frame;
   record->gpu_timestamps[0] = pre_gpu_timestamp_ptr;
   record->gpu_timestamps[1] = post_gpu_timestamp_ptr;
   record->queue_info_index = queue->vk.queue_family_index;
   record->submit_sub_index = cmdbuf_idx;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static VkResult
radv_describe_queue_semaphore(struct radv_queue *queue, struct vk_semaphore *sync,
                              enum sqtt_queue_event_type event_type)
{
   struct rgp_queue_event_record *record;

   record = calloc(1, sizeof(struct rgp_queue_event_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->event_type = event_type;
   record->api_id = (uintptr_t)sync;
   record->cpu_timestamp = os_time_get_nano();
   record->queue_info_index = queue->vk.queue_family_index;

   radv_describe_queue_event(queue, record);

   return VK_SUCCESS;
}

static void
radv_handle_sqtt(VkQueue _queue)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   bool trigger = device->sqtt_triggered;
   device->sqtt_triggered = false;

   if (device->sqtt_enabled) {
      if (!radv_sqtt_stop_capturing(queue)) {
         /* Try to capture the next frame if the buffer was too small initially. */
         trigger = true;
      }
   }

   if (trigger) {
      radv_sqtt_start_capturing(queue);
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   VkResult result;

   queue->sqtt_present = true;

   result = device->layer_dispatch.rgp.QueuePresentKHR(_queue, pPresentInfo);
   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
      return result;

   queue->sqtt_present = false;

   radv_handle_sqtt(_queue);

   return VK_SUCCESS;
}

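/* Present path: a single timed command buffer that writes a TOP_OF_PIPE
 * timestamp is prepended to the submission, so the present event recorded by
 * radv_describe_queue_present() gets a matching GPU timestamp.
 */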
static VkResult
radv_sqtt_wsi_submit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
   struct radeon_winsys_bo *gpu_timestamp_bo;
   uint32_t gpu_timestamp_offset;
   VkCommandBuffer timed_cmdbuf;
   void *gpu_timestamp_ptr;
   uint64_t cpu_timestamp;
   VkResult result = VK_SUCCESS;

   assert(submitCount <= 1 && pSubmits != NULL);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];
      VkSubmitInfo2 sqtt_submit = *pSubmit;

      assert(sqtt_submit.commandBufferInfoCount <= 1);

      /* Command buffers */
      uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount + 1;

      new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
      if (!new_cmdbufs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      /* Sample the current CPU time before building the GPU timestamp cmdbuf. */
      cpu_timestamp = os_time_get_nano();

      result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamp_bo, &gpu_timestamp_offset, &gpu_timestamp_ptr);
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamp_bo, gpu_timestamp_offset,
                                          VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &timed_cmdbuf);
      if (result != VK_SUCCESS)
         goto fail;

      new_cmdbufs[0] = (VkCommandBufferSubmitInfo){
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
         .commandBuffer = timed_cmdbuf,
      };

      if (sqtt_submit.commandBufferInfoCount == 1)
         new_cmdbufs[1] = sqtt_submit.pCommandBufferInfos[0];

      sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
      sqtt_submit.pCommandBufferInfos = new_cmdbufs;

      radv_describe_queue_present(queue, cpu_timestamp, gpu_timestamp_ptr);

      result = device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
      if (result != VK_SUCCESS)
         goto fail;

      FREE(new_cmdbufs);
   }

   return result;

fail:
   FREE(new_cmdbufs);
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_QueueSubmit2(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
{
   VK_FROM_HANDLE(radv_queue, queue, _queue);
   struct radv_device *device = radv_queue_device(queue);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   const bool is_gfx_or_ace = queue->state.qf == RADV_QUEUE_GENERAL || queue->state.qf == RADV_QUEUE_COMPUTE;
   VkCommandBufferSubmitInfo *new_cmdbufs = NULL;
   VkResult result = VK_SUCCESS;

   /* Only consider queue events on graphics/compute when enabled. */
   if (((!device->sqtt_enabled || !radv_sqtt_queue_events_enabled()) && !instance->vk.trace_per_submit) ||
       !is_gfx_or_ace)
      return device->layer_dispatch.rgp.QueueSubmit2(_queue, submitCount, pSubmits, _fence);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];

      /* Wait semaphores */
      for (uint32_t j = 0; j < pSubmit->waitSemaphoreInfoCount; j++) {
         const VkSemaphoreSubmitInfo *pWaitSemaphoreInfo = &pSubmit->pWaitSemaphoreInfos[j];
         VK_FROM_HANDLE(vk_semaphore, sem, pWaitSemaphoreInfo->semaphore);
         radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_WAIT_SEMAPHORE);
      }
   }

   if (queue->sqtt_present)
      return radv_sqtt_wsi_submit(_queue, submitCount, pSubmits, _fence);

   if (instance->vk.trace_per_submit)
      radv_sqtt_start_capturing(queue);

   for (uint32_t i = 0; i < submitCount; i++) {
      const VkSubmitInfo2 *pSubmit = &pSubmits[i];
      VkSubmitInfo2 sqtt_submit = *pSubmit;

      /* Command buffers */
      uint32_t new_cmdbuf_count = sqtt_submit.commandBufferInfoCount * 3;
      uint32_t cmdbuf_idx = 0;

      new_cmdbufs = malloc(new_cmdbuf_count * sizeof(*new_cmdbufs));
      if (!new_cmdbufs)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      for (uint32_t j = 0; j < sqtt_submit.commandBufferInfoCount; j++) {
         const VkCommandBufferSubmitInfo *pCommandBufferInfo = &sqtt_submit.pCommandBufferInfos[j];
         struct radeon_winsys_bo *gpu_timestamps_bo[2];
         uint32_t gpu_timestamps_offset[2];
         VkCommandBuffer pre_timed_cmdbuf, post_timed_cmdbuf;
         void *gpu_timestamps_ptr[2];
         uint64_t cpu_timestamp;

         /* Sample the current CPU time before building the timed cmdbufs. */
         cpu_timestamp = os_time_get_nano();

         result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamps_bo[0], &gpu_timestamps_offset[0],
                                                  &gpu_timestamps_ptr[0]);
         if (result != VK_SUCCESS)
            goto fail;

         result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[0], gpu_timestamps_offset[0],
                                             VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, &pre_timed_cmdbuf);
         if (result != VK_SUCCESS)
            goto fail;

         new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = pre_timed_cmdbuf,
         };

         new_cmdbufs[cmdbuf_idx++] = *pCommandBufferInfo;

         result = radv_sqtt_acquire_gpu_timestamp(device, &gpu_timestamps_bo[1], &gpu_timestamps_offset[1],
                                                  &gpu_timestamps_ptr[1]);
         if (result != VK_SUCCESS)
            goto fail;

         result = radv_sqtt_get_timed_cmdbuf(queue, gpu_timestamps_bo[1], gpu_timestamps_offset[1],
                                             VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, &post_timed_cmdbuf);
         if (result != VK_SUCCESS)
            goto fail;

         new_cmdbufs[cmdbuf_idx++] = (VkCommandBufferSubmitInfo){
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = post_timed_cmdbuf,
         };

         VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBufferInfo->commandBuffer);
         radv_describe_queue_submit(queue, cmd_buffer, j, cpu_timestamp, gpu_timestamps_ptr[0], gpu_timestamps_ptr[1]);
      }

      sqtt_submit.commandBufferInfoCount = new_cmdbuf_count;
      sqtt_submit.pCommandBufferInfos = new_cmdbufs;

      result = device->layer_dispatch.rgp.QueueSubmit2(_queue, 1, &sqtt_submit, _fence);
      if (result != VK_SUCCESS)
         goto fail;

      /* Signal semaphores */
      for (uint32_t j = 0; j < sqtt_submit.signalSemaphoreInfoCount; j++) {
         const VkSemaphoreSubmitInfo *pSignalSemaphoreInfo = &sqtt_submit.pSignalSemaphoreInfos[j];
         VK_FROM_HANDLE(vk_semaphore, sem, pSignalSemaphoreInfo->semaphore);
         radv_describe_queue_semaphore(queue, sem, SQTT_QUEUE_TIMING_EVENT_SIGNAL_SEMAPHORE);
      }

      FREE(new_cmdbufs);
   }

   if (instance->vk.trace_per_submit) {
      if (!radv_sqtt_stop_capturing(queue)) {
         fprintf(stderr,
                 "radv: Failed to capture RGP for this submit because the buffer is too small and auto-resizing "
                 "is disabled. See RADV_THREAD_TRACE_BUFFER_SIZE for increasing the size.\n");
      }
   }

   return result;

fail:
   FREE(new_cmdbufs);
   return result;
}

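/* EVENT_MARKER_BASE brackets the forwarded Cmd* call with begin/end general
 * API markers and sets state.current_event_type so that radv_describe_draw()
 * and radv_describe_dispatch() tag the resulting event with the right RGP
 * event type.
 */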
#define EVENT_MARKER_BASE(cmd_name, api_name, event_name, ...)                                                        \
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                                        \
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);                                                   \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                                                 \
   cmd_buffer->state.current_event_type = EventCmd##event_name;                                                       \
   device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__);                                                             \
   cmd_buffer->state.current_event_type = EventInternalUnknown;                                                       \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) EVENT_MARKER_BASE(cmd_name, api_name, api_name, __VA_ARGS__);

#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex,
             uint32_t firstInstance)
{
   EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex,
                    int32_t vertexOffset, uint32_t firstInstance)
{
   EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                     uint32_t stride)
{
   EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount,
                            uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer,
                          VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
{
   EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                 uint32_t stride)
{
   EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, maxDrawCount,
                stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   EVENT_MARKER_ALIAS(DispatchBase, Dispatch, commandBuffer, 0, 0, 0, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset)
{
   EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyBuffer2, CopyBuffer, commandBuffer, pCopyBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize fillSize,
                   uint32_t data)
{
   EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize,
                     const void *pData)
{
   EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyImageInfo)
{
   EVENT_MARKER_ALIAS(CopyImage2, CopyImage, commandBuffer, pCopyImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   EVENT_MARKER_ALIAS(CopyBufferToImage2, CopyBufferToImage, commandBuffer, pCopyBufferToImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   EVENT_MARKER_ALIAS(CopyImageToBuffer2, CopyImageToBuffer, commandBuffer, pCopyImageToBufferInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBlitImage2(VkCommandBuffer commandBuffer, const VkBlitImageInfo2 *pBlitImageInfo)
{
   EVENT_MARKER_ALIAS(BlitImage2, BlitImage, commandBuffer, pBlitImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                        const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
                               const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
                               const VkImageSubresourceRange *pRanges)
{
   EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil, rangeCount, pRanges);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
                         uint32_t rectCount, const VkClearRect *pRects)
{
   EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *pResolveImageInfo)
{
   EVENT_MARKER_ALIAS(ResolveImage2, ResolveImage, commandBuffer, pResolveImageInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
                    const VkDependencyInfo *pDependencyInfos)
{
   EVENT_MARKER_ALIAS(WaitEvents2, WaitEvents, commandBuffer, eventCount, pEvents, pDependencyInfos);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo)
{
   EVENT_MARKER_ALIAS(PipelineBarrier2, PipelineBarrier, commandBuffer, pDependencyInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount)
{
   EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
                             uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride,
                             VkQueryResultFlags flags)
{
   EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer, dstOffset, stride,
                flags);
}

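/* The ray tracing entrypoints reuse EVENT_MARKER_BASE with Dispatch as the
 * general API marker type (RT shaders execute as compute); the flags argument
 * (e.g. ApiRayTracingSeparateCompiled) is OR'ed into the EventCmd* value.
 */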
#define EVENT_RT_MARKER(cmd_name, flags, ...) EVENT_MARKER_BASE(cmd_name, Dispatch, cmd_name | flags, __VA_ARGS__);

#define EVENT_RT_MARKER_ALIAS(cmd_name, event_name, flags, ...)                                                       \
   EVENT_MARKER_BASE(cmd_name, Dispatch, event_name | flags, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                     const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, uint32_t width,
                     uint32_t height, uint32_t depth)
{
   EVENT_RT_MARKER(TraceRaysKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, width, height, depth);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
                             const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
                             const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
                             VkDeviceAddress indirectDeviceAddress)
{
   EVENT_RT_MARKER(TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer, pRaygenShaderBindingTable,
                   pMissShaderBindingTable, pHitShaderBindingTable, pCallableShaderBindingTable, indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, VkDeviceAddress indirectDeviceAddress)
{
   EVENT_RT_MARKER_ALIAS(TraceRaysIndirect2KHR, TraceRaysIndirectKHR, ApiRayTracingSeparateCompiled, commandBuffer,
                         indirectDeviceAddress);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyAccelerationStructureToMemoryKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
                                             const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
   EVENT_RT_MARKER(CopyMemoryToAccelerationStructureKHR, 0, commandBuffer, pInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
   EVENT_MARKER(DrawMeshTasksEXT, commandBuffer, x, y, z);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                 uint32_t drawCount, uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectEXT, commandBuffer, buffer, offset, drawCount, stride);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
                                      VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                      uint32_t stride)
{
   EVENT_MARKER(DrawMeshTasksIndirectCountEXT, commandBuffer, buffer, offset, countBuffer, countBufferOffset,
                maxDrawCount, stride);
}

#undef EVENT_RT_MARKER_ALIAS
#undef EVENT_RT_MARKER

#undef EVENT_MARKER
#undef EVENT_MARKER_ALIAS
#undef EVENT_MARKER_BASE

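/* Unlike EVENT_MARKER, API_MARKER only brackets the forwarded call with
 * begin/end general API markers and does not set an event type; it is used
 * for state-setting and binding commands that do not generate their own RGP
 * events.
 */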
#define API_MARKER_ALIAS(cmd_name, api_name, ...)                                                                     \
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);                                                        \
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);                                                   \
   radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name);                                                 \
   device->layer_dispatch.rgp.Cmd##cmd_name(__VA_ARGS__);                                                             \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);

#define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline)
{
   VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);

   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) {
      /* RGP seems to expect a compute bind point to detect and report RT pipelines, which makes
       * sense given that RT shaders are compiled to a unified compute shader.
       */
      radv_describe_pipeline_bind(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   } else {
      radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
   }
}
1174
1175 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,VkPipelineBindPoint pipelineBindPoint,VkPipelineLayout layout,uint32_t firstSet,uint32_t descriptorSetCount,const VkDescriptorSet * pDescriptorSets,uint32_t dynamicOffsetCount,const uint32_t * pDynamicOffsets)1176 sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
1177 VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount,
1178 const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
1179 const uint32_t *pDynamicOffsets)
1180 {
1181 API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet, descriptorSetCount,
1182 pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
1183 }
1184
1185 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,VkBuffer buffer,VkDeviceSize offset,VkIndexType indexType)1186 sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType)
1187 {
1188 API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
1189 }
1190
1191 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer,uint32_t firstBinding,uint32_t bindingCount,const VkBuffer * pBuffers,const VkDeviceSize * pOffsets,const VkDeviceSize * pSizes,const VkDeviceSize * pStrides)1192 sqtt_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount,
1193 const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
1194 const VkDeviceSize *pStrides)
1195 {
1196 API_MARKER_ALIAS(BindVertexBuffers2, BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers,
1197 pOffsets, pSizes, pStrides);
1198 }
1199
1200 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query,VkQueryControlFlags flags)1201 sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags)
1202 {
1203 API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
1204 }
1205
1206 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndQuery(VkCommandBuffer commandBuffer,VkQueryPool queryPool,uint32_t query)1207 sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)
1208 {
1209 API_MARKER(EndQuery, commandBuffer, queryPool, query);
1210 }
1211
1212 VKAPI_ATTR void VKAPI_CALL
sqtt_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool,
                        uint32_t query)
{
   API_MARKER_ALIAS(WriteTimestamp2, WriteTimestamp, commandBuffer, stage, queryPool, query);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags,
                      uint32_t offset, uint32_t size, const void *pValues)
{
   API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo)
{
   API_MARKER_ALIAS(BeginRendering, BeginRenderPass, commandBuffer, pRenderingInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndRendering(VkCommandBuffer commandBuffer)
{
   API_MARKER_ALIAS(EndRendering, EndRenderPass, commandBuffer);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers)
{
   API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
                                    const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo)
{
   /* There is no ExecuteIndirect Vulkan event in RGP yet. */
   API_MARKER_ALIAS(ExecuteGeneratedCommandsEXT, ExecuteCommands, commandBuffer, isPreprocessed,
                    pGeneratedCommandsInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
                    const VkViewport *pViewports)
{
   API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
                   const VkRect2D *pScissors)
{
   API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
   API_MARKER(SetLineWidth, commandBuffer, lineWidth);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp,
                     float depthBiasSlopeFactor)
{
   API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
{
   API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
{
   API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask)
{
   API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask)
{
   API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference)
{
   API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
}

/* VK_EXT_debug_marker */
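/* The debug-marker and debug-utils entry points below map application-provided
 * labels onto SQTT user-event markers: Begin/End pairs become push/pop markers
 * for nested regions, and Insert becomes a one-shot trigger marker, so RGP can
 * display the labels on the trace timeline. */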
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName);
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo)
{
   /* no-op */
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventPush, pLabelInfo->pLabelName);

   device->layer_dispatch.rgp.CmdBeginDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdEndDebugUtilsLabelEXT(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);

   device->layer_dispatch.rgp.CmdEndDebugUtilsLabelEXT(commandBuffer);
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdInsertDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pLabelInfo->pLabelName);

   device->layer_dispatch.rgp.CmdInsertDebugUtilsLabelEXT(commandBuffer, pLabelInfo);
}

/* Pipelines */
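/* Map a Mesa shader stage to the hardware stage RGP expects. API stages do not
 * map 1:1 to HW stages on AMD hardware: a vertex shader may run as LS (feeding
 * tessellation), as ES (feeding the geometry shader), or as an NGG primitive
 * shader on the HW GS stage, and similarly for tess-eval. Mesh shaders always
 * run on the HW GS stage, and all compute-like stages (including the ray
 * tracing stages) run on CS. */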
static enum rgp_hardware_stages
radv_get_rgp_shader_stage(struct radv_shader *shader)
{
   switch (shader->info.stage) {
   case MESA_SHADER_VERTEX:
      if (shader->info.vs.as_ls)
         return RGP_HW_STAGE_LS;
      else if (shader->info.vs.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_TESS_CTRL:
      return RGP_HW_STAGE_HS;
   case MESA_SHADER_TESS_EVAL:
      if (shader->info.tes.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_MESH:
   case MESA_SHADER_GEOMETRY:
      return RGP_HW_STAGE_GS;
   case MESA_SHADER_FRAGMENT:
      return RGP_HW_STAGE_PS;
   case MESA_SHADER_TASK:
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_RAYGEN:
   case MESA_SHADER_CLOSEST_HIT:
   case MESA_SHADER_ANY_HIT:
   case MESA_SHADER_INTERSECTION:
   case MESA_SHADER_MISS:
   case MESA_SHADER_CALLABLE:
      return RGP_HW_STAGE_CS;
   default:
      unreachable("invalid mesa shader stage");
   }
}

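/* Fill one RGP shader-data record from a compiled shader. The 64-bit "hash" is
 * simply the shader's host pointer split into two dwords; it only needs to be
 * unique within the trace, not stable across runs. */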
static void
radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data,
                             struct radv_shader *shader, uint64_t va)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   /* On GFX11+, pixel shader LDS size is encoded in 1024-byte granules; other
    * stages use the LDS encode granularity reported for the device. */
   unsigned lds_increment = pdev->info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT
                               ? 1024
                               : pdev->info.lds_encode_granularity;

   memset(shader_data->rt_shader_name, 0, sizeof(shader_data->rt_shader_name));
   shader_data->hash[0] = (uint64_t)(uintptr_t)shader;
   shader_data->hash[1] = (uint64_t)(uintptr_t)shader >> 32;
   shader_data->code_size = shader->code_size;
   shader_data->code = shader->code;
   shader_data->vgpr_count = shader->config.num_vgprs;
   shader_data->sgpr_count = shader->config.num_sgprs;
   shader_data->scratch_memory_size = shader->config.scratch_bytes_per_wave;
   shader_data->lds_size = shader->config.lds_size * lds_increment;
   shader_data->wavefront_size = shader->info.wave_size;
   shader_data->base_address = va & 0xffffffffffff;
   shader_data->elf_symbol_offset = 0;
   shader_data->hw_stage = radv_get_rgp_shader_stage(shader);
   shader_data->is_combined = false;
}

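/* Build the RGP code-object record for a monolithic (graphics or compute)
 * pipeline: one shader-data entry per active stage, appended to the device's
 * code-object list under its lock. */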
static VkResult
radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)
{
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_code_object *code_object = &sqtt->rgp_code_object;
   struct rgp_code_object_record *record;

   record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = pipeline->pipeline_hash;
   record->pipeline_hash[1] = pipeline->pipeline_hash;
   record->is_rt = false;

   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      struct radv_shader *shader = pipeline->shaders[i];

      if (!shader)
         continue;

      radv_fill_code_object_record(device, &record->shader_data[i], shader, radv_sqtt_shader_get_va_reloc(pipeline, i));

      record->shader_stages_mask |= (1 << i);
      record->num_shaders_combined++;
   }

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}

static VkResult
radv_add_rt_record(struct radv_device *device, struct rgp_code_object *code_object,
                   struct radv_ray_tracing_pipeline *pipeline, struct radv_shader *shader, uint32_t stack_size,
                   uint32_t index, uint64_t hash)
{
   struct rgp_code_object_record *record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   struct rgp_shader_data *shader_data = &record->shader_data[shader->info.stage];

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = hash;
   record->pipeline_hash[1] = hash;

   radv_fill_code_object_record(device, shader_data, shader, shader->va);
   shader_data->rt_stack_size = stack_size;

   record->shader_stages_mask |= (1 << shader->info.stage);
   record->is_rt = true;
   switch (shader->info.stage) {
   case MESA_SHADER_RAYGEN:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "rgen_%u", index);
      break;
   case MESA_SHADER_CLOSEST_HIT:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "chit_%u", index);
      break;
   case MESA_SHADER_MISS:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "miss_%u", index);
      break;
   case MESA_SHADER_INTERSECTION:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "traversal");
      break;
   case MESA_SHADER_CALLABLE:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "call_%u", index);
      break;
   case MESA_SHADER_COMPUTE:
      snprintf(shader_data->rt_shader_name, sizeof(shader_data->rt_shader_name), "_amdgpu_cs_main");
      break;
   default:
      unreachable("invalid rt stage");
   }
   record->num_shaders_combined = 1;

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}

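/* Each ray tracing stage is reported to RGP as its own code object, so every
 * record needs a hash that is unique per stage yet still correlatable with the
 * pipeline: hash the pipeline hash together with the stage index and truncate
 * the SHA-1 to 64 bits. sqtt_DestroyPipeline recomputes the same hashes when
 * unregistering the records. */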
static void
compute_unique_rt_sha(uint64_t pipeline_hash, unsigned index, unsigned char sha1[SHA1_DIGEST_LENGTH])
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, &pipeline_hash, sizeof(pipeline_hash));
   _mesa_sha1_update(&ctx, &index, sizeof(index));
   _mesa_sha1_final(&ctx, sha1);
}

static VkResult
radv_register_rt_stage(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, uint32_t index,
                       uint32_t stack_size, struct radv_shader *shader)
{
   unsigned char sha1[SHA1_DIGEST_LENGTH];

   compute_unique_rt_sha(pipeline->base.base.pipeline_hash, index, sha1);

   /* The ac_sqtt helpers return a bool, not a VkResult. */
   if (!ac_sqtt_add_pso_correlation(&device->sqtt, *(uint64_t *)sha1, pipeline->base.base.pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, *(uint64_t *)sha1, shader->va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   return radv_add_rt_record(device, &device->sqtt.rgp_code_object, pipeline, shader, stack_size, index,
                             *(uint64_t *)sha1);
}

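/* Register every stage of a ray tracing pipeline, then two extra records: the
 * combined traversal shader (whose stack size is the worst-case any-hit plus
 * intersection stack) and the prolog. sqtt_DestroyPipeline relies on this
 * layout when it unregisters stage_count + 2 records. */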
static VkResult
radv_register_rt_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
   VkResult result = VK_SUCCESS;

   uint32_t max_any_hit_stack_size = 0;
   uint32_t max_intersection_stack_size = 0;

   for (unsigned i = 0; i < pipeline->stage_count; i++) {
      struct radv_ray_tracing_stage *stage = &pipeline->stages[i];
      if (stage->stage == MESA_SHADER_ANY_HIT)
         max_any_hit_stack_size = MAX2(max_any_hit_stack_size, stage->stack_size);
      else if (stage->stage == MESA_SHADER_INTERSECTION)
         max_intersection_stack_size = MAX2(max_intersection_stack_size, stage->stack_size);

      if (!stage->shader)
         continue;

      result = radv_register_rt_stage(device, pipeline, i, stage->stack_size, stage->shader);
      if (result != VK_SUCCESS)
         return result;
   }

   uint32_t idx = pipeline->stage_count;

   /* Combined traversal shader */
   if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]) {
      result = radv_register_rt_stage(device, pipeline, idx++, max_any_hit_stack_size + max_intersection_stack_size,
                                      pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Prolog */
   result = radv_register_rt_stage(device, pipeline, idx++, 0, pipeline->prolog);

   return result;
}

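/* Register a graphics or compute pipeline with the SQTT state: add the PSO
 * correlation, report the lowest shader VA as the code object's load address,
 * and append the code-object record. */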
static VkResult
radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
{
   uint64_t base_va = ~0ull;

   /* The ac_sqtt helpers return a bool, not a VkResult. */
   if (!ac_sqtt_add_pso_correlation(&device->sqtt, pipeline->pipeline_hash, pipeline->pipeline_hash))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Find the lowest shader BO VA. */
   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      struct radv_shader *shader = pipeline->shaders[i];
      uint64_t va;

      if (!shader)
         continue;

      va = radv_sqtt_shader_get_va_reloc(pipeline, i);
      base_va = MIN2(base_va, va);
   }

   if (!ac_sqtt_add_code_object_loader_event(&device->sqtt, pipeline->pipeline_hash, base_va))
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   return radv_add_code_object(device, pipeline);
}

static void
radv_unregister_records(struct radv_device *device, uint64_t hash)
{
   struct ac_sqtt *sqtt = &device->sqtt;
   struct rgp_pso_correlation *pso_correlation = &sqtt->rgp_pso_correlation;
   struct rgp_loader_events *loader_events = &sqtt->rgp_loader_events;
   struct rgp_code_object *code_object = &sqtt->rgp_code_object;

   /* Destroy the PSO correlation record. */
   simple_mtx_lock(&pso_correlation->lock);
   list_for_each_entry_safe (struct rgp_pso_correlation_record, record, &pso_correlation->record, list) {
      if (record->pipeline_hash[0] == hash) {
         pso_correlation->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&pso_correlation->lock);

   /* Destroy the code object loader record. */
   simple_mtx_lock(&loader_events->lock);
   list_for_each_entry_safe (struct rgp_loader_events_record, record, &loader_events->record, list) {
      if (record->code_object_hash[0] == hash) {
         loader_events->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&loader_events->lock);

   /* Destroy the code object record. */
   simple_mtx_lock(&code_object->lock);
   list_for_each_entry_safe (struct rgp_code_object_record, record, &code_object->record, list) {
      if (record->pipeline_hash[0] == hash) {
         code_object->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&code_object->lock);
}

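/* Pipeline creation entry points: create the pipeline through the layer
 * dispatch, then register it with SQTT. Graphics shaders are first relocated
 * into a single buffer (radv_sqtt_reloc_graphics_shaders) so that all stages
 * of a pipeline live in one contiguous region and can be reported with a
 * single base address. Pipeline libraries are skipped since they are not
 * executable on their own. */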
VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                             const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                             VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                               pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      const VkPipelineCreateFlagBits2 create_flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

      result = radv_sqtt_reloc_graphics_shaders(device, radv_pipeline_to_graphics(pipeline));
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                            const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = device->layer_dispatch.rgp.CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
                                                              pPipelines);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      result = radv_register_pipeline(device, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
sqtt_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                  VkPipelineCache pipelineCache, uint32_t count,
                                  const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   const VkResult create_result = device->layer_dispatch.rgp.CreateRayTracingPipelinesKHR(
      _device, deferredOperation, pipelineCache, count, pCreateInfos, pAllocator, pPipelines);
   if (create_result != VK_SUCCESS && create_result != VK_OPERATION_DEFERRED_KHR)
      return create_result;

   for (unsigned i = 0; i < count; i++) {
      VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

      if (!pipeline)
         continue;

      const VkPipelineCreateFlagBits2 create_flags = vk_rt_pipeline_create_flags(&pCreateInfos[i]);
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
         continue;

      result = radv_register_rt_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
      if (result != VK_SUCCESS)
         goto fail;
   }

   /* Preserve VK_OPERATION_DEFERRED_KHR so callers know to join the deferred
    * operation. */
   return create_result;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VKAPI_ATTR void VKAPI_CALL
sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   /* Ray tracing pipelines have multiple records, each with their own hash. */
   if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
      /* One record per stage, plus one for the traversal shader and one for the prolog. */
      uint32_t record_count = radv_pipeline_to_ray_tracing(pipeline)->stage_count + 2;
      unsigned char sha1[SHA1_DIGEST_LENGTH];
      for (uint32_t i = 0; i < record_count; ++i) {
         compute_unique_rt_sha(pipeline->pipeline_hash, i, sha1);
         radv_unregister_records(device, *(uint64_t *)sha1);
      }
   } else {
      radv_unregister_records(device, pipeline->pipeline_hash);
   }

   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
      struct radv_sqtt_shaders_reloc *reloc = graphics_pipeline->sqtt_shaders_reloc;

      radv_free_shader_memory(device, reloc->alloc);
      free(reloc);
   }

   device->layer_dispatch.rgp.DestroyPipeline(_device, _pipeline, pAllocator);
}

#undef API_MARKER