/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_cmd_buffer.h"

#include "nvk_buffer.h"
#include "nvk_cmd_pool.h"
#include "nvk_descriptor_set_layout.h"
#include "nvk_device.h"
#include "nvk_device_memory.h"
#include "nvk_entrypoints.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"
#include "nvkmd/nvkmd.h"

#include "vk_pipeline_layout.h"
#include "vk_synchronization.h"

#include "nv_push_cl906f.h"
#include "nv_push_cl90b5.h"
#include "nv_push_cla097.h"
#include "nv_push_cla0c0.h"
#include "nv_push_clb1c0.h"
#include "nv_push_clc597.h"

static void
nvk_descriptor_state_fini(struct nvk_cmd_buffer *cmd,
                          struct nvk_descriptor_state *desc)
{
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   for (unsigned i = 0; i < NVK_MAX_SETS; i++) {
      vk_free(&pool->vk.alloc, desc->sets[i].push);
      desc->sets[i].push = NULL;
   }
}

static void
nvk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
{
   struct nvk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct nvk_cmd_buffer, vk);
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   nvk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors);
   nvk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors);

   nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
   nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
   util_dynarray_fini(&cmd->pushes);
   vk_command_buffer_finish(&cmd->vk);
   vk_free(&pool->vk.alloc, cmd);
}

57 static VkResult
nvk_create_cmd_buffer(struct vk_command_pool * vk_pool,VkCommandBufferLevel level,struct vk_command_buffer ** cmd_buffer_out)58 nvk_create_cmd_buffer(struct vk_command_pool *vk_pool,
59 VkCommandBufferLevel level,
60 struct vk_command_buffer **cmd_buffer_out)
61 {
62 struct nvk_cmd_pool *pool = container_of(vk_pool, struct nvk_cmd_pool, vk);
63 struct nvk_device *dev = nvk_cmd_pool_device(pool);
64 struct nvk_cmd_buffer *cmd;
65 VkResult result;
66
67 cmd = vk_zalloc(&pool->vk.alloc, sizeof(*cmd), 8,
68 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
69 if (cmd == NULL)
70 return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
71
72 result = vk_command_buffer_init(&pool->vk, &cmd->vk,
73 &nvk_cmd_buffer_ops, level);
74 if (result != VK_SUCCESS) {
75 vk_free(&pool->vk.alloc, cmd);
76 return result;
77 }
78
79 cmd->vk.dynamic_graphics_state.vi = &cmd->state.gfx._dynamic_vi;
80 cmd->vk.dynamic_graphics_state.ms.sample_locations =
81 &cmd->state.gfx._dynamic_sl;
82
83 list_inithead(&cmd->owned_mem);
84 list_inithead(&cmd->owned_gart_mem);
85 util_dynarray_init(&cmd->pushes, NULL);
86
87 *cmd_buffer_out = &cmd->vk;
88
89 return VK_SUCCESS;
90 }
91
static void
nvk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
                     UNUSED VkCommandBufferResetFlags flags)
{
   struct nvk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct nvk_cmd_buffer, vk);
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   vk_command_buffer_reset(&cmd->vk);

   nvk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors);
   nvk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors);

   nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
   nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
   cmd->upload_mem = NULL;
   cmd->push_mem = NULL;
   cmd->push_mem_limit = NULL;
   cmd->push = (struct nv_push) {0};

   util_dynarray_clear(&cmd->pushes);

   memset(&cmd->state, 0, sizeof(cmd->state));
}

const struct vk_command_buffer_ops nvk_cmd_buffer_ops = {
   .create = nvk_create_cmd_buffer,
   .reset = nvk_reset_cmd_buffer,
   .destroy = nvk_destroy_cmd_buffer,
};

/* If we ever fail to allocate a push, we use this */
static uint32_t push_runout[NVK_CMD_BUFFER_MAX_PUSH];

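/* Grabs a memory chunk from the command pool and records it on the command
 * buffer's owned list (GART or regular, depending on force_gart) so that it
 * is handed back to the pool when the command buffer is reset or destroyed.
 */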
static VkResult
nvk_cmd_buffer_alloc_mem(struct nvk_cmd_buffer *cmd, bool force_gart,
                         struct nvk_cmd_mem **mem_out)
{
   VkResult result = nvk_cmd_pool_alloc_mem(nvk_cmd_buffer_pool(cmd),
                                            force_gart, mem_out);
   if (result != VK_SUCCESS)
      return result;

   if (force_gart)
      list_addtail(&(*mem_out)->link, &cmd->owned_gart_mem);
   else
      list_addtail(&(*mem_out)->link, &cmd->owned_mem);

   return VK_SUCCESS;
}

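/* Closes out the commands written to the current push memory so far: the
 * range [push.start, push.end) is appended to cmd->pushes as an nvk_cmd_push
 * and push.start is advanced so subsequent commands begin a new entry.  In
 * the runout case (push_mem == NULL) nothing is recorded, so those commands
 * are never submitted.
 */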
static void
nvk_cmd_buffer_flush_push(struct nvk_cmd_buffer *cmd)
{
   if (likely(cmd->push_mem != NULL)) {
      const uint32_t mem_offset =
         (char *)cmd->push.start - (char *)cmd->push_mem->mem->map;

      struct nvk_cmd_push push = {
         .map = cmd->push.start,
         .addr = cmd->push_mem->mem->va->addr + mem_offset,
         .range = nv_push_dw_count(&cmd->push) * 4,
      };
      util_dynarray_append(&cmd->pushes, struct nvk_cmd_push, push);
   }

   cmd->push.start = cmd->push.end;
}

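/* Flushes the current push and starts a new one in a freshly allocated
 * memory chunk.  If allocation fails, writes are redirected to the static
 * push_runout scratch buffer so that command emission can continue; since
 * nvk_cmd_buffer_flush_push() skips recording when push_mem is NULL, runout
 * commands are silently dropped rather than submitted.
 */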
void
nvk_cmd_buffer_new_push(struct nvk_cmd_buffer *cmd)
{
   nvk_cmd_buffer_flush_push(cmd);

   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false, &cmd->push_mem);
   if (unlikely(result != VK_SUCCESS)) {
      STATIC_ASSERT(NVK_CMD_BUFFER_MAX_PUSH <= NVK_CMD_MEM_SIZE / 4);
      cmd->push_mem = NULL;
      nv_push_init(&cmd->push, push_runout, 0);
      cmd->push_mem_limit = &push_runout[NVK_CMD_BUFFER_MAX_PUSH];
   } else {
      nv_push_init(&cmd->push, cmd->push_mem->mem->map, 0);
      cmd->push_mem_limit =
         (uint32_t *)((char *)cmd->push_mem->mem->map + NVK_CMD_MEM_SIZE);
   }
}

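/* Records a push that executes directly from GPU memory at addr instead of
 * from memory owned by this command buffer.  no_prefetch is set, presumably
 * because the data at addr may still be written (e.g. by the GPU) after this
 * command buffer is recorded.
 */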
void
nvk_cmd_buffer_push_indirect(struct nvk_cmd_buffer *cmd,
                             uint64_t addr, uint32_t range)
{
   nvk_cmd_buffer_flush_push(cmd);

   struct nvk_cmd_push push = {
      .addr = addr,
      .range = range,
      .no_prefetch = true,
   };

   util_dynarray_append(&cmd->pushes, struct nvk_cmd_push, push);
}

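/* Suballocates size bytes (a multiple of 4, at most NVK_CMD_MEM_SIZE) from
 * the command buffer's current upload chunk and returns both the GPU address
 * and a CPU mapping.  A new chunk is allocated when the current one cannot
 * fit the request.
 */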
VkResult
nvk_cmd_buffer_upload_alloc(struct nvk_cmd_buffer *cmd,
                            uint32_t size, uint32_t alignment,
                            uint64_t *addr, void **ptr)
{
   assert(size % 4 == 0);
   assert(size <= NVK_CMD_MEM_SIZE);

   uint32_t offset = cmd->upload_offset;
   if (alignment > 0)
      offset = align(offset, alignment);

   assert(offset <= NVK_CMD_MEM_SIZE);
   if (cmd->upload_mem != NULL && size <= NVK_CMD_MEM_SIZE - offset) {
      *addr = cmd->upload_mem->mem->va->addr + offset;
      *ptr = (char *)cmd->upload_mem->mem->map + offset;

      cmd->upload_offset = offset + size;

      return VK_SUCCESS;
   }

   struct nvk_cmd_mem *mem;
   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false, &mem);
   if (unlikely(result != VK_SUCCESS))
      return result;

   *addr = mem->mem->va->addr;
   *ptr = mem->mem->map;

   /* Pick whichever of the current upload BO and the new BO will have more
    * room left to be the BO for the next upload.  If our upload size is
    * bigger than the old offset, we're better off burning the whole new
    * upload BO on this one allocation and continuing on the current upload
    * BO.
    */
   if (cmd->upload_mem == NULL || size < cmd->upload_offset) {
      cmd->upload_mem = mem;
      cmd->upload_offset = size;
   }

   return VK_SUCCESS;
}

VkResult
nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
                           const void *data, uint32_t size,
                           uint32_t alignment, uint64_t *addr)
{
   VkResult result;
   void *map;

   result = nvk_cmd_buffer_upload_alloc(cmd, size, alignment, addr, &map);
   if (unlikely(result != VK_SUCCESS))
      return result;

   memcpy(map, data, size);

   return VK_SUCCESS;
}

VkResult
nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
                                 uint64_t *addr)
{
   uint32_t offset = cmd->cond_render_gart_offset;
   uint32_t size = 64;

   assert(offset <= NVK_CMD_MEM_SIZE);
   if (cmd->cond_render_gart_mem != NULL && size <= NVK_CMD_MEM_SIZE - offset) {
      *addr = cmd->cond_render_gart_mem->mem->va->addr + offset;

      cmd->cond_render_gart_offset = offset + size;

      return VK_SUCCESS;
   }

   struct nvk_cmd_mem *mem;
   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, true, &mem);
   if (unlikely(result != VK_SUCCESS))
      return result;

   *addr = mem->mem->va->addr;

   /* Pick whichever of the current upload BO and the new BO will have more
    * room left to be the BO for the next upload.  If our upload size is
    * bigger than the old offset, we're better off burning the whole new
    * upload BO on this one allocation and continuing on the current upload
    * BO.
    */
   if (cmd->cond_render_gart_mem == NULL || size < cmd->cond_render_gart_offset) {
      cmd->cond_render_gart_mem = mem;
      cmd->cond_render_gart_offset = size;
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                       const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_reset_cmd_buffer(&cmd->vk, 0);

   /* Start with a nop so we have at least something to submit */
   struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
   P_MTHD(p, NV90B5, NOP);
   P_NV90B5_NOP(p, 0);

   nvk_cmd_buffer_begin_compute(cmd, pBeginInfo);
   nvk_cmd_buffer_begin_graphics(cmd, pBeginInfo);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_cmd_buffer_flush_push(cmd);

   return vk_command_buffer_get_record_result(&cmd->vk);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                       uint32_t commandBufferCount,
                       const VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (commandBufferCount == 0)
      return;

   nvk_cmd_buffer_flush_push(cmd);

   for (uint32_t i = 0; i < commandBufferCount; i++) {
      VK_FROM_HANDLE(nvk_cmd_buffer, other, pCommandBuffers[i]);

      /* We only need to copy the pushes.  We do not copy the
       * nvk_cmd_buffer::bos because that tracks ownership.  Instead, we
       * depend on the app to not discard secondaries while they are used by a
       * primary.  The Vulkan 1.3.227 spec for vkFreeCommandBuffers() says:
       *
       *    "Any primary command buffer that is in the recording or executable
       *    state and has any element of pCommandBuffers recorded into it,
       *    becomes invalid."
       *
       * In other words, if the secondary command buffer ever goes away, this
       * command buffer is invalid and the only thing the client can validly
       * do with it is reset it.  vkResetCommandPool() has similar language.
       */
      util_dynarray_append_dynarray(&cmd->pushes, &other->pushes);
   }

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When secondary command buffer(s) are recorded to execute on a
    *    primary command buffer, the secondary command buffer inherits no
    *    state from the primary command buffer, and all state of the primary
    *    command buffer is undefined after an execute secondary command buffer
    *    command is recorded. There is one exception to this rule - if the
    *    primary command buffer is inside a render pass instance, then the
    *    render pass and subpass state is not disturbed by executing secondary
    *    command buffers. For state dependent commands (such as draws and
    *    dispatches), any state consumed by those commands must not be
    *    undefined."
    *
    * Therefore, it's the client's job to reset all the state in the primary
    * after the secondary executes.  However, if we're doing any internal
    * dirty tracking, we may miss the fact that a secondary has messed with
    * GPU state if we don't invalidate all our internal tracking.
    */
   nvk_cmd_invalidate_graphics_state(cmd);
   nvk_cmd_invalidate_compute_state(cmd);
}

enum nvk_barrier {
   NVK_BARRIER_RENDER_WFI              = 1 << 0,
   NVK_BARRIER_COMPUTE_WFI             = 1 << 1,
   NVK_BARRIER_FLUSH_SHADER_DATA       = 1 << 2,
   NVK_BARRIER_INVALIDATE_SHADER_DATA  = 1 << 3,
   NVK_BARRIER_INVALIDATE_TEX_DATA     = 1 << 4,
   NVK_BARRIER_INVALIDATE_CONSTANT     = 1 << 5,
   NVK_BARRIER_INVALIDATE_MME_DATA     = 1 << 6,
   NVK_BARRIER_INVALIDATE_QMD_DATA     = 1 << 7,
};

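/* Maps the source stage/access masks of a dependency onto the flush and
 * wait-for-idle work NVK emits: shader storage writes flush the shader data
 * caches and WFI whichever engines (3D and/or compute) may have produced
 * them, while attachment, transform feedback, and 3D-engine transfer writes
 * require a render WFI.
 */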
static enum nvk_barrier
nvk_barrier_flushes_waits(VkPipelineStageFlags2 stages,
                          VkAccessFlags2 access)
{
   stages = vk_expand_src_stage_flags2(stages);
   access = vk_filter_src_access_flags2(stages, access);

   enum nvk_barrier barriers = 0;

   if (access & VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT) {
      barriers |= NVK_BARRIER_FLUSH_SHADER_DATA;

      if (vk_pipeline_stage_flags2_has_graphics_shader(stages))
         barriers |= NVK_BARRIER_RENDER_WFI;

      if (vk_pipeline_stage_flags2_has_compute_shader(stages))
         barriers |= NVK_BARRIER_COMPUTE_WFI;
   }

   if (access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
                 VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                 VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT))
      barriers |= NVK_BARRIER_RENDER_WFI;

   if ((access & VK_ACCESS_2_TRANSFER_WRITE_BIT) &&
       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
                  VK_PIPELINE_STAGE_2_BLIT_BIT |
                  VK_PIPELINE_STAGE_2_CLEAR_BIT)))
      barriers |= NVK_BARRIER_RENDER_WFI;

   if (access & VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_EXT)
      barriers |= NVK_BARRIER_FLUSH_SHADER_DATA |
                  NVK_BARRIER_COMPUTE_WFI;

   return barriers;
}

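/* Maps the destination stage/access masks of a dependency onto the cache
 * invalidations NVK emits before the consuming work: shader/constant caches
 * for UBO and descriptor reads, the texture data cache for sampled and input
 * attachment reads (and 3D-engine transfers), MME data for parameters read
 * by the macro engine (indirect commands, transform feedback counters,
 * conditional rendering), and the SKED caches for indirect dispatch QMDs.
 */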
static enum nvk_barrier
nvk_barrier_invalidates(VkPipelineStageFlags2 stages,
                        VkAccessFlags2 access)
{
   stages = vk_expand_dst_stage_flags2(stages);
   access = vk_filter_dst_access_flags2(stages, access);

   enum nvk_barrier barriers = 0;

   if (access & (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT |
                 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
                 VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT |
                 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT))
      barriers |= NVK_BARRIER_INVALIDATE_MME_DATA;

   if (access & VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT)
      barriers |= NVK_BARRIER_INVALIDATE_CONSTANT |
                  NVK_BARRIER_INVALIDATE_QMD_DATA;

   if (access & (VK_ACCESS_2_UNIFORM_READ_BIT |
                 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT))
      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA |
                  NVK_BARRIER_INVALIDATE_CONSTANT;

   if (access & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
                 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT))
      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   if (access & VK_ACCESS_2_SHADER_STORAGE_READ_BIT)
      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA;

   if ((access & VK_ACCESS_2_TRANSFER_READ_BIT) &&
       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
                  VK_PIPELINE_STAGE_2_BLIT_BIT)))
      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   return barriers;
}

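/* Emits the source-side work for a VkDependencyInfo: the flush/WFI bits from
 * all memory, buffer, and image barriers are accumulated and the matching
 * cache flushes and/or WAIT_FOR_IDLE are emitted.  With wait == false (the
 * vkCmdSetEvent path), the standalone render WFI is skipped, presumably
 * because the actual wait is deferred to the corresponding wait-events call.
 */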
void
nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
                       const VkDependencyInfo *dep,
                       bool wait)
{
   enum nvk_barrier barriers = 0;

   for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
      const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
      const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
      const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   if (!barriers)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);

   if (barriers & NVK_BARRIER_FLUSH_SHADER_DATA) {
      assert(barriers & (NVK_BARRIER_RENDER_WFI | NVK_BARRIER_COMPUTE_WFI));
      if (barriers & NVK_BARRIER_RENDER_WFI) {
         P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES, {
            .data = DATA_TRUE,
            .flush_data = FLUSH_DATA_TRUE,
         });
      }

      if (barriers & NVK_BARRIER_COMPUTE_WFI) {
         P_IMMD(p, NVA0C0, INVALIDATE_SHADER_CACHES, {
            .data = DATA_TRUE,
            .flush_data = FLUSH_DATA_TRUE,
         });
      }
   } else if (barriers & NVK_BARRIER_RENDER_WFI) {
      /* If this comes from a vkCmdSetEvent, we don't need to wait */
      if (wait)
         P_IMMD(p, NVA097, WAIT_FOR_IDLE, 0);
   } else {
      /* Compute WFI only happens when shader data is flushed */
      assert(!(barriers & NVK_BARRIER_COMPUTE_WFI));
   }
}

void
nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
                        uint32_t dep_count,
                        const VkDependencyInfo *deps)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   enum nvk_barrier barriers = 0;

   for (uint32_t d = 0; d < dep_count; d++) {
      const VkDependencyInfo *dep = &deps[d];

      for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
         const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }

      for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
         const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }

      for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
         const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }
   }

   if (!barriers)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

   if (barriers & NVK_BARRIER_INVALIDATE_TEX_DATA) {
      P_IMMD(p, NVA097, INVALIDATE_TEXTURE_DATA_CACHE_NO_WFI, {
         .lines = LINES_ALL,
      });
   }

   if (barriers & (NVK_BARRIER_INVALIDATE_SHADER_DATA |
                   NVK_BARRIER_INVALIDATE_CONSTANT)) {
      P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES_NO_WFI, {
         .global_data = (barriers & NVK_BARRIER_INVALIDATE_SHADER_DATA) != 0,
         .constant = (barriers & NVK_BARRIER_INVALIDATE_CONSTANT) != 0,
      });
   }

   if (barriers & (NVK_BARRIER_INVALIDATE_MME_DATA)) {
      __push_immd(p, SUBC_NV9097, NV906F_SET_REFERENCE, 0);

      if (pdev->info.cls_eng3d >= TURING_A)
         P_IMMD(p, NVC597, MME_DMA_SYSMEMBAR, 0);
   }

   if ((barriers & NVK_BARRIER_INVALIDATE_QMD_DATA) &&
       pdev->info.cls_eng3d >= MAXWELL_COMPUTE_B)
      P_IMMD(p, NVB1C0, INVALIDATE_SKED_CACHES, 0);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
                        const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, true);
   nvk_cmd_invalidate_deps(cmd, 1, pDependencyInfo);
}

void
nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
                     uint32_t stage_count,
                     const gl_shader_stage *stages,
                     struct vk_shader ** const shaders)
{
   struct nvk_cmd_buffer *cmd = container_of(vk_cmd, struct nvk_cmd_buffer, vk);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);

   for (uint32_t i = 0; i < stage_count; i++) {
      struct nvk_shader *shader =
         container_of(shaders[i], struct nvk_shader, vk);

      if (shader != NULL) {
         nvk_device_ensure_slm(dev, shader->info.slm_size,
                               shader->info.crs_size);
      }

      if (stages[i] == MESA_SHADER_COMPUTE ||
          stages[i] == MESA_SHADER_KERNEL)
         nvk_cmd_bind_compute_shader(cmd, shader);
      else
         nvk_cmd_bind_graphics_shader(cmd, stages[i], shader);
   }
}

#define NVK_VK_GRAPHICS_STAGE_BITS VK_SHADER_STAGE_ALL_GRAPHICS

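/* Marks as dirty any graphics cbuf bindings whose contents come from
 * descriptor sets in [sets_start, sets_end) so that they are re-emitted the
 * next time cbuf state is flushed.  Root descriptor and shader-owned cbufs
 * are unaffected by descriptor set binds and are left alone.
 */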
void
nvk_cmd_dirty_cbufs_for_descriptors(struct nvk_cmd_buffer *cmd,
                                    VkShaderStageFlags stages,
                                    uint32_t sets_start, uint32_t sets_end)
{
   if (!(stages & NVK_VK_GRAPHICS_STAGE_BITS))
      return;

   uint32_t groups = 0;
   u_foreach_bit(i, stages & NVK_VK_GRAPHICS_STAGE_BITS) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(1 << i);
      uint32_t g = nvk_cbuf_binding_for_stage(stage);
      groups |= BITFIELD_BIT(g);
   }

   u_foreach_bit(g, groups) {
      struct nvk_cbuf_group *group = &cmd->state.gfx.cbuf_groups[g];

      for (uint32_t i = 0; i < ARRAY_SIZE(group->cbufs); i++) {
         const struct nvk_cbuf *cbuf = &group->cbufs[i];
         switch (cbuf->type) {
         case NVK_CBUF_TYPE_INVALID:
         case NVK_CBUF_TYPE_ROOT_DESC:
         case NVK_CBUF_TYPE_SHADER_DATA:
            break;

         case NVK_CBUF_TYPE_DESC_SET:
         case NVK_CBUF_TYPE_UBO_DESC:
         case NVK_CBUF_TYPE_DYNAMIC_UBO:
            if (cbuf->desc_set >= sets_start && cbuf->desc_set < sets_end)
               group->dirty |= BITFIELD_BIT(i);
            break;

         default:
            unreachable("Invalid cbuf type");
         }
      }
   }
}

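/* Common implementation of vkCmdBindDescriptorSets2KHR for one bind point.
 * Besides writing the bound set addresses into the root descriptor table,
 * this packs the sets' dynamic buffer descriptors (with pDynamicOffsets
 * applied) into the root table's dynamic_buffers array and keeps
 * set_dynamic_buffer_start[] consistent for all sets above the ones being
 * bound.
 */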
static void
nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
                         struct nvk_descriptor_state *desc,
                         const VkBindDescriptorSetsInfoKHR *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   union nvk_buffer_descriptor dynamic_buffers[NVK_MAX_DYNAMIC_BUFFERS];
   uint8_t set_dynamic_buffer_start[NVK_MAX_SETS];

   /* Read off the current dynamic buffer start array so we can use it to
    * determine where we should start binding dynamic buffers.
    */
   nvk_descriptor_state_get_root_array(desc, set_dynamic_buffer_start,
                                       0, NVK_MAX_SETS,
                                       set_dynamic_buffer_start);

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When binding a descriptor set (see Descriptor Set Binding) to
    *    set number N...
    *
    *    If, additionally, the previously bound descriptor set for set
    *    N was bound using a pipeline layout not compatible for set N,
    *    then all bindings in sets numbered greater than N are
    *    disturbed."
    *
    * This means that, if some earlier set gets bound in such a way that
    * it changes set_dynamic_buffer_start[s], this binding is implicitly
    * invalidated.  Therefore, we can always look at the current value
    * of set_dynamic_buffer_start[s] as the base of our dynamic buffer
    * range and it's only our responsibility to adjust all
    * set_dynamic_buffer_start[p] for p > s as needed.
    */
   const uint8_t dyn_buffer_start = set_dynamic_buffer_start[info->firstSet];
   uint8_t dyn_buffer_end = dyn_buffer_start;

   uint32_t next_dyn_offset = 0;
   for (uint32_t i = 0; i < info->descriptorSetCount; ++i) {
      unsigned s = i + info->firstSet;
      VK_FROM_HANDLE(nvk_descriptor_set, set, info->pDescriptorSets[i]);

      if (desc->sets[s].type != NVK_DESCRIPTOR_SET_TYPE_SET ||
          desc->sets[s].set != set) {
         struct nvk_buffer_address set_addr;
         if (set != NULL) {
            desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_SET;
            desc->sets[s].set = set;
            set_addr = nvk_descriptor_set_addr(set);
         } else {
            desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_NONE;
            desc->sets[s].set = NULL;
            set_addr = NVK_BUFFER_ADDRESS_NULL;
         }
         nvk_descriptor_state_set_root(cmd, desc, sets[s], set_addr);
      }

      set_dynamic_buffer_start[s] = dyn_buffer_end;

      if (pipeline_layout->set_layouts[s] != NULL) {
         const struct nvk_descriptor_set_layout *set_layout =
            vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[s]);

         if (set != NULL && set_layout->dynamic_buffer_count > 0) {
            for (uint32_t j = 0; j < set_layout->dynamic_buffer_count; j++) {
               union nvk_buffer_descriptor db = set->dynamic_buffers[j];
               uint32_t offset = info->pDynamicOffsets[next_dyn_offset + j];
               if (BITSET_TEST(set_layout->dynamic_ubos, j) &&
                   nvk_use_bindless_cbuf(&pdev->info)) {
                  assert((offset & 0xf) == 0);
                  db.cbuf.base_addr_shift_4 += offset >> 4;
               } else {
                  db.addr.base_addr += offset;
               }
               dynamic_buffers[dyn_buffer_end + j] = db;
            }
            next_dyn_offset += set->layout->dynamic_buffer_count;
         }

         dyn_buffer_end += set_layout->dynamic_buffer_count;
      } else {
         assert(set == NULL);
      }
   }
   assert(dyn_buffer_end <= NVK_MAX_DYNAMIC_BUFFERS);
   assert(next_dyn_offset <= info->dynamicOffsetCount);

   nvk_descriptor_state_set_root_array(cmd, desc, dynamic_buffers,
                                       dyn_buffer_start,
                                       dyn_buffer_end - dyn_buffer_start,
                                       &dynamic_buffers[dyn_buffer_start]);

   /* We need to set everything above first_set because later calls to
    * nvk_bind_descriptor_sets() depend on it for knowing where to start and
    * they may not be called on the next consecutive set.
    */
   for (uint32_t s = info->firstSet + info->descriptorSetCount;
        s < NVK_MAX_SETS; s++)
      set_dynamic_buffer_start[s] = dyn_buffer_end;

   /* We need to at least sync everything from first_set to NVK_MAX_SETS.
    * However, we only save anything if firstSet >= 4 so we may as well sync
    * everything just to be safe.
    */
   nvk_descriptor_state_set_root_array(cmd, desc, set_dynamic_buffer_start,
                                       0, NVK_MAX_SETS,
                                       set_dynamic_buffer_start);

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags, info->firstSet,
                                       info->firstSet + info->descriptorSetCount);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorSets2KHR(VkCommandBuffer commandBuffer,
                              const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pBindDescriptorSetsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_bind_descriptor_sets(cmd, &cmd->state.gfx.descriptors,
                               pBindDescriptorSetsInfo);
   }

   if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_bind_descriptor_sets(cmd, &cmd->state.cs.descriptors,
                               pBindDescriptorSetsInfo);
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorBuffersEXT(VkCommandBuffer commandBuffer,
                                uint32_t bufferCount,
                                const VkDescriptorBufferBindingInfoEXT *pBindingInfos)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   for (uint32_t i = 0; i < bufferCount; i++)
      cmd->state.descriptor_buffers[i] = pBindingInfos[i].address;
}

static void
nvk_set_descriptor_buffer_offsets(struct nvk_cmd_buffer *cmd,
                                  struct nvk_descriptor_state *desc,
                                  const VkSetDescriptorBufferOffsetsInfoEXT *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);

   for (uint32_t i = 0; i < info->setCount; ++i) {
      const uint32_t s = i + info->firstSet;

      desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_BUFFER;
      desc->sets[s].set = NULL;

      struct nvk_buffer_address set_addr;
      if (pipeline_layout->set_layouts[s] != NULL) {
         const struct nvk_descriptor_set_layout *set_layout =
            vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[s]);
         assert(set_layout->flags &
                VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT);

         const uint64_t buffer_base_addr =
            cmd->state.descriptor_buffers[info->pBufferIndices[i]];

         set_addr = (struct nvk_buffer_address) {
            .base_addr = buffer_base_addr + info->pOffsets[i],
            .size = set_layout->max_buffer_size,
         };
      } else {
         set_addr = NVK_BUFFER_ADDRESS_NULL;
      }
      nvk_descriptor_state_set_root(cmd, desc, sets[s], set_addr);
   }

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags,
                                       info->firstSet,
                                       info->firstSet + info->setCount);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdSetDescriptorBufferOffsets2EXT(VkCommandBuffer commandBuffer,
                                      const VkSetDescriptorBufferOffsetsInfoEXT *pInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_set_descriptor_buffer_offsets(cmd, &cmd->state.gfx.descriptors,
                                        pInfo);
   }

   if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_set_descriptor_buffer_offsets(cmd, &cmd->state.cs.descriptors,
                                        pInfo);
   }
}

static void
nvk_bind_embedded_samplers(struct nvk_cmd_buffer *cmd,
                           struct nvk_descriptor_state *desc,
                           const VkBindDescriptorBufferEmbeddedSamplersInfoEXT *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
   const struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]);

   struct nvk_buffer_address set_addr = {
      .base_addr = set_layout->embedded_samplers_addr,
      .size = set_layout->non_variable_descriptor_buffer_size,
   };
   nvk_descriptor_state_set_root(cmd, desc, sets[info->set], set_addr);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorBufferEmbeddedSamplers2EXT(
   VkCommandBuffer commandBuffer,
   const VkBindDescriptorBufferEmbeddedSamplersInfoEXT *pInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_bind_embedded_samplers(cmd, &cmd->state.gfx.descriptors, pInfo);
   }

   if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_bind_embedded_samplers(cmd, &cmd->state.cs.descriptors, pInfo);
   }
}

static void
nvk_push_constants(UNUSED struct nvk_cmd_buffer *cmd,
                   struct nvk_descriptor_state *desc,
                   const VkPushConstantsInfoKHR *info)
{
   nvk_descriptor_state_set_root_array(cmd, desc, push,
                                       info->offset, info->size,
                                       (char *)info->pValues);
}


VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
                         const VkPushConstantsInfoKHR *pPushConstantsInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pPushConstantsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS)
      nvk_push_constants(cmd, &cmd->state.gfx.descriptors, pPushConstantsInfo);

   if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
      nvk_push_constants(cmd, &cmd->state.cs.descriptors, pPushConstantsInfo);
}

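/* Returns the CPU-side storage for push descriptors of the given set,
 * allocating it on first use, and marks the set as a push set so that
 * nvk_cmd_buffer_flush_push_descriptors() will upload it.
 */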
static struct nvk_push_descriptor_set *
nvk_cmd_push_descriptors(struct nvk_cmd_buffer *cmd,
                         struct nvk_descriptor_state *desc,
                         uint32_t set)
{
   assert(set < NVK_MAX_SETS);
   if (unlikely(desc->sets[set].push == NULL)) {
      desc->sets[set].push = vk_zalloc(&cmd->vk.pool->alloc,
                                       sizeof(*desc->sets[set].push), 8,
                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (unlikely(desc->sets[set].push == NULL)) {
         vk_command_buffer_set_error(&cmd->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
         return NULL;
      }
   }

   /* Pushing descriptors replaces whatever sets are bound */
   desc->sets[set].type = NVK_DESCRIPTOR_SET_TYPE_PUSH;
   desc->sets[set].set = NULL;
   desc->push_dirty |= BITFIELD_BIT(set);

   return desc->sets[set].push;
}

static void
nvk_push_descriptor_set(struct nvk_cmd_buffer *cmd,
                        struct nvk_descriptor_state *desc,
                        const VkPushDescriptorSetInfoKHR *info)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);

   struct nvk_push_descriptor_set *push_set =
      nvk_cmd_push_descriptors(cmd, desc, info->set);
   if (unlikely(push_set == NULL))
      return;

   struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]);

   nvk_push_descriptor_set_update(dev, push_set, set_layout,
                                  info->descriptorWriteCount,
                                  info->pDescriptorWrites);

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags,
                                       info->set, info->set + 1);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushDescriptorSet2KHR(VkCommandBuffer commandBuffer,
                             const VkPushDescriptorSetInfoKHR *pPushDescriptorSetInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pPushDescriptorSetInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_push_descriptor_set(cmd, &cmd->state.gfx.descriptors,
                              pPushDescriptorSetInfo);
   }

   if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_push_descriptor_set(cmd, &cmd->state.cs.descriptors,
                              pPushDescriptorSetInfo);
   }
}

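/* Uploads any dirty push descriptor sets to GPU-visible memory and points
 * the corresponding root descriptor table entries at the fresh copies.
 */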
void
nvk_cmd_buffer_flush_push_descriptors(struct nvk_cmd_buffer *cmd,
                                      struct nvk_descriptor_state *desc)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
   VkResult result;

   u_foreach_bit(set_idx, desc->push_dirty) {
      if (desc->sets[set_idx].type != NVK_DESCRIPTOR_SET_TYPE_PUSH)
         continue;

      struct nvk_push_descriptor_set *push_set = desc->sets[set_idx].push;
      uint64_t push_set_addr;
      result = nvk_cmd_buffer_upload_data(cmd, push_set->data,
                                          sizeof(push_set->data),
                                          min_cbuf_alignment,
                                          &push_set_addr);
      if (unlikely(result != VK_SUCCESS)) {
         vk_command_buffer_set_error(&cmd->vk, result);
         return;
      }

      struct nvk_buffer_address set_addr = {
         .base_addr = push_set_addr,
         .size = sizeof(push_set->data),
      };
      nvk_descriptor_state_set_root(cmd, desc, sets[set_idx], set_addr);
   }
}

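/* Resolves an nvk_cbuf binding to a GPU address range, when that is possible
 * on the CPU.  Returns false for a UBO descriptor that lives in a bound or
 * descriptor-buffer set; for that case,
 * nvk_cmd_buffer_get_cbuf_descriptor_addr() below returns the descriptor's
 * own GPU address so it can be read on the GPU instead.
 */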
bool
nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
                             const struct nvk_descriptor_state *desc,
                             const struct nvk_shader *shader,
                             const struct nvk_cbuf *cbuf,
                             struct nvk_buffer_address *addr_out)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   switch (cbuf->type) {
   case NVK_CBUF_TYPE_INVALID:
      *addr_out = (struct nvk_buffer_address) { .size = 0 };
      return true;

   case NVK_CBUF_TYPE_ROOT_DESC:
      unreachable("The caller should handle root descriptors");
      return false;

   case NVK_CBUF_TYPE_SHADER_DATA:
      *addr_out = (struct nvk_buffer_address) {
         .base_addr = shader->data_addr,
         .size = shader->data_size,
      };
      return true;

   case NVK_CBUF_TYPE_DESC_SET:
      nvk_descriptor_state_get_root(desc, sets[cbuf->desc_set], addr_out);
      return true;

   case NVK_CBUF_TYPE_DYNAMIC_UBO: {
      uint8_t dyn_idx;
      nvk_descriptor_state_get_root(
         desc, set_dynamic_buffer_start[cbuf->desc_set], &dyn_idx);
      dyn_idx += cbuf->dynamic_idx;
      union nvk_buffer_descriptor ubo_desc;
      nvk_descriptor_state_get_root(desc, dynamic_buffers[dyn_idx], &ubo_desc);
      *addr_out = nvk_ubo_descriptor_addr(pdev, ubo_desc);
      return true;
   }

   case NVK_CBUF_TYPE_UBO_DESC: {
      if (desc->sets[cbuf->desc_set].type != NVK_DESCRIPTOR_SET_TYPE_PUSH)
         return false;

      struct nvk_push_descriptor_set *push = desc->sets[cbuf->desc_set].push;
      if (push == NULL)
         return false;

      assert(cbuf->desc_offset < NVK_PUSH_DESCRIPTOR_SET_SIZE);
      union nvk_buffer_descriptor desc;
      memcpy(&desc, &push->data[cbuf->desc_offset], sizeof(desc));
      *addr_out = nvk_ubo_descriptor_addr(pdev, desc);
      return true;
   }

   default:
      unreachable("Invalid cbuf type");
   }
}

uint64_t
nvk_cmd_buffer_get_cbuf_descriptor_addr(struct nvk_cmd_buffer *cmd,
                                        const struct nvk_descriptor_state *desc,
                                        const struct nvk_cbuf *cbuf)
{
   assert(cbuf->type == NVK_CBUF_TYPE_UBO_DESC);
   switch (desc->sets[cbuf->desc_set].type) {
   case NVK_DESCRIPTOR_SET_TYPE_SET:
   case NVK_DESCRIPTOR_SET_TYPE_BUFFER: {
      struct nvk_buffer_address set_addr;
      nvk_descriptor_state_get_root(desc, sets[cbuf->desc_set], &set_addr);

      assert(cbuf->desc_offset < set_addr.size);
      return set_addr.base_addr + cbuf->desc_offset;
   }

   default:
      unreachable("Unknown descriptor set type");
   }
}

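/* Debug helper: prints every recorded push to fp.  Direct pushes are decoded
 * from their CPU mapping; indirect pushes are looked up by VA and temporarily
 * mapped for decoding when the backing memory can be found.
 */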
void
nvk_cmd_buffer_dump(struct nvk_cmd_buffer *cmd, FILE *fp)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   util_dynarray_foreach(&cmd->pushes, struct nvk_cmd_push, p) {
      if (p->map) {
         struct nv_push push = {
            .start = (uint32_t *)p->map,
            .end = (uint32_t *)((char *)p->map + p->range),
         };
         vk_push_print(fp, &push, &pdev->info);
      } else {
         const uint64_t addr = p->addr;
         fprintf(fp, "<%u B of INDIRECT DATA at 0x%" PRIx64 ">\n",
                 p->range, addr);

         uint64_t mem_offset = 0;
         struct nvkmd_mem *mem =
            nvkmd_dev_lookup_mem_by_va(dev->nvkmd, addr, &mem_offset);
         if (mem != NULL) {
            void *map;
            VkResult map_result = nvkmd_mem_map(mem, &dev->vk.base,
                                                NVKMD_MEM_MAP_RD, NULL,
                                                &map);
            if (map_result == VK_SUCCESS) {
               struct nv_push push = {
                  .start = mem->map + mem_offset,
                  .end = mem->map + mem_offset + p->range,
               };
               vk_push_print(fp, &push, &pdev->info);
               nvkmd_mem_unmap(mem, 0);
            }

            nvkmd_mem_unref(mem);
         }
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushDescriptorSetWithTemplate2KHR(
   VkCommandBuffer commandBuffer,
   const VkPushDescriptorSetWithTemplateInfoKHR *pPushDescriptorSetWithTemplateInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   VK_FROM_HANDLE(vk_descriptor_update_template, template,
                  pPushDescriptorSetWithTemplateInfo->descriptorUpdateTemplate);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout,
                  pPushDescriptorSetWithTemplateInfo->layout);
   const uint32_t set = pPushDescriptorSetWithTemplateInfo->set;

   struct nvk_descriptor_state *desc =
      nvk_get_descriptors_state(cmd, template->bind_point);
   struct nvk_push_descriptor_set *push_set =
      nvk_cmd_push_descriptors(cmd, desc, set);
   if (unlikely(push_set == NULL))
      return;

   struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[set]);

   nvk_push_descriptor_set_update_template(dev, push_set, set_layout, template,
                                           pPushDescriptorSetWithTemplateInfo->pData);

   /* We don't know the actual set of stages here so assume everything */
   nvk_cmd_dirty_cbufs_for_descriptors(cmd, NVK_VK_GRAPHICS_STAGE_BITS |
                                       VK_SHADER_STAGE_COMPUTE_BIT,
                                       set, set + 1);
}