/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_cmd_buffer.h"

#include "nvk_buffer.h"
#include "nvk_cmd_pool.h"
#include "nvk_descriptor_set_layout.h"
#include "nvk_device.h"
#include "nvk_device_memory.h"
#include "nvk_entrypoints.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"
#include "nvkmd/nvkmd.h"

#include "vk_pipeline_layout.h"
#include "vk_synchronization.h"

#include "nv_push_cl906f.h"
#include "nv_push_cl90b5.h"
#include "nv_push_cla097.h"
#include "nv_push_cla0c0.h"
#include "nv_push_clb1c0.h"
#include "nv_push_clc597.h"

static void
nvk_descriptor_state_fini(struct nvk_cmd_buffer *cmd,
                          struct nvk_descriptor_state *desc)
{
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   for (unsigned i = 0; i < NVK_MAX_SETS; i++) {
      vk_free(&pool->vk.alloc, desc->sets[i].push);
      desc->sets[i].push = NULL;
   }
}

static void
nvk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
{
   struct nvk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct nvk_cmd_buffer, vk);
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   nvk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors);
   nvk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors);

   nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
   nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
   util_dynarray_fini(&cmd->pushes);
   vk_command_buffer_finish(&cmd->vk);
   vk_free(&pool->vk.alloc, cmd);
}

static VkResult
nvk_create_cmd_buffer(struct vk_command_pool *vk_pool,
                      VkCommandBufferLevel level,
                      struct vk_command_buffer **cmd_buffer_out)
{
   struct nvk_cmd_pool *pool = container_of(vk_pool, struct nvk_cmd_pool, vk);
   struct nvk_device *dev = nvk_cmd_pool_device(pool);
   struct nvk_cmd_buffer *cmd;
   VkResult result;

   cmd = vk_zalloc(&pool->vk.alloc, sizeof(*cmd), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cmd == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = vk_command_buffer_init(&pool->vk, &cmd->vk,
                                   &nvk_cmd_buffer_ops, level);
   if (result != VK_SUCCESS) {
      vk_free(&pool->vk.alloc, cmd);
      return result;
   }

   cmd->vk.dynamic_graphics_state.vi = &cmd->state.gfx._dynamic_vi;
   cmd->vk.dynamic_graphics_state.ms.sample_locations =
      &cmd->state.gfx._dynamic_sl;

   list_inithead(&cmd->owned_mem);
   list_inithead(&cmd->owned_gart_mem);
   util_dynarray_init(&cmd->pushes, NULL);

   *cmd_buffer_out = &cmd->vk;

   return VK_SUCCESS;
}

static void
nvk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
                     UNUSED VkCommandBufferResetFlags flags)
{
   struct nvk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct nvk_cmd_buffer, vk);
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   vk_command_buffer_reset(&cmd->vk);

   nvk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors);
   nvk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors);

   nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
   nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
   cmd->upload_mem = NULL;
   cmd->push_mem = NULL;
   cmd->push_mem_limit = NULL;
   cmd->push = (struct nv_push) {0};

   util_dynarray_clear(&cmd->pushes);

   memset(&cmd->state, 0, sizeof(cmd->state));
}

const struct vk_command_buffer_ops nvk_cmd_buffer_ops = {
   .create = nvk_create_cmd_buffer,
   .reset = nvk_reset_cmd_buffer,
   .destroy = nvk_destroy_cmd_buffer,
};

/* If we ever fail to allocate a push, we use this */
static uint32_t push_runout[NVK_CMD_BUFFER_MAX_PUSH];

static VkResult
nvk_cmd_buffer_alloc_mem(struct nvk_cmd_buffer *cmd, bool force_gart,
                         struct nvk_cmd_mem **mem_out)
{
   VkResult result = nvk_cmd_pool_alloc_mem(nvk_cmd_buffer_pool(cmd),
                                            force_gart, mem_out);
   if (result != VK_SUCCESS)
      return result;

   if (force_gart)
      list_addtail(&(*mem_out)->link, &cmd->owned_gart_mem);
   else
      list_addtail(&(*mem_out)->link, &cmd->owned_mem);

   return VK_SUCCESS;
}

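/* Closes out the push range currently being recorded: if a push BO is bound,
 * its CPU map, GPU address, and size in bytes are appended to cmd->pushes,
 * and the next range starts where this one ended.
 */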
static void
nvk_cmd_buffer_flush_push(struct nvk_cmd_buffer *cmd)
{
   if (likely(cmd->push_mem != NULL)) {
      const uint32_t mem_offset =
         (char *)cmd->push.start - (char *)cmd->push_mem->mem->map;

      struct nvk_cmd_push push = {
         .map = cmd->push.start,
         .addr = cmd->push_mem->mem->va->addr + mem_offset,
         .range = nv_push_dw_count(&cmd->push) * 4,
      };
      util_dynarray_append(&cmd->pushes, struct nvk_cmd_push, push);
   }

   cmd->push.start = cmd->push.end;
}

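/* Flushes the current push range and points cmd->push at a fresh push BO.
 * If allocation fails, the error is recorded on the command buffer and the
 * static push_runout buffer is used as scratch so subsequent pushes still
 * have somewhere to write.
 */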
void
nvk_cmd_buffer_new_push(struct nvk_cmd_buffer *cmd)
{
   nvk_cmd_buffer_flush_push(cmd);

   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false, &cmd->push_mem);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(&cmd->vk, result);
      STATIC_ASSERT(NVK_CMD_BUFFER_MAX_PUSH <= NVK_CMD_MEM_SIZE / 4);
      cmd->push_mem = NULL;
      nv_push_init(&cmd->push, push_runout, 0);
      cmd->push_mem_limit = &push_runout[NVK_CMD_BUFFER_MAX_PUSH];
   } else {
      nv_push_init(&cmd->push, cmd->push_mem->mem->map, 0);
      cmd->push_mem_limit =
         (uint32_t *)((char *)cmd->push_mem->mem->map + NVK_CMD_MEM_SIZE);
   }
}

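/* Records a push that executes from GPU memory not owned by this command
 * buffer.  Such pushes are marked no_prefetch since their contents may not
 * be known until the GPU executes them (e.g. generated commands).
 */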
void
nvk_cmd_buffer_push_indirect(struct nvk_cmd_buffer *cmd,
                             uint64_t addr, uint32_t range)
{
   nvk_cmd_buffer_flush_push(cmd);

   struct nvk_cmd_push push = {
      .addr = addr,
      .range = range,
      .no_prefetch = true,
   };

   util_dynarray_append(&cmd->pushes, struct nvk_cmd_push, push);
}

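/* Sub-allocates size bytes of command-buffer-lifetime GPU memory and returns
 * both its GPU address and a CPU mapping.  Allocations are carved out of the
 * current upload BO when they fit; otherwise a new BO is pulled from the
 * pool.
 */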
VkResult
nvk_cmd_buffer_upload_alloc(struct nvk_cmd_buffer *cmd,
                            uint32_t size, uint32_t alignment,
                            uint64_t *addr, void **ptr)
{
   assert(size % 4 == 0);
   assert(size <= NVK_CMD_MEM_SIZE);

   uint32_t offset = cmd->upload_offset;
   if (alignment > 0)
      offset = align(offset, alignment);

   assert(offset <= NVK_CMD_MEM_SIZE);
   if (cmd->upload_mem != NULL && size <= NVK_CMD_MEM_SIZE - offset) {
      *addr = cmd->upload_mem->mem->va->addr + offset;
      *ptr = (char *)cmd->upload_mem->mem->map + offset;

      cmd->upload_offset = offset + size;

      return VK_SUCCESS;
   }

   struct nvk_cmd_mem *mem;
   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false, &mem);
   if (unlikely(result != VK_SUCCESS))
      return result;

   *addr = mem->mem->va->addr;
   *ptr = mem->mem->map;

   /* Pick whichever of the current upload BO and the new BO will have more
    * room left to be the BO for the next upload.  If our upload size is
    * bigger than the old offset, we're better off burning the whole new
    * upload BO on this one allocation and continuing on the current upload
    * BO.
    */
   if (cmd->upload_mem == NULL || size < cmd->upload_offset) {
      cmd->upload_mem = mem;
      cmd->upload_offset = size;
   }

   return VK_SUCCESS;
}

VkResult
nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
                           const void *data, uint32_t size,
                           uint32_t alignment, uint64_t *addr)
{
   VkResult result;
   void *map;

   result = nvk_cmd_buffer_upload_alloc(cmd, size, alignment, addr, &map);
   if (unlikely(result != VK_SUCCESS))
      return result;

   memcpy(map, data, size);

   return VK_SUCCESS;
}

VkResult
nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
                                 uint64_t *addr)
{
   uint32_t offset = cmd->cond_render_gart_offset;
   uint32_t size = 64;

   assert(offset <= NVK_CMD_MEM_SIZE);
   if (cmd->cond_render_gart_mem != NULL && size <= NVK_CMD_MEM_SIZE - offset) {
      *addr = cmd->cond_render_gart_mem->mem->va->addr + offset;

      cmd->cond_render_gart_offset = offset + size;

      return VK_SUCCESS;
   }

   struct nvk_cmd_mem *mem;
   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, true, &mem);
   if (unlikely(result != VK_SUCCESS))
      return result;

   *addr = mem->mem->va->addr;

   /* Pick whichever of the current cond render BO and the new BO will have
    * more room left to be the BO for the next allocation.  If this
    * allocation is bigger than the old offset, we're better off burning the
    * whole new BO on this one allocation and continuing on the current BO.
    */
   if (cmd->cond_render_gart_mem == NULL || size < cmd->cond_render_gart_offset) {
      cmd->cond_render_gart_mem = mem;
      cmd->cond_render_gart_offset = size;
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                       const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_reset_cmd_buffer(&cmd->vk, 0);

   /* Start with a nop so we have at least something to submit */
   struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
   P_MTHD(p, NV90B5, NOP);
   P_NV90B5_NOP(p, 0);

   nvk_cmd_buffer_begin_compute(cmd, pBeginInfo);
   nvk_cmd_buffer_begin_graphics(cmd, pBeginInfo);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_cmd_buffer_flush_push(cmd);

   return vk_command_buffer_get_record_result(&cmd->vk);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                       uint32_t commandBufferCount,
                       const VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (commandBufferCount == 0)
      return;

   nvk_cmd_buffer_flush_push(cmd);

   for (uint32_t i = 0; i < commandBufferCount; i++) {
      VK_FROM_HANDLE(nvk_cmd_buffer, other, pCommandBuffers[i]);

      /* We only need to copy the pushes.  We do not copy
       * nvk_cmd_buffer::owned_mem because that tracks ownership.  Instead, we
       * depend on the app to not discard secondaries while they are used by a
       * primary.  The Vulkan 1.3.227 spec for vkFreeCommandBuffers() says:
       *
       *    "Any primary command buffer that is in the recording or executable
       *    state and has any element of pCommandBuffers recorded into it,
       *    becomes invalid."
       *
       * In other words, if the secondary command buffer ever goes away, this
       * command buffer is invalid and the only thing the client can validly
       * do with it is reset it.  vkResetCommandPool() has similar language.
       */
      util_dynarray_append_dynarray(&cmd->pushes, &other->pushes);
   }

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When secondary command buffer(s) are recorded to execute on a
    *    primary command buffer, the secondary command buffer inherits no
    *    state from the primary command buffer, and all state of the primary
    *    command buffer is undefined after an execute secondary command buffer
    *    command is recorded. There is one exception to this rule - if the
    *    primary command buffer is inside a render pass instance, then the
    *    render pass and subpass state is not disturbed by executing secondary
    *    command buffers. For state dependent commands (such as draws and
    *    dispatches), any state consumed by those commands must not be
    *    undefined."
    *
    * Therefore, it's the client's job to reset all the state in the primary
    * after the secondary executes.  However, if we're doing any internal
    * dirty tracking, we may miss the fact that a secondary has messed with
    * GPU state if we don't invalidate all our internal tracking.
    */
   nvk_cmd_invalidate_graphics_state(cmd);
   nvk_cmd_invalidate_compute_state(cmd);
}

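/* Bitmask of the hardware-level flush, wait-for-idle, and cache-invalidate
 * operations implied by a Vulkan barrier.
 */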
enum nvk_barrier {
   NVK_BARRIER_RENDER_WFI              = 1 << 0,
   NVK_BARRIER_COMPUTE_WFI             = 1 << 1,
   NVK_BARRIER_FLUSH_SHADER_DATA       = 1 << 2,
   NVK_BARRIER_INVALIDATE_SHADER_DATA  = 1 << 3,
   NVK_BARRIER_INVALIDATE_TEX_DATA     = 1 << 4,
   NVK_BARRIER_INVALIDATE_CONSTANT     = 1 << 5,
   NVK_BARRIER_INVALIDATE_MME_DATA     = 1 << 6,
   NVK_BARRIER_INVALIDATE_QMD_DATA     = 1 << 7,
};

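/* Computes the flushes and waits required on the source side of a barrier
 * from its source stage and access masks.
 */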
static enum nvk_barrier
nvk_barrier_flushes_waits(VkPipelineStageFlags2 stages,
                          VkAccessFlags2 access)
{
   stages = vk_expand_src_stage_flags2(stages);
   access = vk_filter_src_access_flags2(stages, access);

   enum nvk_barrier barriers = 0;

   if (access & VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT) {
      barriers |= NVK_BARRIER_FLUSH_SHADER_DATA;

      if (vk_pipeline_stage_flags2_has_graphics_shader(stages))
         barriers |= NVK_BARRIER_RENDER_WFI;

      if (vk_pipeline_stage_flags2_has_compute_shader(stages))
         barriers |= NVK_BARRIER_COMPUTE_WFI;
   }

   if (access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
                 VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                 VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT))
      barriers |= NVK_BARRIER_RENDER_WFI;

   if ((access & VK_ACCESS_2_TRANSFER_WRITE_BIT) &&
       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
                  VK_PIPELINE_STAGE_2_BLIT_BIT |
                  VK_PIPELINE_STAGE_2_CLEAR_BIT)))
      barriers |= NVK_BARRIER_RENDER_WFI;

   if (access & VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_EXT)
      barriers |= NVK_BARRIER_FLUSH_SHADER_DATA |
                  NVK_BARRIER_COMPUTE_WFI;

   return barriers;
}

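/* Computes the cache invalidations required on the destination side of a
 * barrier from its destination stage and access masks.
 */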
static enum nvk_barrier
nvk_barrier_invalidates(VkPipelineStageFlags2 stages,
                        VkAccessFlags2 access)
{
   stages = vk_expand_dst_stage_flags2(stages);
   access = vk_filter_dst_access_flags2(stages, access);

   enum nvk_barrier barriers = 0;

   if (access & (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT |
                 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
                 VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT |
                 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT))
      barriers |= NVK_BARRIER_INVALIDATE_MME_DATA;

   if (access & VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT)
      barriers |= NVK_BARRIER_INVALIDATE_CONSTANT |
                  NVK_BARRIER_INVALIDATE_QMD_DATA;

   if (access & (VK_ACCESS_2_UNIFORM_READ_BIT |
                 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT))
      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA |
                  NVK_BARRIER_INVALIDATE_CONSTANT;

   if (access & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
                 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT))
      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   if (access & VK_ACCESS_2_SHADER_STORAGE_READ_BIT)
      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA;

   if ((access & VK_ACCESS_2_TRANSFER_READ_BIT) &&
       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
                  VK_PIPELINE_STAGE_2_BLIT_BIT)))
      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   return barriers;
}

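/* Emits the source-side cache flushes and wait-for-idle commands required by
 * a dependency.  When wait is false (the vkCmdSetEvent path), the standalone
 * 3D wait-for-idle is skipped.
 */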
void
nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
                       const VkDependencyInfo *dep,
                       bool wait)
{
   enum nvk_barrier barriers = 0;

   for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
      const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
      const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
      const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   if (!barriers)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);

   if (barriers & NVK_BARRIER_FLUSH_SHADER_DATA) {
      assert(barriers & (NVK_BARRIER_RENDER_WFI | NVK_BARRIER_COMPUTE_WFI));
      if (barriers & NVK_BARRIER_RENDER_WFI) {
         P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES, {
            .data = DATA_TRUE,
            .flush_data = FLUSH_DATA_TRUE,
         });
      }

      if (barriers & NVK_BARRIER_COMPUTE_WFI) {
         P_IMMD(p, NVA0C0, INVALIDATE_SHADER_CACHES, {
            .data = DATA_TRUE,
            .flush_data = FLUSH_DATA_TRUE,
         });
      }
   } else if (barriers & NVK_BARRIER_RENDER_WFI) {
      /* If this comes from a vkCmdSetEvent, we don't need to wait */
      if (wait)
         P_IMMD(p, NVA097, WAIT_FOR_IDLE, 0);
   } else {
      /* Compute WFI only happens when shader data is flushed */
      assert(!(barriers & NVK_BARRIER_COMPUTE_WFI));
   }
}

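/* Emits the destination-side cache invalidates required by a set of
 * dependencies.
 */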
void
nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
                        uint32_t dep_count,
                        const VkDependencyInfo *deps)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   enum nvk_barrier barriers = 0;

   for (uint32_t d = 0; d < dep_count; d++) {
      const VkDependencyInfo *dep = &deps[d];

      for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
         const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }

      for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
         const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }

      for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
         const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }
   }

   if (!barriers)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

   if (barriers & NVK_BARRIER_INVALIDATE_TEX_DATA) {
      P_IMMD(p, NVA097, INVALIDATE_TEXTURE_DATA_CACHE_NO_WFI, {
         .lines = LINES_ALL,
      });
   }

   if (barriers & (NVK_BARRIER_INVALIDATE_SHADER_DATA |
                   NVK_BARRIER_INVALIDATE_CONSTANT)) {
      P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES_NO_WFI, {
         .global_data = (barriers & NVK_BARRIER_INVALIDATE_SHADER_DATA) != 0,
         .constant = (barriers & NVK_BARRIER_INVALIDATE_CONSTANT) != 0,
      });
   }

   if (barriers & (NVK_BARRIER_INVALIDATE_MME_DATA)) {
      __push_immd(p, SUBC_NV9097, NV906F_SET_REFERENCE, 0);

      if (pdev->info.cls_eng3d >= TURING_A)
         P_IMMD(p, NVC597, MME_DMA_SYSMEMBAR, 0);
   }

   if ((barriers & NVK_BARRIER_INVALIDATE_QMD_DATA) &&
       pdev->info.cls_eng3d >= MAXWELL_COMPUTE_B)
      P_IMMD(p, NVB1C0, INVALIDATE_SKED_CACHES, 0);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
                        const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, true);
   nvk_cmd_invalidate_deps(cmd, 1, pDependencyInfo);
}

void
nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
                     uint32_t stage_count,
                     const gl_shader_stage *stages,
                     struct vk_shader ** const shaders)
{
   struct nvk_cmd_buffer *cmd = container_of(vk_cmd, struct nvk_cmd_buffer, vk);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);

   for (uint32_t i = 0; i < stage_count; i++) {
      struct nvk_shader *shader =
         container_of(shaders[i], struct nvk_shader, vk);

      if (shader != NULL) {
         nvk_device_ensure_slm(dev, shader->info.slm_size,
                                    shader->info.crs_size);
      }

      if (stages[i] == MESA_SHADER_COMPUTE ||
          stages[i] == MESA_SHADER_KERNEL)
         nvk_cmd_bind_compute_shader(cmd, shader);
      else
         nvk_cmd_bind_graphics_shader(cmd, stages[i], shader);
   }
}

#define NVK_VK_GRAPHICS_STAGE_BITS VK_SHADER_STAGE_ALL_GRAPHICS

void
nvk_cmd_dirty_cbufs_for_descriptors(struct nvk_cmd_buffer *cmd,
                                    VkShaderStageFlags stages,
                                    uint32_t sets_start, uint32_t sets_end)
{
   if (!(stages & NVK_VK_GRAPHICS_STAGE_BITS))
      return;

   uint32_t groups = 0;
   u_foreach_bit(i, stages & NVK_VK_GRAPHICS_STAGE_BITS) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(1 << i);
      uint32_t g = nvk_cbuf_binding_for_stage(stage);
      groups |= BITFIELD_BIT(g);
   }

   u_foreach_bit(g, groups) {
      struct nvk_cbuf_group *group = &cmd->state.gfx.cbuf_groups[g];

      for (uint32_t i = 0; i < ARRAY_SIZE(group->cbufs); i++) {
         const struct nvk_cbuf *cbuf = &group->cbufs[i];
         switch (cbuf->type) {
         case NVK_CBUF_TYPE_INVALID:
         case NVK_CBUF_TYPE_ROOT_DESC:
         case NVK_CBUF_TYPE_SHADER_DATA:
            break;

         case NVK_CBUF_TYPE_DESC_SET:
         case NVK_CBUF_TYPE_UBO_DESC:
         case NVK_CBUF_TYPE_DYNAMIC_UBO:
            if (cbuf->desc_set >= sets_start && cbuf->desc_set < sets_end)
               group->dirty |= BITFIELD_BIT(i);
            break;

         default:
            unreachable("Invalid cbuf type");
         }
      }
   }
}

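/* Implements descriptor set binding for one bind point: updates the bound
 * sets in the root descriptor table and packs the dynamic buffer
 * descriptors, with the dynamic offsets applied, into the root table's
 * dynamic buffer array.
 */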
static void
nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
                         struct nvk_descriptor_state *desc,
                         const VkBindDescriptorSetsInfoKHR *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   union nvk_buffer_descriptor dynamic_buffers[NVK_MAX_DYNAMIC_BUFFERS];
   uint8_t set_dynamic_buffer_start[NVK_MAX_SETS];

   /* Read off the current dynamic buffer start array so we can use it to
    * determine where we should start binding dynamic buffers.
    */
   nvk_descriptor_state_get_root_array(desc, set_dynamic_buffer_start,
                                       0, NVK_MAX_SETS,
                                       set_dynamic_buffer_start);

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When binding a descriptor set (see Descriptor Set Binding) to
    *    set number N...
    *
    *    If, additionally, the previously bound descriptor set for set
    *    N was bound using a pipeline layout not compatible for set N,
    *    then all bindings in sets numbered greater than N are
    *    disturbed."
    *
    * This means that, if some earlier set gets bound in such a way that
    * it changes set_dynamic_buffer_start[s], this binding is implicitly
    * invalidated.  Therefore, we can always look at the current value
    * of set_dynamic_buffer_start[s] as the base of our dynamic buffer
    * range and it's only our responsibility to adjust all
    * set_dynamic_buffer_start[p] for p > s as needed.
    */
   const uint8_t dyn_buffer_start = set_dynamic_buffer_start[info->firstSet];
   uint8_t dyn_buffer_end = dyn_buffer_start;

   uint32_t next_dyn_offset = 0;
   for (uint32_t i = 0; i < info->descriptorSetCount; ++i) {
      unsigned s = i + info->firstSet;
      VK_FROM_HANDLE(nvk_descriptor_set, set, info->pDescriptorSets[i]);

      if (desc->sets[s].type != NVK_DESCRIPTOR_SET_TYPE_SET ||
          desc->sets[s].set != set) {
         struct nvk_buffer_address set_addr;
         if (set != NULL) {
            desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_SET;
            desc->sets[s].set = set;
            set_addr = nvk_descriptor_set_addr(set);
         } else {
            desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_NONE;
            desc->sets[s].set = NULL;
            set_addr = NVK_BUFFER_ADDRESS_NULL;
         }
         nvk_descriptor_state_set_root(cmd, desc, sets[s], set_addr);
      }

      set_dynamic_buffer_start[s] = dyn_buffer_end;

      if (pipeline_layout->set_layouts[s] != NULL) {
         const struct nvk_descriptor_set_layout *set_layout =
            vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[s]);

         if (set != NULL && set_layout->dynamic_buffer_count > 0) {
            for (uint32_t j = 0; j < set_layout->dynamic_buffer_count; j++) {
               union nvk_buffer_descriptor db = set->dynamic_buffers[j];
               uint32_t offset = info->pDynamicOffsets[next_dyn_offset + j];
               if (BITSET_TEST(set_layout->dynamic_ubos, j) &&
                   nvk_use_bindless_cbuf(&pdev->info)) {
                  assert((offset & 0xf) == 0);
                  db.cbuf.base_addr_shift_4 += offset >> 4;
               } else {
                  db.addr.base_addr += offset;
               }
               dynamic_buffers[dyn_buffer_end + j] = db;
            }
            next_dyn_offset += set->layout->dynamic_buffer_count;
         }

         dyn_buffer_end += set_layout->dynamic_buffer_count;
      } else {
         assert(set == NULL);
      }
   }
   assert(dyn_buffer_end <= NVK_MAX_DYNAMIC_BUFFERS);
   assert(next_dyn_offset <= info->dynamicOffsetCount);

   nvk_descriptor_state_set_root_array(cmd, desc, dynamic_buffers,
                                       dyn_buffer_start, dyn_buffer_end - dyn_buffer_start,
                                       &dynamic_buffers[dyn_buffer_start]);

   /* We need to set everything above first_set because later calls to
    * nvk_bind_descriptor_sets() depend on it for knowing where to start and
    * they may not be called on the next consecutive set.
    */
   for (uint32_t s = info->firstSet + info->descriptorSetCount;
        s < NVK_MAX_SETS; s++)
      set_dynamic_buffer_start[s] = dyn_buffer_end;

   /* We need to at least sync everything from first_set to NVK_MAX_SETS.
    * However, we only save anything if firstSet >= 4 so we may as well sync
    * everything just to be safe.
    */
   nvk_descriptor_state_set_root_array(cmd, desc, set_dynamic_buffer_start,
                                       0, NVK_MAX_SETS,
                                       set_dynamic_buffer_start);

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags, info->firstSet,
                                       info->firstSet + info->descriptorSetCount);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorSets2KHR(VkCommandBuffer commandBuffer,
                              const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pBindDescriptorSetsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_bind_descriptor_sets(cmd, &cmd->state.gfx.descriptors,
                               pBindDescriptorSetsInfo);
   }

   if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_bind_descriptor_sets(cmd, &cmd->state.cs.descriptors,
                               pBindDescriptorSetsInfo);
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorBuffersEXT(VkCommandBuffer commandBuffer,
                                uint32_t bufferCount,
                                const VkDescriptorBufferBindingInfoEXT *pBindingInfos)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   for (uint32_t i = 0; i < bufferCount; i++)
      cmd->state.descriptor_buffers[i] = pBindingInfos[i].address;
}

static void
nvk_set_descriptor_buffer_offsets(struct nvk_cmd_buffer *cmd,
                                  struct nvk_descriptor_state *desc,
                                  const VkSetDescriptorBufferOffsetsInfoEXT *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);

   for (uint32_t i = 0; i < info->setCount; ++i) {
      const uint32_t s = i + info->firstSet;

      desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_BUFFER;
      desc->sets[s].set = NULL;

      struct nvk_buffer_address set_addr;
      if (pipeline_layout->set_layouts[s] != NULL) {
         const struct nvk_descriptor_set_layout *set_layout =
            vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[s]);
         assert(set_layout->flags &
                VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT);

         const uint64_t buffer_base_addr =
            cmd->state.descriptor_buffers[info->pBufferIndices[i]];

         set_addr = (struct nvk_buffer_address) {
            .base_addr = buffer_base_addr + info->pOffsets[i],
            .size = set_layout->max_buffer_size,
         };
      } else {
         set_addr = NVK_BUFFER_ADDRESS_NULL;
      }
      nvk_descriptor_state_set_root(cmd, desc, sets[s], set_addr);
   }

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags,
                                       info->firstSet,
                                       info->firstSet + info->setCount);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdSetDescriptorBufferOffsets2EXT(VkCommandBuffer commandBuffer,
                                      const VkSetDescriptorBufferOffsetsInfoEXT *pInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_set_descriptor_buffer_offsets(cmd, &cmd->state.gfx.descriptors,
                                        pInfo);
   }

   if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_set_descriptor_buffer_offsets(cmd, &cmd->state.cs.descriptors,
                                        pInfo);
   }
}

static void
nvk_bind_embedded_samplers(struct nvk_cmd_buffer *cmd,
                           struct nvk_descriptor_state *desc,
                           const VkBindDescriptorBufferEmbeddedSamplersInfoEXT *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
   const struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]);

   struct nvk_buffer_address set_addr = {
      .base_addr = set_layout->embedded_samplers_addr,
      .size = set_layout->non_variable_descriptor_buffer_size,
   };
   nvk_descriptor_state_set_root(cmd, desc, sets[info->set], set_addr);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorBufferEmbeddedSamplers2EXT(
    VkCommandBuffer commandBuffer,
    const VkBindDescriptorBufferEmbeddedSamplersInfoEXT *pInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_bind_embedded_samplers(cmd, &cmd->state.gfx.descriptors, pInfo);
   }

   if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_bind_embedded_samplers(cmd, &cmd->state.cs.descriptors, pInfo);
   }
}

static void
nvk_push_constants(UNUSED struct nvk_cmd_buffer *cmd,
                   struct nvk_descriptor_state *desc,
                   const VkPushConstantsInfoKHR *info)
{
   nvk_descriptor_state_set_root_array(cmd, desc, push,
                                       info->offset, info->size,
                                       (char *)info->pValues);
}


VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
                         const VkPushConstantsInfoKHR *pPushConstantsInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pPushConstantsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS)
      nvk_push_constants(cmd, &cmd->state.gfx.descriptors, pPushConstantsInfo);

   if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
      nvk_push_constants(cmd, &cmd->state.cs.descriptors, pPushConstantsInfo);
}

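/* Returns the CPU-side push descriptor set for the given set index,
 * allocating it on first use.  The set is marked dirty so that
 * nvk_cmd_buffer_flush_push_descriptors() re-uploads it.
 */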
static struct nvk_push_descriptor_set *
nvk_cmd_push_descriptors(struct nvk_cmd_buffer *cmd,
                         struct nvk_descriptor_state *desc,
                         uint32_t set)
{
   assert(set < NVK_MAX_SETS);
   if (unlikely(desc->sets[set].push == NULL)) {
      desc->sets[set].push = vk_zalloc(&cmd->vk.pool->alloc,
                                       sizeof(*desc->sets[set].push), 8,
                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (unlikely(desc->sets[set].push == NULL)) {
         vk_command_buffer_set_error(&cmd->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
         return NULL;
      }
   }

   /* Pushing descriptors replaces whatever sets are bound */
   desc->sets[set].type = NVK_DESCRIPTOR_SET_TYPE_PUSH;
   desc->sets[set].set = NULL;
   desc->push_dirty |= BITFIELD_BIT(set);

   return desc->sets[set].push;
}

static void
nvk_push_descriptor_set(struct nvk_cmd_buffer *cmd,
                        struct nvk_descriptor_state *desc,
                        const VkPushDescriptorSetInfoKHR *info)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);

   struct nvk_push_descriptor_set *push_set =
      nvk_cmd_push_descriptors(cmd, desc, info->set);
   if (unlikely(push_set == NULL))
      return;

   struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]);

   nvk_push_descriptor_set_update(dev, push_set, set_layout,
                                  info->descriptorWriteCount,
                                  info->pDescriptorWrites);

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags,
                                       info->set, info->set + 1);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushDescriptorSet2KHR(VkCommandBuffer commandBuffer,
                             const VkPushDescriptorSetInfoKHR *pPushDescriptorSetInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pPushDescriptorSetInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_push_descriptor_set(cmd, &cmd->state.gfx.descriptors,
                              pPushDescriptorSetInfo);
   }

   if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_push_descriptor_set(cmd, &cmd->state.cs.descriptors,
                              pPushDescriptorSetInfo);
   }
}

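/* Uploads any dirty push descriptor sets into command-buffer memory and
 * points the corresponding root table entries at the uploaded copies.
 */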
void
nvk_cmd_buffer_flush_push_descriptors(struct nvk_cmd_buffer *cmd,
                                      struct nvk_descriptor_state *desc)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
   VkResult result;

   u_foreach_bit(set_idx, desc->push_dirty) {
      if (desc->sets[set_idx].type != NVK_DESCRIPTOR_SET_TYPE_PUSH)
         continue;

      struct nvk_push_descriptor_set *push_set = desc->sets[set_idx].push;
      uint64_t push_set_addr;
      result = nvk_cmd_buffer_upload_data(cmd, push_set->data,
                                          sizeof(push_set->data),
                                          min_cbuf_alignment,
                                          &push_set_addr);
      if (unlikely(result != VK_SUCCESS)) {
         vk_command_buffer_set_error(&cmd->vk, result);
         return;
      }

      struct nvk_buffer_address set_addr = {
         .base_addr = push_set_addr,
         .size = sizeof(push_set->data),
      };
      nvk_descriptor_state_set_root(cmd, desc, sets[set_idx], set_addr);
   }
}

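/* Resolves a constant buffer binding to a GPU address and size.  Returns
 * false when the address is not known on the CPU (a UBO descriptor living in
 * a bound descriptor or descriptor-buffer set); see
 * nvk_cmd_buffer_get_cbuf_descriptor_addr() below for that case.
 */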
bool
nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
                             const struct nvk_descriptor_state *desc,
                             const struct nvk_shader *shader,
                             const struct nvk_cbuf *cbuf,
                             struct nvk_buffer_address *addr_out)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   switch (cbuf->type) {
   case NVK_CBUF_TYPE_INVALID:
      *addr_out = (struct nvk_buffer_address) { .size = 0 };
      return true;

   case NVK_CBUF_TYPE_ROOT_DESC:
      unreachable("The caller should handle root descriptors");
      return false;

   case NVK_CBUF_TYPE_SHADER_DATA:
      *addr_out = (struct nvk_buffer_address) {
         .base_addr = shader->data_addr,
         .size = shader->data_size,
      };
      return true;

   case NVK_CBUF_TYPE_DESC_SET:
      nvk_descriptor_state_get_root(desc, sets[cbuf->desc_set], addr_out);
      return true;

   case NVK_CBUF_TYPE_DYNAMIC_UBO: {
      uint8_t dyn_idx;
      nvk_descriptor_state_get_root(
         desc, set_dynamic_buffer_start[cbuf->desc_set], &dyn_idx);
      dyn_idx += cbuf->dynamic_idx;
      union nvk_buffer_descriptor ubo_desc;
      nvk_descriptor_state_get_root(desc, dynamic_buffers[dyn_idx], &ubo_desc);
      *addr_out = nvk_ubo_descriptor_addr(pdev, ubo_desc);
      return true;
   }

   case NVK_CBUF_TYPE_UBO_DESC: {
      if (desc->sets[cbuf->desc_set].type != NVK_DESCRIPTOR_SET_TYPE_PUSH)
         return false;

      struct nvk_push_descriptor_set *push = desc->sets[cbuf->desc_set].push;
      if (push == NULL)
         return false;

      assert(cbuf->desc_offset < NVK_PUSH_DESCRIPTOR_SET_SIZE);
      union nvk_buffer_descriptor desc;
      memcpy(&desc, &push->data[cbuf->desc_offset], sizeof(desc));
      *addr_out = nvk_ubo_descriptor_addr(pdev, desc);
      return true;
   }

   default:
      unreachable("Invalid cbuf type");
   }
}

uint64_t
nvk_cmd_buffer_get_cbuf_descriptor_addr(struct nvk_cmd_buffer *cmd,
                                        const struct nvk_descriptor_state *desc,
                                        const struct nvk_cbuf *cbuf)
{
   assert(cbuf->type == NVK_CBUF_TYPE_UBO_DESC);
   switch (desc->sets[cbuf->desc_set].type) {
   case NVK_DESCRIPTOR_SET_TYPE_SET:
   case NVK_DESCRIPTOR_SET_TYPE_BUFFER: {
      struct nvk_buffer_address set_addr;
      nvk_descriptor_state_get_root(desc, sets[cbuf->desc_set], &set_addr);

      assert(cbuf->desc_offset < set_addr.size);
      return set_addr.base_addr + cbuf->desc_offset;
   }

   default:
      unreachable("Unknown descriptor set type");
   }
}

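/* Debug helper: prints every recorded push to fp.  Indirect pushes are also
 * decoded when their backing memory can be found and mapped.
 */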
void
nvk_cmd_buffer_dump(struct nvk_cmd_buffer *cmd, FILE *fp)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   util_dynarray_foreach(&cmd->pushes, struct nvk_cmd_push, p) {
      if (p->map) {
         struct nv_push push = {
            .start = (uint32_t *)p->map,
            .end = (uint32_t *)((char *)p->map + p->range),
         };
         vk_push_print(fp, &push, &pdev->info);
      } else {
         const uint64_t addr = p->addr;
         fprintf(fp, "<%u B of INDIRECT DATA at 0x%" PRIx64 ">\n",
                 p->range, addr);

         uint64_t mem_offset = 0;
         struct nvkmd_mem *mem =
            nvkmd_dev_lookup_mem_by_va(dev->nvkmd, addr, &mem_offset);
         if (mem != NULL) {
            void *map;
            VkResult map_result = nvkmd_mem_map(mem, &dev->vk.base,
                                                NVKMD_MEM_MAP_RD, NULL,
                                                &map);
            if (map_result == VK_SUCCESS) {
               struct nv_push push = {
                  .start = mem->map + mem_offset,
                  .end = mem->map + mem_offset + p->range,
               };
               vk_push_print(fp, &push, &pdev->info);
               nvkmd_mem_unmap(mem, 0);
            }

            nvkmd_mem_unref(mem);
         }
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushDescriptorSetWithTemplate2KHR(
   VkCommandBuffer commandBuffer,
   const VkPushDescriptorSetWithTemplateInfoKHR *pPushDescriptorSetWithTemplateInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   VK_FROM_HANDLE(vk_descriptor_update_template, template,
                  pPushDescriptorSetWithTemplateInfo->descriptorUpdateTemplate);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout,
                  pPushDescriptorSetWithTemplateInfo->layout);
   const uint32_t set = pPushDescriptorSetWithTemplateInfo->set;

   struct nvk_descriptor_state *desc =
      nvk_get_descriptors_state(cmd, template->bind_point);
   struct nvk_push_descriptor_set *push_set =
      nvk_cmd_push_descriptors(cmd, desc, set);
   if (unlikely(push_set == NULL))
      return;

   struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[set]);

   nvk_push_descriptor_set_update_template(dev, push_set, set_layout, template,
                                           pPushDescriptorSetWithTemplateInfo->pData);

   /* We don't know the actual set of stages here so assume everything */
   nvk_cmd_dirty_cbufs_for_descriptors(cmd, NVK_VK_GRAPHICS_STAGE_BITS |
                                            VK_SHADER_STAGE_COMPUTE_BIT,
                                       set, set + 1);
}