/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_cmd_buffer.h"

#include "nvk_buffer.h"
#include "nvk_cmd_pool.h"
#include "nvk_descriptor_set.h"
#include "nvk_descriptor_set_layout.h"
#include "nvk_device.h"
#include "nvk_device_memory.h"
#include "nvk_entrypoints.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"

#include "vk_pipeline_layout.h"
#include "vk_synchronization.h"

#include "nouveau_context.h"

#include "nouveau/nouveau.h"

#include "nvk_cl906f.h"
#include "nvk_cl90b5.h"
#include "nvk_cla097.h"
#include "nvk_cla0c0.h"
#include "nvk_clc597.h"

static void
nvk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
{
   struct nvk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct nvk_cmd_buffer, vk);
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   nvk_cmd_pool_free_bo_list(pool, &cmd->bos);
   nvk_cmd_pool_free_gart_bo_list(pool, &cmd->gart_bos);
   util_dynarray_fini(&cmd->pushes);
   vk_command_buffer_finish(&cmd->vk);
   vk_free(&pool->vk.alloc, cmd);
}

static VkResult
nvk_create_cmd_buffer(struct vk_command_pool *vk_pool,
                      struct vk_command_buffer **cmd_buffer_out)
{
   struct nvk_cmd_pool *pool = container_of(vk_pool, struct nvk_cmd_pool, vk);
   struct nvk_device *dev = nvk_cmd_pool_device(pool);
   struct nvk_cmd_buffer *cmd;
   VkResult result;

   cmd = vk_zalloc(&pool->vk.alloc, sizeof(*cmd), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cmd == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = vk_command_buffer_init(&pool->vk, &cmd->vk,
                                   &nvk_cmd_buffer_ops, 0);
   if (result != VK_SUCCESS) {
      vk_free(&pool->vk.alloc, cmd);
      return result;
   }

   cmd->vk.dynamic_graphics_state.vi = &cmd->state.gfx._dynamic_vi;
   cmd->vk.dynamic_graphics_state.ms.sample_locations =
      &cmd->state.gfx._dynamic_sl;

   list_inithead(&cmd->bos);
   list_inithead(&cmd->gart_bos);
   util_dynarray_init(&cmd->pushes, NULL);

   *cmd_buffer_out = &cmd->vk;

   return VK_SUCCESS;
}

static void
nvk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
                     UNUSED VkCommandBufferResetFlags flags)
{
   struct nvk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct nvk_cmd_buffer, vk);
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   vk_command_buffer_reset(&cmd->vk);

   nvk_cmd_pool_free_bo_list(pool, &cmd->bos);
   nvk_cmd_pool_free_gart_bo_list(pool, &cmd->gart_bos);
   cmd->upload_bo = NULL;
   cmd->push_bo = NULL;
   cmd->push_bo_limit = NULL;
   cmd->push = (struct nv_push) {0};

   util_dynarray_clear(&cmd->pushes);

   memset(&cmd->state, 0, sizeof(cmd->state));
}

const struct vk_command_buffer_ops nvk_cmd_buffer_ops = {
   .create = nvk_create_cmd_buffer,
   .reset = nvk_reset_cmd_buffer,
   .destroy = nvk_destroy_cmd_buffer,
};

/* If we ever fail to allocate a push, we use this */
static uint32_t push_runout[NVK_CMD_BUFFER_MAX_PUSH];

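/* Allocate a BO from the command pool and track it on the command buffer so
 * it is returned to the pool when the command buffer is reset or destroyed.
 */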
static VkResult
nvk_cmd_buffer_alloc_bo(struct nvk_cmd_buffer *cmd, bool force_gart,
                        struct nvk_cmd_bo **bo_out)
{
   VkResult result = nvk_cmd_pool_alloc_bo(nvk_cmd_buffer_pool(cmd),
                                           force_gart, bo_out);
   if (result != VK_SUCCESS)
      return result;

   if (force_gart)
      list_addtail(&(*bo_out)->link, &cmd->gart_bos);
   else
      list_addtail(&(*bo_out)->link, &cmd->bos);

   return VK_SUCCESS;
}

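/* Record the range written to the current push BO so far as an entry in
 * cmd->pushes, then reset the push to start at its current end so later
 * commands begin a new range.
 */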
static void
nvk_cmd_buffer_flush_push(struct nvk_cmd_buffer *cmd)
{
   if (likely(cmd->push_bo != NULL)) {
      const uint32_t bo_offset =
         (char *)cmd->push.start - (char *)cmd->push_bo->map;

      struct nvk_cmd_push push = {
         .map = cmd->push.start,
         .addr = cmd->push_bo->bo->offset + bo_offset,
         .range = nv_push_dw_count(&cmd->push) * 4,
      };
      util_dynarray_append(&cmd->pushes, struct nvk_cmd_push, push);
   }

   cmd->push.start = cmd->push.end;
}

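/* Start a fresh push BO.  If allocation fails, fall back to the static
 * push_runout scratch buffer; since push_bo is left NULL in that case,
 * nvk_cmd_buffer_flush_push() never records the runout data for submission.
 */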
void
nvk_cmd_buffer_new_push(struct nvk_cmd_buffer *cmd)
{
   nvk_cmd_buffer_flush_push(cmd);

   VkResult result = nvk_cmd_buffer_alloc_bo(cmd, false, &cmd->push_bo);
   if (unlikely(result != VK_SUCCESS)) {
      STATIC_ASSERT(NVK_CMD_BUFFER_MAX_PUSH <= NVK_CMD_BO_SIZE / 4);
      cmd->push_bo = NULL;
      nv_push_init(&cmd->push, push_runout, 0);
      cmd->push_bo_limit = &push_runout[NVK_CMD_BUFFER_MAX_PUSH];
   } else {
      nv_push_init(&cmd->push, cmd->push_bo->map, 0);
      cmd->push_bo_limit =
         (uint32_t *)((char *)cmd->push_bo->map + NVK_CMD_BO_SIZE);
   }
}

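/* Record a push that executes from caller-provided GPU memory at addr
 * rather than from a BO owned by this command buffer; the range is appended
 * to cmd->pushes with no_prefetch set.
 */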
void
nvk_cmd_buffer_push_indirect(struct nvk_cmd_buffer *cmd,
                             uint64_t addr, uint32_t range)
{
   nvk_cmd_buffer_flush_push(cmd);

   struct nvk_cmd_push push = {
      .addr = addr,
      .range = range,
      .no_prefetch = true,
   };

   util_dynarray_append(&cmd->pushes, struct nvk_cmd_push, push);
}

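/* Sub-allocate size bytes from the command buffer's upload BO, returning
 * both a GPU address and a CPU pointer.  A new NVK_CMD_BO_SIZE BO is
 * allocated when the current one doesn't have enough room.
 */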
VkResult
nvk_cmd_buffer_upload_alloc(struct nvk_cmd_buffer *cmd,
                            uint32_t size, uint32_t alignment,
                            uint64_t *addr, void **ptr)
{
   assert(size % 4 == 0);
   assert(size <= NVK_CMD_BO_SIZE);

   uint32_t offset = cmd->upload_offset;
   if (alignment > 0)
      offset = align(offset, alignment);

   assert(offset <= NVK_CMD_BO_SIZE);
   if (cmd->upload_bo != NULL && size <= NVK_CMD_BO_SIZE - offset) {
      *addr = cmd->upload_bo->bo->offset + offset;
      *ptr = (char *)cmd->upload_bo->map + offset;

      cmd->upload_offset = offset + size;

      return VK_SUCCESS;
   }

   struct nvk_cmd_bo *bo;
   VkResult result = nvk_cmd_buffer_alloc_bo(cmd, false, &bo);
   if (unlikely(result != VK_SUCCESS))
      return result;

   *addr = bo->bo->offset;
   *ptr = bo->map;

   /* Pick whichever of the current upload BO and the new BO will have more
    * room left to be the BO for the next upload.  If our upload size is
    * bigger than the old offset, we're better off burning the whole new
    * upload BO on this one allocation and continuing on the current upload
    * BO.
    */
   if (cmd->upload_bo == NULL || size < cmd->upload_offset) {
      cmd->upload_bo = bo;
      cmd->upload_offset = size;
   }

   return VK_SUCCESS;
}

VkResult
nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
                           const void *data, uint32_t size,
                           uint32_t alignment, uint64_t *addr)
{
   VkResult result;
   void *map;

   result = nvk_cmd_buffer_upload_alloc(cmd, size, alignment, addr, &map);
   if (unlikely(result != VK_SUCCESS))
      return result;

   memcpy(map, data, size);

   return VK_SUCCESS;
}

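/* Allocate a 64-byte slot for a conditional rendering predicate from a GART
 * BO (force_gart = true) rather than from the regular upload BO.
 */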
VkResult
nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
                                 uint64_t *addr)
{
   uint32_t offset = cmd->cond_render_gart_offset;
   uint32_t size = 64;

   assert(offset <= NVK_CMD_BO_SIZE);
   if (cmd->cond_render_gart_bo != NULL && size <= NVK_CMD_BO_SIZE - offset) {
      *addr = cmd->cond_render_gart_bo->bo->offset + offset;

      cmd->cond_render_gart_offset = offset + size;

      return VK_SUCCESS;
   }

   struct nvk_cmd_bo *bo;
   VkResult result = nvk_cmd_buffer_alloc_bo(cmd, true, &bo);
   if (unlikely(result != VK_SUCCESS))
      return result;

   *addr = bo->bo->offset;

   /* Pick whichever of the current cond render BO and the new BO will have
    * more room left to be the BO for the next allocation.  If our allocation
    * size is bigger than the old offset, we're better off burning the whole
    * new BO on this one allocation and continuing on the current BO.
    */
   if (cmd->cond_render_gart_bo == NULL || size < cmd->cond_render_gart_offset) {
      cmd->cond_render_gart_bo = bo;
      cmd->cond_render_gart_offset = size;
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                       const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_reset_cmd_buffer(&cmd->vk, 0);

   /* Start with a nop so we have at least something to submit */
   struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
   P_MTHD(p, NV90B5, NOP);
   P_NV90B5_NOP(p, 0);

   nvk_cmd_buffer_begin_compute(cmd, pBeginInfo);
   nvk_cmd_buffer_begin_graphics(cmd, pBeginInfo);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_cmd_buffer_flush_push(cmd);

   return vk_command_buffer_get_record_result(&cmd->vk);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                       uint32_t commandBufferCount,
                       const VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (commandBufferCount == 0)
      return;

   nvk_cmd_buffer_flush_push(cmd);

   for (uint32_t i = 0; i < commandBufferCount; i++) {
      VK_FROM_HANDLE(nvk_cmd_buffer, other, pCommandBuffers[i]);

      /* We only need to copy the pushes.  We do not copy the
       * nvk_cmd_buffer::bos because that tracks ownership.  Instead, we
       * depend on the app to not discard secondaries while they are used by a
       * primary.  The Vulkan 1.3.227 spec for vkFreeCommandBuffers() says:
       *
       *    "Any primary command buffer that is in the recording or executable
       *    state and has any element of pCommandBuffers recorded into it,
       *    becomes invalid."
       *
       * In other words, if the secondary command buffer ever goes away, this
       * command buffer is invalid and the only thing the client can validly
       * do with it is reset it.  vkResetCommandPool() has similar language.
       */
      util_dynarray_append_dynarray(&cmd->pushes, &other->pushes);
   }

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When secondary command buffer(s) are recorded to execute on a
    *    primary command buffer, the secondary command buffer inherits no
    *    state from the primary command buffer, and all state of the primary
    *    command buffer is undefined after an execute secondary command buffer
    *    command is recorded. There is one exception to this rule - if the
    *    primary command buffer is inside a render pass instance, then the
    *    render pass and subpass state is not disturbed by executing secondary
    *    command buffers. For state dependent commands (such as draws and
    *    dispatches), any state consumed by those commands must not be
    *    undefined."
    *
    * Therefore, it's the client's job to reset all the state in the primary
    * after the secondary executes.  However, if we're doing any internal
    * dirty tracking, we may miss the fact that a secondary has messed with
    * GPU state if we don't invalidate all our internal tracking.
    */
   nvk_cmd_invalidate_graphics_state(cmd);
   nvk_cmd_invalidate_compute_state(cmd);
}

enum nvk_barrier {
   NVK_BARRIER_RENDER_WFI              = 1 << 0,
   NVK_BARRIER_COMPUTE_WFI             = 1 << 1,
   NVK_BARRIER_FLUSH_SHADER_DATA       = 1 << 2,
   NVK_BARRIER_INVALIDATE_SHADER_DATA  = 1 << 3,
   NVK_BARRIER_INVALIDATE_TEX_DATA     = 1 << 4,
   NVK_BARRIER_INVALIDATE_CONSTANT     = 1 << 5,
   NVK_BARRIER_INVALIDATE_MME_DATA     = 1 << 6,
};

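/* Map the source stage/access masks of a barrier onto the flushes and
 * wait-for-idles we need to emit to make those writes available.
 */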
static enum nvk_barrier
nvk_barrier_flushes_waits(VkPipelineStageFlags2 stages,
                          VkAccessFlags2 access)
{
   stages = vk_expand_src_stage_flags2(stages);
   access = vk_filter_src_access_flags2(stages, access);

   enum nvk_barrier barriers = 0;

   if (access & VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT) {
      barriers |= NVK_BARRIER_FLUSH_SHADER_DATA;

      if (vk_pipeline_stage_flags2_has_graphics_shader(stages))
         barriers |= NVK_BARRIER_RENDER_WFI;

      if (vk_pipeline_stage_flags2_has_compute_shader(stages))
         barriers |= NVK_BARRIER_COMPUTE_WFI;
   }

   if (access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
                 VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                 VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT))
      barriers |= NVK_BARRIER_RENDER_WFI;

   if ((access & VK_ACCESS_2_TRANSFER_WRITE_BIT) &&
       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
                  VK_PIPELINE_STAGE_2_BLIT_BIT |
                  VK_PIPELINE_STAGE_2_CLEAR_BIT)))
      barriers |= NVK_BARRIER_RENDER_WFI;

   return barriers;
}

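/* Map the destination stage/access masks of a barrier onto the cache
 * invalidations we need to emit before those reads happen.
 */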
static enum nvk_barrier
nvk_barrier_invalidates(VkPipelineStageFlags2 stages,
                        VkAccessFlags2 access)
{
   stages = vk_expand_dst_stage_flags2(stages);
   access = vk_filter_dst_access_flags2(stages, access);

   enum nvk_barrier barriers = 0;

   if (access & (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT |
                 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
                 VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT))
      barriers |= NVK_BARRIER_INVALIDATE_MME_DATA;

   if (access & (VK_ACCESS_2_UNIFORM_READ_BIT |
                 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT))
      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA |
                  NVK_BARRIER_INVALIDATE_CONSTANT;

   if (access & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
                 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT))
      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   if (access & VK_ACCESS_2_SHADER_STORAGE_READ_BIT)
      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA;

   if ((access & VK_ACCESS_2_TRANSFER_READ_BIT) &&
       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
                  VK_PIPELINE_STAGE_2_BLIT_BIT)))
      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   return barriers;
}

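/* Handle the source half of a dependency: accumulate the required flushes
 * and WFIs across all barriers in the VkDependencyInfo and emit them.  When
 * wait is false (vkCmdSetEvent), the standalone WFI is skipped.
 */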
void
nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
                       const VkDependencyInfo *dep,
                       bool wait)
{
   enum nvk_barrier barriers = 0;

   for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
      const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
      const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
      const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   if (!barriers)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);

   if (barriers & NVK_BARRIER_FLUSH_SHADER_DATA) {
      assert(barriers & (NVK_BARRIER_RENDER_WFI | NVK_BARRIER_COMPUTE_WFI));
      if (barriers & NVK_BARRIER_RENDER_WFI) {
         P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES, {
            .data = DATA_TRUE,
            .flush_data = FLUSH_DATA_TRUE,
         });
      }

      if (barriers & NVK_BARRIER_COMPUTE_WFI) {
         P_IMMD(p, NVA0C0, INVALIDATE_SHADER_CACHES, {
            .data = DATA_TRUE,
            .flush_data = FLUSH_DATA_TRUE,
         });
      }
   } else if (barriers & NVK_BARRIER_RENDER_WFI) {
      /* If this comes from a vkCmdSetEvent, we don't need to wait */
      if (wait)
         P_IMMD(p, NVA097, WAIT_FOR_IDLE, 0);
   } else {
      /* Compute WFI only happens when shader data is flushed */
      assert(!(barriers & NVK_BARRIER_COMPUTE_WFI));
   }
}

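/* Handle the destination half of one or more dependencies: accumulate the
 * required cache invalidations across all barriers and emit them.
 */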
void
nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
                        uint32_t dep_count,
                        const VkDependencyInfo *deps)
{
   enum nvk_barrier barriers = 0;

   for (uint32_t d = 0; d < dep_count; d++) {
      const VkDependencyInfo *dep = &deps[d];

      for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
         const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }

      for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
         const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }

      for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
         const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }
   }

   if (!barriers)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);

   if (barriers & NVK_BARRIER_INVALIDATE_TEX_DATA) {
      P_IMMD(p, NVA097, INVALIDATE_TEXTURE_DATA_CACHE_NO_WFI, {
         .lines = LINES_ALL,
      });
   }

   if (barriers & (NVK_BARRIER_INVALIDATE_SHADER_DATA |
                   NVK_BARRIER_INVALIDATE_CONSTANT)) {
      P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES_NO_WFI, {
         .global_data = (barriers & NVK_BARRIER_INVALIDATE_SHADER_DATA) != 0,
         .constant = (barriers & NVK_BARRIER_INVALIDATE_CONSTANT) != 0,
      });
   }

   if (barriers & NVK_BARRIER_INVALIDATE_MME_DATA) {
      __push_immd(p, SUBC_NV9097, NV906F_SET_REFERENCE, 0);

      if (nvk_cmd_buffer_device(cmd)->pdev->info.cls_eng3d >= TURING_A)
         P_IMMD(p, NVC597, MME_DMA_SYSMEMBAR, 0);
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
                        const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, true);
   nvk_cmd_invalidate_deps(cmd, 1, pDependencyInfo);
}

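/* Ensure the device has enough shader local memory (SLM) for each incoming
 * shader, then bind it to either the compute or the graphics state depending
 * on the stage.
 */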
void
nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
                     uint32_t stage_count,
                     const gl_shader_stage *stages,
                     struct vk_shader ** const shaders)
{
   struct nvk_cmd_buffer *cmd = container_of(vk_cmd, struct nvk_cmd_buffer, vk);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);

   for (uint32_t i = 0; i < stage_count; i++) {
      struct nvk_shader *shader =
         container_of(shaders[i], struct nvk_shader, vk);

      if (shader != NULL && shader->info.slm_size > 0)
         nvk_device_ensure_slm(dev, shader->info.slm_size);

      if (stages[i] == MESA_SHADER_COMPUTE ||
          stages[i] == MESA_SHADER_KERNEL)
         nvk_cmd_bind_compute_shader(cmd, shader);
      else
         nvk_cmd_bind_graphics_shader(cmd, stages[i], shader);
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
                          VkPipelineBindPoint pipelineBindPoint,
                          VkPipelineLayout layout,
                          uint32_t firstSet,
                          uint32_t descriptorSetCount,
                          const VkDescriptorSet *pDescriptorSets,
                          uint32_t dynamicOffsetCount,
                          const uint32_t *pDynamicOffsets)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, layout);
   struct nvk_descriptor_state *desc =
      nvk_get_descriptors_state(cmd, pipelineBindPoint);

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When binding a descriptor set (see Descriptor Set Binding) to
    *    set number N...
    *
    *    If, additionally, the previously bound descriptor set for set
    *    N was bound using a pipeline layout not compatible for set N,
    *    then all bindings in sets numbered greater than N are
    *    disturbed."
    *
    * This means that, if some earlier set gets bound in such a way that
    * it changes set_dynamic_buffer_start[s], this binding is implicitly
    * invalidated.  Therefore, we can always look at the current value
    * of set_dynamic_buffer_start[s] as the base of our dynamic buffer
    * range and it's only our responsibility to adjust all
    * set_dynamic_buffer_start[p] for p > s as needed.
    */
   uint8_t dyn_buffer_start = desc->root.set_dynamic_buffer_start[firstSet];

   uint32_t next_dyn_offset = 0;
   for (uint32_t i = 0; i < descriptorSetCount; ++i) {
      unsigned s = i + firstSet;
      VK_FROM_HANDLE(nvk_descriptor_set, set, pDescriptorSets[i]);

      if (desc->sets[s] != set) {
         desc->root.sets[s] = nvk_descriptor_set_addr(set);
         desc->set_sizes[s] = set->size;
         desc->sets[s] = set;
         desc->sets_dirty |= BITFIELD_BIT(s);

         /* Binding descriptors invalidates push descriptors */
         desc->push_dirty &= ~BITFIELD_BIT(s);
      }

      desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start;

      if (pipeline_layout->set_layouts[s] != NULL) {
         const struct nvk_descriptor_set_layout *set_layout =
            vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[s]);

         if (set != NULL && set_layout->dynamic_buffer_count > 0) {
            for (uint32_t j = 0; j < set_layout->dynamic_buffer_count; j++) {
               struct nvk_buffer_address addr = set->dynamic_buffers[j];
               addr.base_addr += pDynamicOffsets[next_dyn_offset + j];
               desc->root.dynamic_buffers[dyn_buffer_start + j] = addr;
            }
            next_dyn_offset += set->layout->dynamic_buffer_count;
         }

         dyn_buffer_start += set_layout->dynamic_buffer_count;
      } else {
         assert(set == NULL);
      }
   }
   assert(dyn_buffer_start <= NVK_MAX_DYNAMIC_BUFFERS);
   assert(next_dyn_offset <= dynamicOffsetCount);

   for (uint32_t s = firstSet + descriptorSetCount; s < NVK_MAX_SETS; s++)
      desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start;
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushConstants(VkCommandBuffer commandBuffer,
                     VkPipelineLayout layout,
                     VkShaderStageFlags stageFlags,
                     uint32_t offset,
                     uint32_t size,
                     const void *pValues)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
      struct nvk_descriptor_state *desc =
         nvk_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS);

      memcpy(desc->root.push + offset, pValues, size);
   }

   if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      struct nvk_descriptor_state *desc =
         nvk_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_COMPUTE);

      memcpy(desc->root.push + offset, pValues, size);
   }
}

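/* Get the push descriptor set for the given set index, allocating it on
 * first use.  Any regular descriptor set bound at that index is unbound and
 * the push set is marked dirty.
 */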
static struct nvk_push_descriptor_set *
nvk_cmd_push_descriptors(struct nvk_cmd_buffer *cmd,
                         VkPipelineBindPoint bind_point,
                         uint32_t set)
{
   struct nvk_descriptor_state *desc =
      nvk_get_descriptors_state(cmd, bind_point);

   assert(set < NVK_MAX_SETS);
   if (unlikely(desc->push[set] == NULL)) {
      desc->push[set] = vk_zalloc(&cmd->vk.pool->alloc,
                                  sizeof(*desc->push[set]), 8,
                                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (unlikely(desc->push[set] == NULL)) {
         vk_command_buffer_set_error(&cmd->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
         return NULL;
      }
   }

   /* Pushing descriptors replaces whatever sets are bound */
   desc->sets[set] = NULL;
   desc->push_dirty |= BITFIELD_BIT(set);

   return desc->push[set];
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer,
                            VkPipelineBindPoint pipelineBindPoint,
                            VkPipelineLayout layout,
                            uint32_t set,
                            uint32_t descriptorWriteCount,
                            const VkWriteDescriptorSet *pDescriptorWrites)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, layout);

   struct nvk_push_descriptor_set *push_set =
      nvk_cmd_push_descriptors(cmd, pipelineBindPoint, set);
   if (unlikely(push_set == NULL))
      return;

   struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[set]);

   nvk_push_descriptor_set_update(push_set, set_layout,
                                  descriptorWriteCount, pDescriptorWrites);
}

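/* Upload the contents of any dirty push descriptor sets into command buffer
 * memory and point the root table's set addresses at the uploaded copies.
 */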
void
nvk_cmd_buffer_flush_push_descriptors(struct nvk_cmd_buffer *cmd,
                                      struct nvk_descriptor_state *desc)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
   VkResult result;

   if (!desc->push_dirty)
      return;

   u_foreach_bit(set_idx, desc->push_dirty) {
      struct nvk_push_descriptor_set *push_set = desc->push[set_idx];
      uint64_t push_set_addr;
      result = nvk_cmd_buffer_upload_data(cmd, push_set->data,
                                          sizeof(push_set->data),
                                          min_cbuf_alignment,
                                          &push_set_addr);
      if (unlikely(result != VK_SUCCESS)) {
         vk_command_buffer_set_error(&cmd->vk, result);
         return;
      }

      desc->root.sets[set_idx] = push_set_addr;
      desc->set_sizes[set_idx] = sizeof(push_set->data);
   }
}

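/* Try to resolve a cbuf binding to a buffer address on the CPU.  Returns
 * false if the descriptor can only be found in GPU memory (see
 * nvk_cmd_buffer_get_cbuf_descriptor_addr below).
 */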
bool
nvk_cmd_buffer_get_cbuf_descriptor(struct nvk_cmd_buffer *cmd,
                                   const struct nvk_descriptor_state *desc,
                                   const struct nvk_shader *shader,
                                   const struct nvk_cbuf *cbuf,
                                   struct nvk_buffer_address *desc_out)
{
   switch (cbuf->type) {
   case NVK_CBUF_TYPE_INVALID:
      *desc_out = (struct nvk_buffer_address) { .size = 0 };
      return true;

   case NVK_CBUF_TYPE_ROOT_DESC:
      unreachable("The caller should handle root descriptors");
      return false;

   case NVK_CBUF_TYPE_SHADER_DATA:
      *desc_out = (struct nvk_buffer_address) {
         .base_addr = shader->data_addr,
         .size = shader->data_size,
      };
      return true;

   case NVK_CBUF_TYPE_DESC_SET:
      *desc_out = (struct nvk_buffer_address) {
         .base_addr = desc->root.sets[cbuf->desc_set],
         .size = desc->set_sizes[cbuf->desc_set],
      };
      return true;

   case NVK_CBUF_TYPE_DYNAMIC_UBO: {
      const uint32_t dyn_start =
         desc->root.set_dynamic_buffer_start[cbuf->desc_set];
      *desc_out = desc->root.dynamic_buffers[dyn_start + cbuf->dynamic_idx];
      return true;
   }

   case NVK_CBUF_TYPE_UBO_DESC: {
      if (desc->sets[cbuf->desc_set] != NULL)
         return false;

      struct nvk_push_descriptor_set *push = desc->push[cbuf->desc_set];
      if (push == NULL)
         return false;

      assert(cbuf->desc_offset < NVK_PUSH_DESCRIPTOR_SET_SIZE);
      void *desc = &push->data[cbuf->desc_offset];
      *desc_out = *(struct nvk_buffer_address *)desc;
      return true;
   }

   default:
      unreachable("Invalid cbuf type");
   }
}

uint64_t
nvk_cmd_buffer_get_cbuf_descriptor_addr(struct nvk_cmd_buffer *cmd,
                                        const struct nvk_descriptor_state *desc,
                                        const struct nvk_cbuf *cbuf)
{
   assert(cbuf->type == NVK_CBUF_TYPE_UBO_DESC);

   assert(cbuf->desc_offset < desc->set_sizes[cbuf->desc_set]);
   return desc->root.sets[cbuf->desc_set] + cbuf->desc_offset;
}

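/* Debug helper: print every push in the command buffer to fp, decoding
 * CPU-mapped pushes and noting indirect pushes by GPU address and size.
 */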
void
nvk_cmd_buffer_dump(struct nvk_cmd_buffer *cmd, FILE *fp)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);

   util_dynarray_foreach(&cmd->pushes, struct nvk_cmd_push, p) {
      if (p->map) {
         struct nv_push push = {
            .start = (uint32_t *)p->map,
            .end = (uint32_t *)((char *)p->map + p->range),
         };
         vk_push_print(fp, &push, &dev->pdev->info);
      } else {
         const uint64_t addr = p->addr;
         fprintf(fp, "<%u B of INDIRECT DATA at 0x%" PRIx64 ">\n",
                 p->range, addr);
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer,
                                        VkDescriptorUpdateTemplate _template,
                                        VkPipelineLayout _layout,
                                        uint32_t set,
                                        const void *pData)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(vk_descriptor_update_template, template, _template);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, _layout);

   struct nvk_push_descriptor_set *push_set =
      nvk_cmd_push_descriptors(cmd, template->bind_point, set);
   if (unlikely(push_set == NULL))
      return;

   struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[set]);

   nvk_push_descriptor_set_update_template(push_set, set_layout, template,
                                           pData);
}