/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_queue.h"

#include "nvk_buffer.h"
#include "nvk_cmd_buffer.h"
#include "nvk_device.h"
#include "nvk_image.h"
#include "nvk_physical_device.h"
#include "nv_push.h"

#include "nv_push_cl9039.h"
#include "nv_push_cl9097.h"
#include "nv_push_cl90b5.h"
#include "nv_push_cla0c0.h"
#include "cla1c0.h"
#include "nv_push_clc3c0.h"
#include "nv_push_clc397.h"

static VkResult
nvk_queue_submit_simple(struct nvk_queue *queue,
                        uint32_t dw_count, const uint32_t *dw);

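/* Per-queue record of the device-wide resources (texture header pool, sampler
 * pool, and shader local memory area) last programmed on this queue's
 * context.  nvk_queue_state_update() compares against these fields to decide
 * whether new state needs to be emitted.
 */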
static void
nvk_queue_state_init(struct nvk_queue_state *qs)
{
   memset(qs, 0, sizeof(*qs));
}

static void
nvk_queue_state_finish(struct nvk_device *dev,
                       struct nvk_queue_state *qs)
{
   if (qs->images.mem)
      nvkmd_mem_unref(qs->images.mem);
   if (qs->samplers.mem)
      nvkmd_mem_unref(qs->samplers.mem);
   if (qs->slm.mem)
      nvkmd_mem_unref(qs->slm.mem);
}

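/* Re-emits the texture header pool, sampler pool, and SLM bindings on this
 * queue's context if any of the backing allocations changed since the last
 * submission.  The push is built on the stack and submitted synchronously
 * through nvk_queue_submit_simple().
 */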
static VkResult
nvk_queue_state_update(struct nvk_queue *queue,
                       struct nvk_queue_state *qs)
{
   struct nvk_device *dev = nvk_queue_device(queue);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   struct nvkmd_mem *mem;
   uint32_t alloc_count, bytes_per_warp, bytes_per_tpc;
   bool dirty = false;

   mem = nvk_descriptor_table_get_mem_ref(&dev->images, &alloc_count);
   if (qs->images.mem != mem || qs->images.alloc_count != alloc_count) {
      if (qs->images.mem)
         nvkmd_mem_unref(qs->images.mem);
      qs->images.mem = mem;
      qs->images.alloc_count = alloc_count;
      dirty = true;
   } else {
      /* No change */
      if (mem)
         nvkmd_mem_unref(mem);
   }

   mem = nvk_descriptor_table_get_mem_ref(&dev->samplers, &alloc_count);
   if (qs->samplers.mem != mem || qs->samplers.alloc_count != alloc_count) {
      if (qs->samplers.mem)
         nvkmd_mem_unref(qs->samplers.mem);
      qs->samplers.mem = mem;
      qs->samplers.alloc_count = alloc_count;
      dirty = true;
   } else {
      /* No change */
      if (mem)
         nvkmd_mem_unref(mem);
   }

   mem = nvk_slm_area_get_mem_ref(&dev->slm, &bytes_per_warp, &bytes_per_tpc);
   if (qs->slm.mem != mem || qs->slm.bytes_per_warp != bytes_per_warp ||
       qs->slm.bytes_per_tpc != bytes_per_tpc) {
      if (qs->slm.mem)
         nvkmd_mem_unref(qs->slm.mem);
      qs->slm.mem = mem;
      qs->slm.bytes_per_warp = bytes_per_warp;
      qs->slm.bytes_per_tpc = bytes_per_tpc;
      dirty = true;
   } else {
      /* No change */
      if (mem)
         nvkmd_mem_unref(mem);
   }

   if (!dirty)
      return VK_SUCCESS;

   uint32_t push_data[64];
   struct nv_push push;
   nv_push_init(&push, push_data, 64);
   struct nv_push *p = &push;

   if (qs->images.mem) {
      if (queue->engines & NVKMD_ENGINE_COMPUTE) {
         P_MTHD(p, NVA0C0, SET_TEX_HEADER_POOL_A);
         P_NVA0C0_SET_TEX_HEADER_POOL_A(p, qs->images.mem->va->addr >> 32);
         P_NVA0C0_SET_TEX_HEADER_POOL_B(p, qs->images.mem->va->addr);
         P_NVA0C0_SET_TEX_HEADER_POOL_C(p, qs->images.alloc_count - 1);
         P_IMMD(p, NVA0C0, INVALIDATE_TEXTURE_HEADER_CACHE_NO_WFI, {
            .lines = LINES_ALL
         });
      }

      if (queue->engines & NVKMD_ENGINE_3D) {
         P_MTHD(p, NV9097, SET_TEX_HEADER_POOL_A);
         P_NV9097_SET_TEX_HEADER_POOL_A(p, qs->images.mem->va->addr >> 32);
         P_NV9097_SET_TEX_HEADER_POOL_B(p, qs->images.mem->va->addr);
         P_NV9097_SET_TEX_HEADER_POOL_C(p, qs->images.alloc_count - 1);
         P_IMMD(p, NV9097, INVALIDATE_TEXTURE_HEADER_CACHE_NO_WFI, {
            .lines = LINES_ALL
         });
      }
   }

   if (qs->samplers.mem) {
      if (queue->engines & NVKMD_ENGINE_COMPUTE) {
         P_MTHD(p, NVA0C0, SET_TEX_SAMPLER_POOL_A);
         P_NVA0C0_SET_TEX_SAMPLER_POOL_A(p, qs->samplers.mem->va->addr >> 32);
         P_NVA0C0_SET_TEX_SAMPLER_POOL_B(p, qs->samplers.mem->va->addr);
         P_NVA0C0_SET_TEX_SAMPLER_POOL_C(p, qs->samplers.alloc_count - 1);
         P_IMMD(p, NVA0C0, INVALIDATE_SAMPLER_CACHE_NO_WFI, {
            .lines = LINES_ALL
         });
      }

      if (queue->engines & NVKMD_ENGINE_3D) {
         P_MTHD(p, NV9097, SET_TEX_SAMPLER_POOL_A);
         P_NV9097_SET_TEX_SAMPLER_POOL_A(p, qs->samplers.mem->va->addr >> 32);
         P_NV9097_SET_TEX_SAMPLER_POOL_B(p, qs->samplers.mem->va->addr);
         P_NV9097_SET_TEX_SAMPLER_POOL_C(p, qs->samplers.alloc_count - 1);
         P_IMMD(p, NV9097, INVALIDATE_SAMPLER_CACHE_NO_WFI, {
            .lines = LINES_ALL
         });
      }
   }

   if (qs->slm.mem) {
      const uint64_t slm_addr = qs->slm.mem->va->addr;
      const uint64_t slm_size = qs->slm.mem->size_B;
      const uint64_t slm_per_warp = qs->slm.bytes_per_warp;
      const uint64_t slm_per_tpc = qs->slm.bytes_per_tpc;
      assert(!(slm_per_tpc & 0x7fff));

      if (queue->engines & NVKMD_ENGINE_COMPUTE) {
         P_MTHD(p, NVA0C0, SET_SHADER_LOCAL_MEMORY_A);
         P_NVA0C0_SET_SHADER_LOCAL_MEMORY_A(p, slm_addr >> 32);
         P_NVA0C0_SET_SHADER_LOCAL_MEMORY_B(p, slm_addr);

         P_MTHD(p, NVA0C0, SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A);
         P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A(p, slm_per_tpc >> 32);
         P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_B(p, slm_per_tpc);
         P_NVA0C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_C(p, 0xff);

         if (pdev->info.cls_compute < VOLTA_COMPUTE_A) {
            P_MTHD(p, NVA0C0, SET_SHADER_LOCAL_MEMORY_THROTTLED_A);
            P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_A(p, slm_per_tpc >> 32);
            P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_B(p, slm_per_tpc);
            P_NVA0C0_SET_SHADER_LOCAL_MEMORY_THROTTLED_C(p, 0xff);
         }
      }

      if (queue->engines & NVKMD_ENGINE_3D) {
         P_MTHD(p, NV9097, SET_SHADER_LOCAL_MEMORY_A);
         P_NV9097_SET_SHADER_LOCAL_MEMORY_A(p, slm_addr >> 32);
         P_NV9097_SET_SHADER_LOCAL_MEMORY_B(p, slm_addr);
         P_NV9097_SET_SHADER_LOCAL_MEMORY_C(p, slm_size >> 32);
         P_NV9097_SET_SHADER_LOCAL_MEMORY_D(p, slm_size);
         P_NV9097_SET_SHADER_LOCAL_MEMORY_E(p, slm_per_warp);
      }
   }

   /* We set memory windows unconditionally.  Otherwise, the memory window
    * might be in a random place and cause us to fault off into nowhere.
    */
   if (queue->engines & NVKMD_ENGINE_COMPUTE) {
      if (pdev->info.cls_compute >= VOLTA_COMPUTE_A) {
         uint64_t temp = 0xfeULL << 24;
         P_MTHD(p, NVC3C0, SET_SHADER_SHARED_MEMORY_WINDOW_A);
         P_NVC3C0_SET_SHADER_SHARED_MEMORY_WINDOW_A(p, temp >> 32);
         P_NVC3C0_SET_SHADER_SHARED_MEMORY_WINDOW_B(p, temp & 0xffffffff);

         temp = 0xffULL << 24;
         P_MTHD(p, NVC3C0, SET_SHADER_LOCAL_MEMORY_WINDOW_A);
         P_NVC3C0_SET_SHADER_LOCAL_MEMORY_WINDOW_A(p, temp >> 32);
         P_NVC3C0_SET_SHADER_LOCAL_MEMORY_WINDOW_B(p, temp & 0xffffffff);
      } else {
         P_MTHD(p, NVA0C0, SET_SHADER_LOCAL_MEMORY_WINDOW);
         P_NVA0C0_SET_SHADER_LOCAL_MEMORY_WINDOW(p, 0xff << 24);

         P_MTHD(p, NVA0C0, SET_SHADER_SHARED_MEMORY_WINDOW);
         P_NVA0C0_SET_SHADER_SHARED_MEMORY_WINDOW(p, 0xfe << 24);
      }
   }

   /* From nvc0_screen.c:
    *
    *    "Reduce likelihood of collision with real buffers by placing the
    *    hole at the top of the 4G area. This will have to be dealt with
    *    for real eventually by blocking off that area from the VM."
    *
    * Really?!?  TODO: Fix this for realz.  Annoyingly, we only have a
    * 32-bit pointer for this in 3D rather than a full 48 like we have for
    * compute.
    */
   P_IMMD(p, NV9097, SET_SHADER_LOCAL_MEMORY_WINDOW, 0xff << 24);

   return nvk_queue_submit_simple(queue, nv_push_dw_count(p), push_data);
}

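/* Handles sparse binding submissions: waits on the submit's wait semaphores,
 * applies all buffer and image (opaque) binds on the bind context, then
 * signals the submit's signal semaphores.
 */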
static VkResult
nvk_queue_submit_bind(struct nvk_queue *queue,
                      struct vk_queue_submit *submit)
{
   VkResult result;

   result = nvkmd_ctx_wait(queue->bind_ctx, &queue->vk.base,
                           submit->wait_count, submit->waits);
   if (result != VK_SUCCESS)
      return result;

   for (uint32_t i = 0; i < submit->buffer_bind_count; i++) {
      result = nvk_queue_buffer_bind(queue, &submit->buffer_binds[i]);
      if (result != VK_SUCCESS)
         return result;
   }

   for (uint32_t i = 0; i < submit->image_bind_count; i++) {
      result = nvk_queue_image_bind(queue, &submit->image_binds[i]);
      if (result != VK_SUCCESS)
         return result;
   }

   for (uint32_t i = 0; i < submit->image_opaque_bind_count; i++) {
      result = nvk_queue_image_opaque_bind(queue, &submit->image_opaque_binds[i]);
      if (result != VK_SUCCESS)
         return result;
   }

   result = nvkmd_ctx_signal(queue->bind_ctx, &queue->vk.base,
                             submit->signal_count, submit->signals);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

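/* Executes the command buffers of a submit on the exec context.  Queue state
 * and any pending uploads are flushed first so the pushbufs see up-to-date
 * descriptors; push dumping and synchronous execution are controlled by the
 * NVK_DEBUG_PUSH_DUMP and NVK_DEBUG_PUSH_SYNC flags.
 */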
static VkResult
nvk_queue_submit_exec(struct nvk_queue *queue,
                      struct vk_queue_submit *submit)
{
   struct nvk_device *dev = nvk_queue_device(queue);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   const bool sync = pdev->debug_flags & NVK_DEBUG_PUSH_SYNC;

   if (submit->command_buffer_count > 0) {
      result = nvk_queue_state_update(queue, &queue->state);
      if (result != VK_SUCCESS)
         return result;

      uint64_t upload_time_point;
      result = nvk_upload_queue_flush(dev, &dev->upload, &upload_time_point);
      if (result != VK_SUCCESS)
         return result;

      if (upload_time_point > 0) {
         struct vk_sync_wait wait = {
            .sync = dev->upload.sync,
            .stage_mask = ~0,
            .wait_value = upload_time_point,
         };
         result = nvkmd_ctx_wait(queue->exec_ctx, &queue->vk.base, 1, &wait);
         if (result != VK_SUCCESS)
            goto fail;
      }
   }

   result = nvkmd_ctx_wait(queue->exec_ctx, &queue->vk.base,
                           submit->wait_count, submit->waits);
   if (result != VK_SUCCESS)
      goto fail;

   for (unsigned i = 0; i < submit->command_buffer_count; i++) {
      struct nvk_cmd_buffer *cmd =
         container_of(submit->command_buffers[i], struct nvk_cmd_buffer, vk);

      const uint32_t max_execs =
         util_dynarray_num_elements(&cmd->pushes, struct nvk_cmd_push);
      STACK_ARRAY(struct nvkmd_ctx_exec, execs, max_execs);
      uint32_t exec_count = 0;

      util_dynarray_foreach(&cmd->pushes, struct nvk_cmd_push, push) {
         if (push->range == 0)
            continue;

         execs[exec_count++] = (struct nvkmd_ctx_exec) {
            .addr = push->addr,
            .size_B = push->range,
            .no_prefetch = push->no_prefetch,
         };
      }

      result = nvkmd_ctx_exec(queue->exec_ctx, &queue->vk.base,
                              exec_count, execs);

      STACK_ARRAY_FINISH(execs);

      if (result != VK_SUCCESS)
         goto fail;
   }

   result = nvkmd_ctx_signal(queue->exec_ctx, &queue->vk.base,
                             submit->signal_count, submit->signals);
   if (result != VK_SUCCESS)
      goto fail;

   if (sync) {
      result = nvkmd_ctx_sync(queue->exec_ctx, &queue->vk.base);
      if (result != VK_SUCCESS)
         goto fail;
   }

fail:
   if ((sync && result != VK_SUCCESS) ||
       (pdev->debug_flags & NVK_DEBUG_PUSH_DUMP)) {
      for (unsigned i = 0; i < submit->command_buffer_count; i++) {
         struct nvk_cmd_buffer *cmd =
            container_of(submit->command_buffers[i], struct nvk_cmd_buffer, vk);

         nvk_cmd_buffer_dump(cmd, stderr);
      }
   }

   return result;
}

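/* vk_queue::driver_submit entry point.  Sparse binds and command buffer
 * execution never appear in the same vk_queue_submit, so this dispatches to
 * the appropriate helper and marks the queue lost on failure.
 */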
static VkResult
nvk_queue_submit(struct vk_queue *vk_queue,
                 struct vk_queue_submit *submit)
{
   struct nvk_queue *queue = container_of(vk_queue, struct nvk_queue, vk);
   VkResult result;

   if (vk_queue_is_lost(&queue->vk))
      return VK_ERROR_DEVICE_LOST;

   if (submit->buffer_bind_count > 0 ||
       submit->image_bind_count > 0  ||
       submit->image_opaque_bind_count > 0) {
      assert(submit->command_buffer_count == 0);
      result = nvk_queue_submit_bind(queue, submit);
      if (result != VK_SUCCESS)
         return vk_queue_set_lost(&queue->vk, "Bind operation failed");
   } else {
      result = nvk_queue_submit_exec(queue, submit);
      if (result != VK_SUCCESS)
         return vk_queue_set_lost(&queue->vk, "Submit failed");
   }

   return VK_SUCCESS;
}

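/* Copies the given push data into a freshly allocated GART buffer, executes
 * it on the exec context, and waits for completion.  Used for one-off state
 * pushes outside of command buffer execution.
 */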
static VkResult
nvk_queue_submit_simple(struct nvk_queue *queue,
                        uint32_t dw_count, const uint32_t *dw)
{
   struct nvk_device *dev = nvk_queue_device(queue);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   if (vk_queue_is_lost(&queue->vk))
      return VK_ERROR_DEVICE_LOST;

   struct nvkmd_mem *push_mem;
   result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
                                       dw_count * 4, 0,
                                       NVKMD_MEM_GART,
                                       NVKMD_MEM_MAP_WR, &push_mem);
   if (result != VK_SUCCESS)
      return result;

   memcpy(push_mem->map, dw, dw_count * 4);

   const struct nvkmd_ctx_exec exec = {
      .addr = push_mem->va->addr,
      .size_B = dw_count * 4,
   };
   result = nvkmd_ctx_exec(queue->exec_ctx, &queue->vk.base, 1, &exec);
   if (result == VK_SUCCESS)
      result = nvkmd_ctx_sync(queue->exec_ctx, &queue->vk.base);

   nvkmd_mem_unref(push_mem);

   const bool debug_sync = pdev->debug_flags & NVK_DEBUG_PUSH_SYNC;
   if ((debug_sync && result != VK_SUCCESS) ||
       (pdev->debug_flags & NVK_DEBUG_PUSH_DUMP)) {
      struct nv_push push = {
         .start = (uint32_t *)dw,
         .end = (uint32_t *)dw + dw_count,
      };
      vk_push_print(stderr, &push, &pdev->info);
   }

   if (result != VK_SUCCESS)
      return vk_queue_set_lost(&queue->vk, "Submit failed");

   return VK_SUCCESS;
}

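/* Emits the one-time context setup push: the M2MF object on Fermi-class
 * hardware plus the initial 3D and compute state for whichever engines this
 * queue exposes.
 */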
static VkResult
nvk_queue_init_context_state(struct nvk_queue *queue)
{
   struct nvk_device *dev = nvk_queue_device(queue);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   uint32_t push_data[4096];
   struct nv_push push;
   nv_push_init(&push, push_data, ARRAY_SIZE(push_data));
   struct nv_push *p = &push;

   /* M2MF state */
   if (pdev->info.cls_m2mf <= FERMI_MEMORY_TO_MEMORY_FORMAT_A) {
      /* we absolutely do not support Fermi, but if somebody wants to toy
       * around with it, this is a must
       */
      P_MTHD(p, NV9039, SET_OBJECT);
      P_NV9039_SET_OBJECT(p, {
         .class_id = pdev->info.cls_m2mf,
         .engine_id = 0,
      });
   }

   if (queue->engines & NVKMD_ENGINE_3D) {
      result = nvk_push_draw_state_init(queue, p);
      if (result != VK_SUCCESS)
         return result;
   }

   if (queue->engines & NVKMD_ENGINE_COMPUTE) {
      result = nvk_push_dispatch_state_init(queue, p);
      if (result != VK_SUCCESS)
         return result;
   }

   return nvk_queue_submit_simple(queue, nv_push_dw_count(&push), push_data);
}

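/* Returns the requested global priority from the pNext chain, defaulting to
 * VK_QUEUE_GLOBAL_PRIORITY_MEDIUM when none is provided.
 */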
static VkQueueGlobalPriority
get_queue_global_priority(const VkDeviceQueueCreateInfo *pCreateInfo)
{
   const VkDeviceQueueGlobalPriorityCreateInfo *priority_info =
      vk_find_struct_const(pCreateInfo->pNext,
                           DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO);
   if (priority_info == NULL)
      return VK_QUEUE_GLOBAL_PRIORITY_MEDIUM;

   return priority_info->globalPriority;
}

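/* Creates the execution context (and, for sparse-capable families, the bind
 * context) for a new queue, allocates the zero-filled draw_cb0 buffer, and
 * emits the initial context state.
 */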
VkResult
nvk_queue_init(struct nvk_device *dev, struct nvk_queue *queue,
               const VkDeviceQueueCreateInfo *pCreateInfo,
               uint32_t index_in_family)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   assert(pCreateInfo->queueFamilyIndex < pdev->queue_family_count);
   const struct nvk_queue_family *queue_family =
      &pdev->queue_families[pCreateInfo->queueFamilyIndex];

   const VkQueueGlobalPriority global_priority =
      get_queue_global_priority(pCreateInfo);

   /* From the Vulkan 1.3.295 spec:
    *
    *    "If the globalPriorityQuery feature is enabled and the requested
    *    global priority is not reported via
    *    VkQueueFamilyGlobalPriorityPropertiesKHR, the driver implementation
    *    must fail the queue creation. In this scenario,
    *    VK_ERROR_INITIALIZATION_FAILED is returned."
    */
   if (dev->vk.enabled_features.globalPriorityQuery &&
       global_priority != VK_QUEUE_GLOBAL_PRIORITY_MEDIUM)
      return VK_ERROR_INITIALIZATION_FAILED;

   if (global_priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM)
      return VK_ERROR_NOT_PERMITTED;

   result = vk_queue_init(&queue->vk, &dev->vk, pCreateInfo, index_in_family);
   if (result != VK_SUCCESS)
      return result;

   nvk_queue_state_init(&queue->state);

   queue->engines = 0;
   if (queue_family->queue_flags & VK_QUEUE_GRAPHICS_BIT) {
      queue->engines |= NVKMD_ENGINE_3D;
      /* We rely on compute shaders for queries */
      queue->engines |= NVKMD_ENGINE_COMPUTE;
   }
   if (queue_family->queue_flags & VK_QUEUE_COMPUTE_BIT) {
      queue->engines |= NVKMD_ENGINE_COMPUTE;
      /* We currently rely on 3D engine MMEs for indirect dispatch */
      queue->engines |= NVKMD_ENGINE_3D;
   }
   if (queue_family->queue_flags & VK_QUEUE_TRANSFER_BIT)
      queue->engines |= NVKMD_ENGINE_COPY;

   if (queue->engines) {
      result = nvkmd_dev_create_ctx(dev->nvkmd, &dev->vk.base,
                                    queue->engines, &queue->exec_ctx);
      if (result != VK_SUCCESS)
         goto fail_init;

      result = nvkmd_dev_alloc_mem(dev->nvkmd, &dev->vk.base,
                                   4096, 0, NVKMD_MEM_LOCAL,
                                   &queue->draw_cb0);
      if (result != VK_SUCCESS)
         goto fail_exec_ctx;

      result = nvk_upload_queue_fill(dev, &dev->upload,
                                     queue->draw_cb0->va->addr, 0,
                                     queue->draw_cb0->size_B);
      if (result != VK_SUCCESS)
         goto fail_draw_cb0;
   }

   if (queue_family->queue_flags & VK_QUEUE_SPARSE_BINDING_BIT) {
      result = nvkmd_dev_create_ctx(dev->nvkmd, &dev->vk.base,
                                    NVKMD_ENGINE_BIND, &queue->bind_ctx);
      if (result != VK_SUCCESS)
         goto fail_draw_cb0;
   }

   result = nvk_queue_init_context_state(queue);
   if (result != VK_SUCCESS)
      goto fail_bind_ctx;

   queue->vk.driver_submit = nvk_queue_submit;

   return VK_SUCCESS;

fail_bind_ctx:
   if (queue->bind_ctx != NULL)
      nvkmd_ctx_destroy(queue->bind_ctx);
fail_draw_cb0:
   if (queue->draw_cb0 != NULL)
      nvkmd_mem_unref(queue->draw_cb0);
fail_exec_ctx:
   if (queue->exec_ctx != NULL)
      nvkmd_ctx_destroy(queue->exec_ctx);
fail_init:
   nvk_queue_state_finish(dev, &queue->state);
   vk_queue_finish(&queue->vk);

   return result;
}

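/* Tears down a queue: syncs the upload queue before releasing draw_cb0, then
 * destroys the contexts and per-queue state.
 */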
void
nvk_queue_finish(struct nvk_device *dev, struct nvk_queue *queue)
{
   if (queue->draw_cb0 != NULL) {
      nvk_upload_queue_sync(dev, &dev->upload);
      nvkmd_mem_unref(queue->draw_cb0);
   }
   nvk_queue_state_finish(dev, &queue->state);
   if (queue->bind_ctx != NULL)
      nvkmd_ctx_destroy(queue->bind_ctx);
   if (queue->exec_ctx != NULL)
      nvkmd_ctx_destroy(queue->exec_ctx);
   vk_queue_finish(&queue->vk);
}