/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "drm-uapi/v3d_drm.h"

#include "broadcom/clif/clif_dump.h"
#include "util/libsync.h"
#include "util/os_time.h"
#include "vk_drm_syncobj.h"

#include <errno.h>
#include <time.h>

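/* Dumps the command lists of a job in CLIF format for debugging. This only
 * runs when one of the V3D_DEBUG_CL, V3D_DEBUG_CL_NO_BIN or V3D_DEBUG_CLIF
 * debug flags is set.
 */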
static void
v3dv_clif_dump(struct v3dv_device *device,
               struct v3dv_job *job,
               struct drm_v3d_submit_cl *submit)
{
   if (!(unlikely(V3D_DEBUG & (V3D_DEBUG_CL |
                               V3D_DEBUG_CL_NO_BIN |
                               V3D_DEBUG_CLIF))))
      return;

   struct clif_dump *clif = clif_dump_init(&device->devinfo,
                                           stderr,
                                           V3D_DEBUG & (V3D_DEBUG_CL |
                                                        V3D_DEBUG_CL_NO_BIN),
                                           V3D_DEBUG & V3D_DEBUG_CL_NO_BIN);

   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (void *)entry->key;
      char *name = ralloc_asprintf(NULL, "%s_0x%x",
                                   bo->name, bo->offset);

      bool ok = v3dv_bo_map(device, bo, bo->size);
      if (!ok) {
         fprintf(stderr, "failed to map BO for clif_dump.\n");
         ralloc_free(name);
         goto free_clif;
      }
      clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);

      ralloc_free(name);
   }

   clif_dump(clif, submit);

 free_clif:
   clif_dump_destroy(clif);
}

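/* Waits for all jobs previously submitted to the queue to complete. With
 * multisync, we wait on all three per-queue-type syncobjs; otherwise, on the
 * single V3DV_QUEUE_ANY syncobj that every job signals.
 */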
static VkResult
queue_wait_idle(struct v3dv_queue *queue,
                struct v3dv_submit_sync_info *sync_info)
{
   if (queue->device->pdevice->caps.multisync) {
      int ret = drmSyncobjWait(queue->device->pdevice->render_fd,
                               queue->last_job_syncs.syncs, 3,
                               INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
                               NULL);
      if (ret) {
         return vk_errorf(queue, VK_ERROR_DEVICE_LOST,
                          "syncobj wait failed: %m");
      }

      bool first = true;
      for (int i = 0; i < 3; i++) {
         if (!queue->last_job_syncs.first[i])
            first = false;
      }

      /* If we're not the first job, that means we're waiting on some
       * per-queue-type syncobj which transitively waited on the semaphores
       * so we can skip the semaphore wait.
       */
      if (first) {
         VkResult result = vk_sync_wait_many(&queue->device->vk,
                                             sync_info->wait_count,
                                             sync_info->waits,
                                             VK_SYNC_WAIT_COMPLETE,
                                             UINT64_MAX);
         if (result != VK_SUCCESS)
            return result;
      }
   } else {
      /* Without multisync, all the semaphores are baked into the one syncobj
       * at the start of each submit so we only need to wait on the one.
       */
      int ret = drmSyncobjWait(queue->device->pdevice->render_fd,
                               &queue->last_job_syncs.syncs[V3DV_QUEUE_ANY], 1,
                               INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
                               NULL);
      if (ret) {
         return vk_errorf(queue, VK_ERROR_DEVICE_LOST,
                          "syncobj wait failed: %m");
      }
   }

   for (int i = 0; i < 3; i++)
      queue->last_job_syncs.first[i] = false;

   return VK_SUCCESS;
}

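/* Handles a CPU job that resets a range of queries in a query pool, waiting
 * first on any GPU work that could still be writing them.
 */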
static VkResult
handle_reset_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
                           struct v3dv_submit_sync_info *sync_info)
{
   struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
   assert(info->pool);

   /* We are about to reset query counters so we need to make sure that
    * the GPU is not using them. The exception is timestamp queries, since
    * we handle those on the CPU.
    */
   if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION)
      v3dv_bo_wait(job->device, info->pool->bo, PIPE_TIMEOUT_INFINITE);

   if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
      struct vk_sync_wait waits[info->count];
      unsigned wait_count = 0;
      for (int i = 0; i < info->count; i++) {
         struct v3dv_query *query = &info->pool->queries[i];
         /* Only wait for a query if we've used it, otherwise we would be
          * waiting forever for a fence that never becomes signaled.
          */
         if (query->maybe_available) {
            waits[wait_count] = (struct vk_sync_wait){
               .sync = info->pool->queries[i].perf.last_job_sync
            };
            wait_count++;
         }
      }

      VkResult result = vk_sync_wait_many(&job->device->vk, wait_count, waits,
                                          VK_SYNC_WAIT_COMPLETE, UINT64_MAX);

      if (result != VK_SUCCESS)
         return result;
   }

   v3dv_reset_query_pools(job->device, info->pool, info->first, info->count);

   return VK_SUCCESS;
}

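/* Exports a sync file that accumulates the last-job syncobjs relevant to
 * performance queries: CL and CSD with multisync, V3DV_QUEUE_ANY otherwise.
 */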
static VkResult
export_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int *fd)
{
   int err;
   if (job->device->pdevice->caps.multisync) {
      static const enum v3dv_queue_type queues_to_sync[] = {
         V3DV_QUEUE_CL,
         V3DV_QUEUE_CSD,
      };

      for (uint32_t i = 0; i < ARRAY_SIZE(queues_to_sync); i++) {
         enum v3dv_queue_type queue_type = queues_to_sync[i];
         int tmp_fd = -1;

         err = drmSyncobjExportSyncFile(job->device->pdevice->render_fd,
                                        queue->last_job_syncs.syncs[queue_type],
                                        &tmp_fd);

         if (err) {
            close(*fd);
            return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
                             "sync file export failed: %m");
         }

         err = sync_accumulate("v3dv", fd, tmp_fd);

         if (err) {
            close(tmp_fd);
            close(*fd);
            return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
                             "failed to accumulate sync files: %m");
         }
      }
   } else {
      err = drmSyncobjExportSyncFile(job->device->pdevice->render_fd,
                                     queue->last_job_syncs.syncs[V3DV_QUEUE_ANY],
                                     fd);

      if (err) {
         return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
                          "sync file export failed: %m");
      }
   }
   return VK_SUCCESS;
}

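/* Handles a CPU job that ends a query. For performance queries we import the
 * last-job sync file into each query's syncobj so we can later tell when its
 * results are available.
 */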
static VkResult
handle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx)
{
   VkResult result = VK_SUCCESS;

   mtx_lock(&job->device->query_mutex);

   struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end;
   struct v3dv_queue *queue = &job->device->queue;

   int err = 0;
   int fd = -1;

   if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
      result = export_perfmon_last_job_sync(queue, job, &fd);

      if (result != VK_SUCCESS)
         goto fail;

      assert(fd >= 0);
   }

   for (uint32_t i = 0; i < info->count; i++) {
      assert(info->query + i < info->pool->query_count);
      struct v3dv_query *query = &info->pool->queries[info->query + i];

      if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
         uint32_t syncobj = vk_sync_as_drm_syncobj(query->perf.last_job_sync)->syncobj;
         err = drmSyncobjImportSyncFile(job->device->pdevice->render_fd,
                                        syncobj, fd);

         if (err) {
            result = vk_errorf(queue, VK_ERROR_UNKNOWN,
                               "sync file import failed: %m");
            goto fail;
         }
      }

      query->maybe_available = true;
   }

fail:
   if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR)
      close(fd);

   cnd_broadcast(&job->device->query_ended);
   mtx_unlock(&job->device->query_mutex);

   return result;
}

static VkResult
handle_copy_query_results_cpu_job(struct v3dv_job *job)
{
   struct v3dv_copy_query_results_cpu_job_info *info =
      &job->cpu.query_copy_results;

   assert(info->dst && info->dst->mem && info->dst->mem->bo);
   struct v3dv_bo *bo = info->dst->mem->bo;

   /* Map the entire dst buffer for the CPU copy if needed */
   assert(!bo->map || bo->map_size == bo->size);
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
      return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);

   uint8_t *offset = ((uint8_t *) bo->map) +
                     info->offset + info->dst->mem_offset;
   v3dv_get_query_pool_results(job->device,
                               info->pool,
                               info->first,
                               info->count,
                               offset,
                               info->stride,
                               info->flags);

   return VK_SUCCESS;
}

static VkResult
handle_set_event_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
                         struct v3dv_submit_sync_info *sync_info)
{
   /* From the Vulkan 1.0 spec:
    *
    *    "When vkCmdSetEvent is submitted to a queue, it defines an execution
    *     dependency on commands that were submitted before it, and defines an
    *     event signal operation which sets the event to the signaled state.
    *     The first synchronization scope includes every command previously
    *     submitted to the same queue, including those in the same command
    *     buffer and batch".
    *
    * So we should wait for all prior work to be completed before signaling
    * the event; this includes all active CPU wait threads spawned for any
    * command buffer submitted *before* this one.
    */

   VkResult result = queue_wait_idle(queue, sync_info);
   if (result != VK_SUCCESS)
      return result;

   struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
   p_atomic_set(&info->event->state, info->state);

   return VK_SUCCESS;
}

static bool
check_wait_events_complete(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);

   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
   for (uint32_t i = 0; i < info->event_count; i++) {
      if (!p_atomic_read(&info->events[i]->state))
         return false;
   }
   return true;
}

static VkResult
handle_wait_events_cpu_job(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);

   /* Wait for events to be signaled */
   const useconds_t wait_interval_ms = 1;
   while (!check_wait_events_complete(job))
      usleep(wait_interval_ms * 1000);

   return VK_SUCCESS;
}

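/* Handles a CPU job that copies buffer contents into a tiled image: we wait
 * for any queued GPU work to complete, then store the linear buffer data into
 * the image layout layer by layer.
 */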
static VkResult
handle_copy_buffer_to_image_cpu_job(struct v3dv_queue *queue,
                                    struct v3dv_job *job,
                                    struct v3dv_submit_sync_info *sync_info)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE);
   struct v3dv_copy_buffer_to_image_cpu_job_info *info =
      &job->cpu.copy_buffer_to_image;

   /* Wait for all GPU work to finish first, since we may be accessing
    * the BOs involved in the operation.
    */
   VkResult result = queue_wait_idle(queue, sync_info);
   if (result != VK_SUCCESS)
      return result;

   /* Map BOs */
   struct v3dv_bo *dst_bo = info->image->mem->bo;
   assert(!dst_bo->map || dst_bo->map_size == dst_bo->size);
   if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size))
      return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
   void *dst_ptr = dst_bo->map;

   struct v3dv_bo *src_bo = info->buffer->mem->bo;
   assert(!src_bo->map || src_bo->map_size == src_bo->size);
   if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size))
      return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
   void *src_ptr = src_bo->map;

   const struct v3d_resource_slice *slice =
      &info->image->slices[info->mip_level];

   const struct pipe_box box = {
      info->image_offset.x, info->image_offset.y, info->base_layer,
      info->image_extent.width, info->image_extent.height, info->layer_count,
   };

   /* Copy each layer */
   for (uint32_t i = 0; i < info->layer_count; i++) {
      const uint32_t dst_offset =
         v3dv_layer_offset(info->image, info->mip_level, info->base_layer + i);
      const uint32_t src_offset =
         info->buffer->mem_offset + info->buffer_offset +
         info->buffer_layer_stride * i;
      v3d_store_tiled_image(
         dst_ptr + dst_offset, slice->stride,
         src_ptr + src_offset, info->buffer_stride,
         slice->tiling, info->image->cpp, slice->padded_height, &box);
   }

   return VK_SUCCESS;
}

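/* Handles a CPU timestamp query job: we wait for all previously queued work
 * to complete and then sample CLOCK_MONOTONIC into the first query of the
 * range.
 */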
static VkResult
handle_timestamp_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
                               struct v3dv_submit_sync_info *sync_info)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
   struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;

   /* Wait for completion of all work queued before the timestamp query */
   VkResult result = queue_wait_idle(queue, sync_info);
   if (result != VK_SUCCESS)
      return result;

   mtx_lock(&job->device->query_mutex);

   /* Compute timestamp */
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);

   for (uint32_t i = 0; i < info->count; i++) {
      assert(info->query + i < info->pool->query_count);
      struct v3dv_query *query = &info->pool->queries[info->query + i];
      query->maybe_available = true;
      if (i == 0)
         query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
   }

   cnd_broadcast(&job->device->query_ended);
   mtx_unlock(&job->device->query_mutex);

   return VK_SUCCESS;
}

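/* Handles a CPU job for an indirect compute dispatch: reads the workgroup
 * counts from the indirect buffer and rewrites the CSD job if they differ
 * from the ones it was recorded with.
 */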
static VkResult
handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
                            struct v3dv_job *job,
                            struct v3dv_submit_sync_info *sync_info)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT);
   struct v3dv_csd_indirect_cpu_job_info *info = &job->cpu.csd_indirect;
   assert(info->csd_job);

   /* Make sure the GPU is no longer using the indirect buffer */
   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
   v3dv_bo_wait(queue->device, info->buffer->mem->bo, PIPE_TIMEOUT_INFINITE);

   /* Map the indirect buffer and read the dispatch parameters */
   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
   struct v3dv_bo *bo = info->buffer->mem->bo;
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
      return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
   assert(bo->map);

   const uint32_t offset = info->buffer->mem_offset + info->offset;
   const uint32_t *group_counts = (uint32_t *) (bo->map + offset);
   if (group_counts[0] == 0 || group_counts[1] == 0 || group_counts[2] == 0)
      return VK_SUCCESS;

   if (memcmp(group_counts, info->csd_job->csd.wg_count,
              sizeof(info->csd_job->csd.wg_count)) != 0) {
      v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts);
   }

   return VK_SUCCESS;
}

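/* Processes the wait semaphores of a submission. Without multisync we merge
 * the sync files of all waits into the single V3DV_QUEUE_ANY syncobj before
 * any job runs.
 */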
static VkResult
process_waits(struct v3dv_queue *queue,
              uint32_t count, struct vk_sync_wait *waits)
{
   struct v3dv_device *device = queue->device;
   VkResult result = VK_SUCCESS;
   int err = 0;

   if (count == 0)
      return VK_SUCCESS;

   /* If multisync is supported, we wait on semaphores in the first job
    * submitted to each of the individual queues.  We don't need to
    * pre-populate the syncobjs.
    */
   if (queue->device->pdevice->caps.multisync)
      return VK_SUCCESS;

   int fd = -1;
   err = drmSyncobjExportSyncFile(device->pdevice->render_fd,
                                  queue->last_job_syncs.syncs[V3DV_QUEUE_ANY],
                                  &fd);
   if (err) {
      result = vk_errorf(queue, VK_ERROR_UNKNOWN,
                         "sync file export failed: %m");
      goto fail;
   }

   for (uint32_t i = 0; i < count; i++) {
      uint32_t syncobj = vk_sync_as_drm_syncobj(waits[i].sync)->syncobj;
      int wait_fd = -1;

      err = drmSyncobjExportSyncFile(device->pdevice->render_fd,
                                     syncobj, &wait_fd);
      if (err) {
         result = vk_errorf(queue, VK_ERROR_UNKNOWN,
                            "sync file export failed: %m");
         goto fail;
      }

      err = sync_accumulate("v3dv", &fd, wait_fd);
      close(wait_fd);
      if (err) {
         result = vk_errorf(queue, VK_ERROR_UNKNOWN,
                            "sync file merge failed: %m");
         goto fail;
      }
   }

   err = drmSyncobjImportSyncFile(device->pdevice->render_fd,
                                  queue->last_job_syncs.syncs[V3DV_QUEUE_ANY],
                                  fd);
   if (err) {
      result = vk_errorf(queue, VK_ERROR_UNKNOWN,
                         "sync file import failed: %m");
   }

fail:
   close(fd);
   return result;
}

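/* Processes the signal semaphores of a submission. Without multisync we
 * import the sync file of the last job into every signal syncobj.
 */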
static VkResult
process_signals(struct v3dv_queue *queue,
                uint32_t count, struct vk_sync_signal *signals)
{
   struct v3dv_device *device = queue->device;

   if (count == 0)
      return VK_SUCCESS;

   /* If multisync is supported, we are signalling semaphores in the last job
    * of the last command buffer and, therefore, we do not need to process any
    * semaphores here.
    */
   if (device->pdevice->caps.multisync)
      return VK_SUCCESS;

   int fd = -1;
   drmSyncobjExportSyncFile(device->pdevice->render_fd,
                            queue->last_job_syncs.syncs[V3DV_QUEUE_ANY],
                            &fd);
   if (fd == -1) {
      return vk_errorf(queue, VK_ERROR_UNKNOWN,
                       "sync file export failed: %m");
   }

   VkResult result = VK_SUCCESS;
   for (uint32_t i = 0; i < count; i++) {
      uint32_t syncobj = vk_sync_as_drm_syncobj(signals[i].sync)->syncobj;
      int err = drmSyncobjImportSyncFile(device->pdevice->render_fd,
                                         syncobj, fd);
      if (err) {
         result = vk_errorf(queue, VK_ERROR_UNKNOWN,
                            "sync file import failed: %m");
         break;
      }
   }

   assert(fd >= 0);
   close(fd);

   return result;
}

static void
multisync_free(struct v3dv_device *device,
               struct drm_v3d_multi_sync *ms)
{
   vk_free(&device->vk.alloc, (void *)(uintptr_t)ms->out_syncs);
   vk_free(&device->vk.alloc, (void *)(uintptr_t)ms->in_syncs);
}

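/* Builds the array of in-syncs a job must wait on: the submit's wait
 * semaphores (for the first job on a given queue) plus the last-job syncobjs
 * of any queues the job serializes against.
 */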
static struct drm_v3d_sem *
set_in_syncs(struct v3dv_queue *queue,
             struct v3dv_job *job,
             enum v3dv_queue_type queue_sync,
             uint32_t *count,
             struct v3dv_submit_sync_info *sync_info)
{
   struct v3dv_device *device = queue->device;
   uint32_t n_syncs = 0;

   /* If this is the first job submitted to a given GPU queue in this cmd buf
    * batch, it has to wait on wait semaphores (if any) before running.
    */
   if (queue->last_job_syncs.first[queue_sync])
      n_syncs = sync_info->wait_count;

   /* If the serialize flag is set the job needs to be serialized in the
    * corresponding queues. Notice that we may implement transfer operations
    * as either CL or TFU jobs.
    *
    * FIXME: maybe we could track more precisely if the source of a transfer
    * barrier is a CL and/or a TFU job.
    */
   bool sync_csd  = job->serialize & V3DV_BARRIER_COMPUTE_BIT;
   bool sync_tfu  = job->serialize & V3DV_BARRIER_TRANSFER_BIT;
   bool sync_cl   = job->serialize & (V3DV_BARRIER_GRAPHICS_BIT |
                                      V3DV_BARRIER_TRANSFER_BIT);
   *count = n_syncs;
   if (sync_cl)
      (*count)++;
   if (sync_tfu)
      (*count)++;
   if (sync_csd)
      (*count)++;

   if (!*count)
      return NULL;

   struct drm_v3d_sem *syncs =
      vk_zalloc(&device->vk.alloc, *count * sizeof(struct drm_v3d_sem),
                8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);

   if (!syncs)
      return NULL;

   for (int i = 0; i < n_syncs; i++) {
      syncs[i].handle =
         vk_sync_as_drm_syncobj(sync_info->waits[i].sync)->syncobj;
   }

   if (sync_cl)
      syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CL];

   if (sync_csd)
      syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CSD];

   if (sync_tfu)
      syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_TFU];

   assert(n_syncs == *count);
   return syncs;
}

static struct drm_v3d_sem *
set_out_syncs(struct v3dv_queue *queue,
              struct v3dv_job *job,
              enum v3dv_queue_type queue_sync,
              uint32_t *count,
              struct v3dv_submit_sync_info *sync_info,
              bool signal_syncs)
{
   struct v3dv_device *device = queue->device;

   uint32_t n_vk_syncs = signal_syncs ? sync_info->signal_count : 0;

   /* We always signal the syncobj from `queue->last_job_syncs` related to
    * this v3dv_queue_type to track the last job submitted to this queue.
    */
   (*count) = n_vk_syncs + 1;

   struct drm_v3d_sem *syncs =
      vk_zalloc(&device->vk.alloc, *count * sizeof(struct drm_v3d_sem),
                8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);

   if (!syncs)
      return NULL;

   if (n_vk_syncs) {
      for (unsigned i = 0; i < n_vk_syncs; i++) {
         syncs[i].handle =
            vk_sync_as_drm_syncobj(sync_info->signals[i].sync)->syncobj;
      }
   }

   syncs[n_vk_syncs].handle = queue->last_job_syncs.syncs[queue_sync];

   return syncs;
}

static void
set_ext(struct drm_v3d_extension *ext,
        struct drm_v3d_extension *next,
        uint32_t id,
        uintptr_t flags)
{
   ext->next = (uintptr_t)(void *)next;
   ext->id = id;
   ext->flags = flags;
}

/* This function sets the extension for multiple in/out syncobjs. When it is
 * successful, it sets the extension id to DRM_V3D_EXT_ID_MULTI_SYNC.
 * Otherwise, the extension id is 0, which means an out-of-memory error.
 */
static void
set_multisync(struct drm_v3d_multi_sync *ms,
              struct v3dv_submit_sync_info *sync_info,
              struct drm_v3d_extension *next,
              struct v3dv_device *device,
              struct v3dv_job *job,
              enum v3dv_queue_type queue_sync,
              enum v3d_queue wait_stage,
              bool signal_syncs)
{
   struct v3dv_queue *queue = &device->queue;
   uint32_t out_sync_count = 0, in_sync_count = 0;
   struct drm_v3d_sem *out_syncs = NULL, *in_syncs = NULL;

   in_syncs = set_in_syncs(queue, job, queue_sync,
                           &in_sync_count, sync_info);
   if (!in_syncs && in_sync_count)
      goto fail;

   out_syncs = set_out_syncs(queue, job, queue_sync,
                             &out_sync_count, sync_info, signal_syncs);

   assert(out_sync_count > 0);

   if (!out_syncs)
      goto fail;

   set_ext(&ms->base, next, DRM_V3D_EXT_ID_MULTI_SYNC, 0);
   ms->wait_stage = wait_stage;
   ms->out_sync_count = out_sync_count;
   ms->out_syncs = (uintptr_t)(void *)out_syncs;
   ms->in_sync_count = in_sync_count;
   ms->in_syncs = (uintptr_t)(void *)in_syncs;

   return;

fail:
   if (in_syncs)
      vk_free(&device->vk.alloc, in_syncs);
   assert(!out_syncs);

   return;
}

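/* Submits a CL (bin/render) job to the kernel, choosing whether the job must
 * sync at the binning or render stage and filling in either the multisync
 * extension or the legacy single-syncobj fields.
 */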
static VkResult
handle_cl_job(struct v3dv_queue *queue,
              struct v3dv_job *job,
              uint32_t counter_pass_idx,
              struct v3dv_submit_sync_info *sync_info,
              bool signal_syncs)
{
   struct v3dv_device *device = queue->device;

   struct drm_v3d_submit_cl submit = { 0 };

   /* Sanity check: we should only flag a bcl sync on a job that needs to be
    * serialized.
    */
   assert(job->serialize || !job->needs_bcl_sync);

   /* We expect to have just one RCL per job, which should fit in just one BO.
    * Our BCL, however, may chain multiple BOs together.
    */
   assert(list_length(&job->rcl.bo_list) == 1);
   assert(list_length(&job->bcl.bo_list) >= 1);
   struct v3dv_bo *bcl_first_bo =
      list_first_entry(&job->bcl.bo_list, struct v3dv_bo, list_link);
   submit.bcl_start = bcl_first_bo->offset;
   submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl);
   submit.rcl_start = job->rcl.bo->offset;
   submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl);

   submit.qma = job->tile_alloc->offset;
   submit.qms = job->tile_alloc->size;
   submit.qts = job->tile_state->offset;

   submit.flags = 0;
   if (job->tmu_dirty_rcl)
      submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;

   /* If the job uses VK_KHR_buffer_device_address we need to ensure all
    * buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR
    * are included.
    */
   if (job->uses_buffer_device_address) {
      util_dynarray_foreach(&queue->device->device_address_bo_list,
                            struct v3dv_bo *, bo) {
         v3dv_job_add_bo(job, *bo);
      }
   }

   submit.bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
      (uint32_t *) malloc(sizeof(uint32_t) * submit.bo_handle_count);
   if (!bo_handles)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   uint32_t bo_idx = 0;
   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
      bo_handles[bo_idx++] = bo->handle;
   }
   assert(bo_idx == submit.bo_handle_count);
   submit.bo_handles = (uintptr_t)(void *)bo_handles;

   submit.perfmon_id = job->perf ?
      job->perf->kperfmon_ids[counter_pass_idx] : 0;
   const bool needs_perf_sync = queue->last_perfmon_id != submit.perfmon_id;
   queue->last_perfmon_id = submit.perfmon_id;

   /* We need a binning sync if we are the first CL job waiting on a semaphore
    * with a wait stage that involves the geometry pipeline, or if the job
    * comes after a pipeline barrier that involves geometry stages
    * (needs_bcl_sync), or when performance queries are in use.
    *
    * We need a render sync if the job doesn't need a binning sync but has
    * still been flagged for serialization. It should be noted that RCL jobs
    * don't start until the previous RCL job has finished so we don't really
    * need to add a fence for those, however, we might need to wait on a CSD or
    * TFU job, which are not automatically serialized with CL jobs.
    */
   bool needs_bcl_sync = job->needs_bcl_sync || needs_perf_sync;
   if (queue->last_job_syncs.first[V3DV_QUEUE_CL]) {
      for (int i = 0; !needs_bcl_sync && i < sync_info->wait_count; i++) {
         needs_bcl_sync = sync_info->waits[i].stage_mask &
             (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT |
              VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT |
              VK_PIPELINE_STAGE_ALL_COMMANDS_BIT |
              VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
              VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
              VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
              VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
              VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
              VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT);
      }
   }

   bool needs_rcl_sync = job->serialize && !needs_bcl_sync;

   /* Replace the single-semaphore settings whenever our kernel driver
    * supports the multisync extension.
    */
   struct drm_v3d_multi_sync ms = { 0 };
   if (device->pdevice->caps.multisync) {
      enum v3d_queue wait_stage = needs_rcl_sync ? V3D_RENDER : V3D_BIN;
      set_multisync(&ms, sync_info, NULL, device, job,
                    V3DV_QUEUE_CL, wait_stage, signal_syncs);
      if (!ms.base.id) {
         free(bo_handles);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      submit.flags |= DRM_V3D_SUBMIT_EXTENSION;
      submit.extensions = (uintptr_t)(void *)&ms;
      /* Disable legacy sync interface when multisync extension is used */
      submit.in_sync_rcl = 0;
      submit.in_sync_bcl = 0;
      submit.out_sync = 0;
   } else {
      uint32_t last_job_sync = queue->last_job_syncs.syncs[V3DV_QUEUE_ANY];
      submit.in_sync_bcl = needs_bcl_sync ? last_job_sync : 0;
      submit.in_sync_rcl = needs_rcl_sync ? last_job_sync : 0;
      submit.out_sync = last_job_sync;
   }

   v3dv_clif_dump(device, job, &submit);
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_SUBMIT_CL, &submit);

   static bool warned = false;
   if (ret && !warned) {
      fprintf(stderr, "Draw call returned %s. Expect corruption.\n",
              strerror(errno));
      warned = true;
   }

   free(bo_handles);
   multisync_free(device, &ms);

   queue->last_job_syncs.first[V3DV_QUEUE_CL] = false;

   if (ret)
      return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CL failed: %m");

   return VK_SUCCESS;
}

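/* Submits a TFU (Texture Formatting Unit) job to the kernel. */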
static VkResult
handle_tfu_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               struct v3dv_submit_sync_info *sync_info,
               bool signal_syncs)
{
   struct v3dv_device *device = queue->device;

   const bool needs_sync = sync_info->wait_count || job->serialize;

   /* Replace the single-semaphore settings whenever our kernel driver
    * supports the multisync extension.
    */
   struct drm_v3d_multi_sync ms = { 0 };
   if (device->pdevice->caps.multisync) {
      set_multisync(&ms, sync_info, NULL, device, job,
                    V3DV_QUEUE_TFU, V3D_TFU, signal_syncs);
      if (!ms.base.id)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

      job->tfu.flags |= DRM_V3D_SUBMIT_EXTENSION;
      job->tfu.extensions = (uintptr_t)(void *)&ms;
      /* Disable legacy sync interface when multisync extension is used */
      job->tfu.in_sync = 0;
      job->tfu.out_sync = 0;
   } else {
      uint32_t last_job_sync = queue->last_job_syncs.syncs[V3DV_QUEUE_ANY];
      job->tfu.in_sync = needs_sync ? last_job_sync : 0;
      job->tfu.out_sync = last_job_sync;
   }
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu);

   multisync_free(device, &ms);
   queue->last_job_syncs.first[V3DV_QUEUE_TFU] = false;

   if (ret != 0)
      return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_TFU failed: %m");

   return VK_SUCCESS;
}

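/* Submits a CSD (Compute Shader Dispatch) job to the kernel. */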
static VkResult
handle_csd_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               uint32_t counter_pass_idx,
               struct v3dv_submit_sync_info *sync_info,
               bool signal_syncs)
{
   struct v3dv_device *device = queue->device;

   struct drm_v3d_submit_csd *submit = &job->csd.submit;

   /* If the job uses VK_KHR_buffer_device_address we need to ensure all
    * buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR
    * are included.
    */
   if (job->uses_buffer_device_address) {
      util_dynarray_foreach(&queue->device->device_address_bo_list,
                            struct v3dv_bo *, bo) {
         v3dv_job_add_bo(job, *bo);
      }
   }

   submit->bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
      (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2));
   if (!bo_handles)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   uint32_t bo_idx = 0;
   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
      bo_handles[bo_idx++] = bo->handle;
   }
   assert(bo_idx == submit->bo_handle_count);
   submit->bo_handles = (uintptr_t)(void *)bo_handles;

   const bool needs_sync = sync_info->wait_count || job->serialize;

   /* Replace the single-semaphore settings whenever our kernel driver
    * supports the multisync extension.
    */
   struct drm_v3d_multi_sync ms = { 0 };
   if (device->pdevice->caps.multisync) {
      set_multisync(&ms, sync_info, NULL, device, job,
                    V3DV_QUEUE_CSD, V3D_CSD, signal_syncs);
      if (!ms.base.id) {
         free(bo_handles);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      submit->flags |= DRM_V3D_SUBMIT_EXTENSION;
      submit->extensions = (uintptr_t)(void *)&ms;
      /* Disable legacy sync interface when multisync extension is used */
      submit->in_sync = 0;
      submit->out_sync = 0;
   } else {
      uint32_t last_job_sync = queue->last_job_syncs.syncs[V3DV_QUEUE_ANY];
      submit->in_sync = needs_sync ? last_job_sync : 0;
      submit->out_sync = last_job_sync;
   }
   submit->perfmon_id = job->perf ?
      job->perf->kperfmon_ids[counter_pass_idx] : 0;
   queue->last_perfmon_id = submit->perfmon_id;
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_SUBMIT_CSD, submit);

   static bool warned = false;
   if (ret && !warned) {
      fprintf(stderr, "Compute dispatch returned %s. Expect corruption.\n",
              strerror(errno));
      warned = true;
   }

   free(bo_handles);

   multisync_free(device, &ms);
   queue->last_job_syncs.first[V3DV_QUEUE_CSD] = false;

   if (ret)
      return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CSD failed: %m");

   return VK_SUCCESS;
}

static VkResult
queue_handle_job(struct v3dv_queue *queue,
                 struct v3dv_job *job,
                 uint32_t counter_pass_idx,
                 struct v3dv_submit_sync_info *sync_info,
                 bool signal_syncs)
{
   switch (job->type) {
   case V3DV_JOB_TYPE_GPU_CL:
      return handle_cl_job(queue, job, counter_pass_idx, sync_info, signal_syncs);
   case V3DV_JOB_TYPE_GPU_TFU:
      return handle_tfu_job(queue, job, sync_info, signal_syncs);
   case V3DV_JOB_TYPE_GPU_CSD:
      return handle_csd_job(queue, job, counter_pass_idx, sync_info, signal_syncs);
   case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
      return handle_reset_query_cpu_job(queue, job, sync_info);
   case V3DV_JOB_TYPE_CPU_END_QUERY:
      return handle_end_query_cpu_job(job, counter_pass_idx);
   case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
      return handle_copy_query_results_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_SET_EVENT:
      return handle_set_event_cpu_job(queue, job, sync_info);
   case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
      return handle_wait_events_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
      return handle_copy_buffer_to_image_cpu_job(queue, job, sync_info);
   case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
      return handle_csd_indirect_cpu_job(queue, job, sync_info);
   case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
      return handle_timestamp_query_cpu_job(queue, job, sync_info);
   default:
      unreachable("Unhandled job type");
   }
}

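/* Creates the queue's no-op job: a CL job that performs no actual work and is
 * serialized against all hw queues, so it can be submitted purely for
 * synchronization purposes.
 */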
static VkResult
queue_create_noop_job(struct v3dv_queue *queue)
{
   struct v3dv_device *device = queue->device;
   queue->noop_job = vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_job), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!queue->noop_job)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1);

   v3dv_X(device, job_emit_noop)(queue->noop_job);

   /* We use no-op jobs to signal semaphores/fences. These jobs need to be
    * serialized across all hw queues to comply with Vulkan's signal operation
    * order requirements, which basically require that signal operations occur
    * in submission order.
    */
   queue->noop_job->serialize = V3DV_BARRIER_ALL;

   return VK_SUCCESS;
}

static VkResult
queue_submit_noop_job(struct v3dv_queue *queue,
                      uint32_t counter_pass_idx,
                      struct v3dv_submit_sync_info *sync_info,
                      bool signal_syncs)
{
   if (!queue->noop_job) {
      VkResult result = queue_create_noop_job(queue);
      if (result != VK_SUCCESS)
         return result;
   }

   assert(queue->noop_job);
   return queue_handle_job(queue, queue->noop_job, counter_pass_idx,
                           sync_info, signal_syncs);
}

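/* Driver entry point for queue submissions: processes wait semaphores, runs
 * every job of every command buffer in submission order, consumes trailing
 * barriers with a no-op job, and finally processes the signal semaphores.
 */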
VkResult
v3dv_queue_driver_submit(struct vk_queue *vk_queue,
                         struct vk_queue_submit *submit)
{
   struct v3dv_queue *queue = container_of(vk_queue, struct v3dv_queue, vk);
   VkResult result;

   struct v3dv_submit_sync_info sync_info = {
      .wait_count = submit->wait_count,
      .waits = submit->waits,
      .signal_count = submit->signal_count,
      .signals = submit->signals,
   };

   for (int i = 0; i < V3DV_QUEUE_COUNT; i++)
      queue->last_job_syncs.first[i] = true;

   result = process_waits(queue, sync_info.wait_count, sync_info.waits);
   if (result != VK_SUCCESS)
      return result;

   for (uint32_t i = 0; i < submit->command_buffer_count; i++) {
      struct v3dv_cmd_buffer *cmd_buffer =
         container_of(submit->command_buffers[i], struct v3dv_cmd_buffer, vk);
      list_for_each_entry_safe(struct v3dv_job, job,
                               &cmd_buffer->jobs, list_link) {

         result = queue_handle_job(queue, job, submit->perf_pass_index,
                                   &sync_info, false);
         if (result != VK_SUCCESS)
            return result;
      }

      /* If the command buffer ends with a barrier we need to consume it now.
       *
       * FIXME: this will drain all hw queues. Instead, we could use the pending
       * barrier state to limit the queues we serialize against.
       */
      if (cmd_buffer->state.barrier.dst_mask) {
         result = queue_submit_noop_job(queue, submit->perf_pass_index,
                                        &sync_info, false);
         if (result != VK_SUCCESS)
            return result;
      }
   }

   /* Finish by submitting a no-op job that synchronizes across all queues.
    * This will ensure that the signal semaphores don't get triggered until
    * all work on any queue completes. See Vulkan's signal operation order
    * requirements.
    */
   if (submit->signal_count > 0) {
      result = queue_submit_noop_job(queue, submit->perf_pass_index,
                                     &sync_info, true);
      if (result != VK_SUCCESS)
         return result;
   }

   return process_signals(queue, sync_info.signal_count, sync_info.signals);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_QueueBindSparse(VkQueue _queue,
                     uint32_t bindInfoCount,
                     const VkBindSparseInfo *pBindInfo,
                     VkFence fence)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
   return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
}