1 /*
2  * Copyright © 2019 Raspberry Pi
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "drm-uapi/v3d_drm.h"
26 
27 #include "broadcom/clif/clif_dump.h"
28 
29 #include <errno.h>
30 #include <time.h>
31 
32 static void
33 v3dv_clif_dump(struct v3dv_device *device,
34                struct v3dv_job *job,
35                struct drm_v3d_submit_cl *submit)
36 {
37    if (!(unlikely(V3D_DEBUG & (V3D_DEBUG_CL |
38                                V3D_DEBUG_CL_NO_BIN |
39                                V3D_DEBUG_CLIF))))
40       return;
41 
42    struct clif_dump *clif = clif_dump_init(&device->devinfo,
43                                            stderr,
44                                            V3D_DEBUG & (V3D_DEBUG_CL |
45                                                         V3D_DEBUG_CL_NO_BIN),
46                                            V3D_DEBUG & V3D_DEBUG_CL_NO_BIN);
47 
48    set_foreach(job->bos, entry) {
49       struct v3dv_bo *bo = (void *)entry->key;
50       char *name = ralloc_asprintf(NULL, "%s_0x%x",
51                                    bo->name, bo->offset);
52 
53       bool ok = v3dv_bo_map(device, bo, bo->size);
54       if (!ok) {
55          fprintf(stderr, "failed to map BO for clif_dump.\n");
56          ralloc_free(name);
57          goto free_clif;
58       }
59       clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
60 
61       ralloc_free(name);
62    }
63 
64    clif_dump(clif, submit);
65 
66  free_clif:
67    clif_dump_destroy(clif);
68 }
69 
70 static uint64_t
71 gettime_ns()
72 {
73    struct timespec current;
74    clock_gettime(CLOCK_MONOTONIC, &current);
75    return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
76 }
77 
78 static uint64_t
79 get_absolute_timeout(uint64_t timeout)
80 {
81    uint64_t current_time = gettime_ns();
82    uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;
83 
84    timeout = MIN2(max_timeout, timeout);
85 
86    return (current_time + timeout);
87 }
88 
89 static VkResult
90 queue_submit_job(struct v3dv_queue *queue,
91                  struct v3dv_job *job,
92                  bool do_sem_wait,
93                  pthread_t *wait_thread);
94 
95 /* Waits for active CPU wait threads spawned before the current thread to
96  * complete and submit all their GPU jobs.
97  */
98 static void
99 cpu_queue_wait_idle(struct v3dv_queue *queue)
100 {
101    const pthread_t this_thread = pthread_self();
102 
103 retry:
104    mtx_lock(&queue->mutex);
105    list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
106                        &queue->submit_wait_list, list_link) {
107       for (uint32_t  i = 0; i < info->wait_thread_count; i++) {
108          if (info->wait_threads[i].finished)
109             continue;
110 
111          /* Because we are testing this against the list of spawned threads
112           * it will never match for the main thread, so when we call this from
113           * the main thread we are effectively waiting for all active threads
114           * to complete, and otherwise we are only waiting for work submitted
115           * before the wait thread that called this (a wait thread should never
116           * be waiting for work submitted after it).
117           */
118          if (info->wait_threads[i].thread == this_thread)
119             goto done;
120 
121          /* Wait and try again */
122          mtx_unlock(&queue->mutex);
123          usleep(500); /* 0.5 ms */
124          goto retry;
125       }
126    }
127 
128 done:
129    mtx_unlock(&queue->mutex);
130 }
131 
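/* Waits (without a timeout) on the syncobj of the last job submitted to the
 * device, ensuring that all GPU work queued so far has completed.
 */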
132 static VkResult
133 gpu_queue_wait_idle(struct v3dv_queue *queue)
134 {
135    struct v3dv_device *device = queue->device;
136 
137    mtx_lock(&device->mutex);
138    uint32_t last_job_sync = device->last_job_sync;
139    mtx_unlock(&device->mutex);
140 
141    int ret = drmSyncobjWait(device->pdevice->render_fd,
142                             &last_job_sync, 1, INT64_MAX, 0, NULL);
143    if (ret)
144       return VK_ERROR_DEVICE_LOST;
145 
146    return VK_SUCCESS;
147 }
148 
149 VKAPI_ATTR VkResult VKAPI_CALL
150 v3dv_QueueWaitIdle(VkQueue _queue)
151 {
152    V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
153 
154    /* Check that we don't have any wait threads running in the CPU first,
155     * as these can spawn new GPU jobs.
156     */
157    cpu_queue_wait_idle(queue);
158 
159    /* Check we don't have any GPU jobs running */
160    return gpu_queue_wait_idle(queue);
161 }
162 
163 static VkResult
164 handle_reset_query_cpu_job(struct v3dv_job *job)
165 {
166    struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
167    assert(info->pool);
168 
169    /* We are about to reset query counters so we need to make sure that
170     * the GPU is not using them. The exception is timestamp queries, since
171     * we handle those on the CPU.
172     *
173     * FIXME: we could avoid blocking the main thread for this if we use a
174     *        submission thread.
175     */
176    if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION)
177       v3dv_bo_wait(job->device, info->pool->bo, PIPE_TIMEOUT_INFINITE);
178 
179    for (uint32_t i = info->first; i < info->first + info->count; i++) {
180       assert(i < info->pool->query_count);
181       struct v3dv_query *q = &info->pool->queries[i];
182       q->maybe_available = false;
183       switch (info->pool->query_type) {
184       case VK_QUERY_TYPE_OCCLUSION: {
185          const uint8_t *q_addr = ((uint8_t *) q->bo->map) + q->offset;
186          uint32_t *counter = (uint32_t *) q_addr;
187          *counter = 0;
188          break;
189       }
190       case VK_QUERY_TYPE_TIMESTAMP:
191          q->value = 0;
192          break;
193       default:
194          unreachable("Unsupported query type");
195       }
196    }
197 
198    return VK_SUCCESS;
199 }
200 
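/* CPU job that marks the affected queries as possibly available; the actual
 * results are read later when the query results are fetched.
 */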
201 static VkResult
202 handle_end_query_cpu_job(struct v3dv_job *job)
203 {
204    struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end;
205    for (uint32_t i = 0; i < info->count; i++) {
206       assert(info->query + i < info->pool->query_count);
207       struct v3dv_query *query = &info->pool->queries[info->query + i];
208       query->maybe_available = true;
209    }
210 
211    return VK_SUCCESS;
212 }
213 
214 static VkResult
215 handle_copy_query_results_cpu_job(struct v3dv_job *job)
216 {
217    struct v3dv_copy_query_results_cpu_job_info *info =
218       &job->cpu.query_copy_results;
219 
220    assert(info->dst && info->dst->mem && info->dst->mem->bo);
221    struct v3dv_bo *bo = info->dst->mem->bo;
222 
223    /* Map the entire dst buffer for the CPU copy if needed */
224    assert(!bo->map || bo->map_size == bo->size);
225    if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
226       return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
227 
228    /* FIXME: if flags includes VK_QUERY_RESULT_WAIT_BIT this could trigger a
229     * sync wait on the CPU for the corresponding GPU jobs to finish. We might
230     * want to use a submission thread to avoid blocking on the main thread.
231     */
232    uint8_t *offset = ((uint8_t *) bo->map) +
233                      info->offset + info->dst->mem_offset;
234    v3dv_get_query_pool_results_cpu(job->device,
235                                    info->pool,
236                                    info->first,
237                                    info->count,
238                                    offset,
239                                    info->stride,
240                                    info->flags);
241 
242    return VK_SUCCESS;
243 }
244 
245 static VkResult
246 handle_set_event_cpu_job(struct v3dv_job *job, bool is_wait_thread)
247 {
248    /* From the Vulkan 1.0 spec:
249     *
250     *    "When vkCmdSetEvent is submitted to a queue, it defines an execution
251     *     dependency on commands that were submitted before it, and defines an
252     *     event signal operation which sets the event to the signaled state.
253     *     The first synchronization scope includes every command previously
254     *     submitted to the same queue, including those in the same command
255     *     buffer and batch".
256     *
257     * So we should wait for all prior work to be completed before signaling
258     * the event, this includes all active CPU wait threads spawned for any
259     * command buffer submitted *before* this.
260     *
261     * FIXME: we could avoid blocking the main thread for this if we use a
262     *        submission thread.
263     */
264 
265    /* If we are calling this from a wait thread it will only wait for
266     * wait threads spawned before it, otherwise it will wait for
267     * all active threads to complete.
268     */
269    cpu_queue_wait_idle(&job->device->queue);
270 
271    VkResult result = gpu_queue_wait_idle(&job->device->queue);
272    if (result != VK_SUCCESS)
273       return result;
274 
275    struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
276    p_atomic_set(&info->event->state, info->state);
277 
278    return VK_SUCCESS;
279 }
280 
281 static bool
282 check_wait_events_complete(struct v3dv_job *job)
283 {
284    assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
285 
286    struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
287    for (uint32_t i = 0; i < info->event_count; i++) {
288       if (!p_atomic_read(&info->events[i]->state))
289          return false;
290    }
291    return true;
292 }
293 
294 static void
295 wait_thread_finish(struct v3dv_queue *queue, pthread_t thread)
296 {
297    mtx_lock(&queue->mutex);
298    list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
299                        &queue->submit_wait_list, list_link) {
300       for (uint32_t  i = 0; i < info->wait_thread_count; i++) {
301          if (info->wait_threads[i].thread == thread) {
302             info->wait_threads[i].finished = true;
303             goto done;
304          }
305       }
306    }
307 
308    unreachable(!"Failed to finish wait thread: not found");
309 
310 done:
311    mtx_unlock(&queue->mutex);
312 }
313 
314 static void *
315 event_wait_thread_func(void *_job)
316 {
317    struct v3dv_job *job = (struct v3dv_job *) _job;
318    assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
319    struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
320 
321    /* Wait for events to be signaled */
322    const useconds_t wait_interval_ms = 1;
323    while (!check_wait_events_complete(job))
324       usleep(wait_interval_ms * 1000);
325 
326    /* Now continue submitting pending jobs for the same command buffer after
327     * the wait job.
328     */
329    struct v3dv_queue *queue = &job->device->queue;
330    list_for_each_entry_from(struct v3dv_job, pjob, job->list_link.next,
331                             &job->cmd_buffer->jobs, list_link) {
332       /* We don't want to spawn more than one wait thread per command buffer.
333        * If this job also requires a wait for events, we will do the wait here.
334        */
335       VkResult result = queue_submit_job(queue, pjob, info->sem_wait, NULL);
336       if (result == VK_NOT_READY) {
337          while (!check_wait_events_complete(pjob)) {
338             usleep(wait_interval_ms * 1000);
339          }
340          result = VK_SUCCESS;
341       }
342 
343       if (result != VK_SUCCESS) {
344          fprintf(stderr, "Wait thread job execution failed.\n");
345          goto done;
346       }
347    }
348 
349 done:
350    wait_thread_finish(queue, pthread_self());
351    return NULL;
352 }
353 
354 static VkResult
355 spawn_event_wait_thread(struct v3dv_job *job, pthread_t *wait_thread)
356 
357 {
358    assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
359    assert(job->cmd_buffer);
360    assert(wait_thread != NULL);
361 
362    if (pthread_create(wait_thread, NULL, event_wait_thread_func, job))
363       return vk_error(job->device, VK_ERROR_DEVICE_LOST);
364 
365    return VK_NOT_READY;
366 }
367 
368 static VkResult
369 handle_wait_events_cpu_job(struct v3dv_job *job,
370                            bool sem_wait,
371                            pthread_t *wait_thread)
372 {
373    assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
374    struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
375 
376    /* If all events are signaled then we are done and can continue submitting
377     * the rest of the command buffer normally.
378     */
379    if (check_wait_events_complete(job))
380       return VK_SUCCESS;
381 
382    /* Otherwise, we put the rest of the command buffer on a wait thread until
383     * all events are signaled. We only spawn a new thread on the first
384     * wait job we see for a command buffer, any additional wait jobs in the
385     * same command buffer will run in that same wait thread and will get here
386     * with a NULL wait_thread pointer.
387     *
388     * Also, whether we spawn a wait thread or not, we always return
389     * VK_NOT_READY (unless an error happened), so we stop trying to submit
390     * any jobs in the same command buffer after the wait job. The wait thread
391     * will attempt to submit them after the wait completes.
392     */
393    info->sem_wait = sem_wait;
394    if (wait_thread)
395       return spawn_event_wait_thread(job, wait_thread);
396    else
397       return VK_NOT_READY;
398 }
399 
400 static VkResult
401 handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)
402 {
403    assert(job->type == V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE);
404    struct v3dv_copy_buffer_to_image_cpu_job_info *info =
405       &job->cpu.copy_buffer_to_image;
406 
407    /* Wait for all GPU work to finish first, since we may be accessing
408     * the BOs involved in the operation.
409     */
410    v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));
411 
412    /* Map BOs */
413    struct v3dv_bo *dst_bo = info->image->mem->bo;
414    assert(!dst_bo->map || dst_bo->map_size == dst_bo->size);
415    if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size))
416       return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
417    void *dst_ptr = dst_bo->map;
418 
419    struct v3dv_bo *src_bo = info->buffer->mem->bo;
420    assert(!src_bo->map || src_bo->map_size == src_bo->size);
421    if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size))
422       return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
423    void *src_ptr = src_bo->map;
424 
425    const struct v3d_resource_slice *slice =
426       &info->image->slices[info->mip_level];
427 
428    const struct pipe_box box = {
429       info->image_offset.x, info->image_offset.y, info->base_layer,
430       info->image_extent.width, info->image_extent.height, info->layer_count,
431    };
432 
433    /* Copy each layer */
434    for (uint32_t i = 0; i < info->layer_count; i++) {
435       const uint32_t dst_offset =
436          v3dv_layer_offset(info->image, info->mip_level, info->base_layer + i);
437       const uint32_t src_offset =
438          info->buffer->mem_offset + info->buffer_offset +
439          info->buffer_layer_stride * i;
440       v3d_store_tiled_image(
441          dst_ptr + dst_offset, slice->stride,
442          src_ptr + src_offset, info->buffer_stride,
443          slice->tiling, info->image->cpp, slice->padded_height, &box);
444    }
445 
446    return VK_SUCCESS;
447 }
448 
449 static VkResult
450 handle_timestamp_query_cpu_job(struct v3dv_job *job)
451 {
452    assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
453    struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;
454 
455    /* Wait for completion of all work queued before the timestamp query */
456    v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));
457 
458    /* Compute timestamp */
459    struct timespec t;
460    clock_gettime(CLOCK_MONOTONIC, &t);
461 
462    for (uint32_t i = 0; i < info->count; i++) {
463       assert(info->query + i < info->pool->query_count);
464       struct v3dv_query *query = &info->pool->queries[info->query + i];
465       query->maybe_available = true;
466       if (i == 0)
467          query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
468    }
469 
470    return VK_SUCCESS;
471 }
472 
473 static VkResult
474 handle_csd_job(struct v3dv_queue *queue,
475                struct v3dv_job *job,
476                bool do_sem_wait);
477 
478 static VkResult
479 handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
480                             struct v3dv_job *job,
481                             bool do_sem_wait)
482 {
483    assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT);
484    struct v3dv_csd_indirect_cpu_job_info *info = &job->cpu.csd_indirect;
485    assert(info->csd_job);
486 
487    /* Make sure the GPU is no longer using the indirect buffer */
488    assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
489    v3dv_bo_wait(queue->device, info->buffer->mem->bo, PIPE_TIMEOUT_INFINITE);
490 
491    /* Map the indirect buffer and read the dispatch parameters */
492    assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
493    struct v3dv_bo *bo = info->buffer->mem->bo;
494    if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
495       return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
496    assert(bo->map);
497 
498    const uint32_t offset = info->buffer->mem_offset + info->offset;
499    const uint32_t *group_counts = (uint32_t *) (bo->map + offset);
500    if (group_counts[0] == 0 || group_counts[1] == 0 || group_counts[2] == 0)
501       return VK_SUCCESS;
502 
503    if (memcmp(group_counts, info->csd_job->csd.wg_count,
504               sizeof(info->csd_job->csd.wg_count)) != 0) {
505       v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts);
506    }
507 
508    handle_csd_job(queue, info->csd_job, do_sem_wait);
509 
510    return VK_SUCCESS;
511 }
512 
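/* Signals the given semaphores by exporting a sync file from the device's
 * last job syncobj and importing it into each semaphore's syncobj (or into
 * its temporary payload, if one is installed).
 */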
513 static VkResult
514 process_semaphores_to_signal(struct v3dv_device *device,
515                              uint32_t count, const VkSemaphore *sems)
516 {
517    if (count == 0)
518       return VK_SUCCESS;
519 
520    int render_fd = device->pdevice->render_fd;
521 
522    int fd;
523    mtx_lock(&device->mutex);
524    drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd);
525    mtx_unlock(&device->mutex);
526    if (fd == -1)
527       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
528 
529    VkResult result = VK_SUCCESS;
530    for (uint32_t i = 0; i < count; i++) {
531       struct v3dv_semaphore *sem = v3dv_semaphore_from_handle(sems[i]);
532 
533       int ret;
534       if (!sem->temp_sync)
535          ret = drmSyncobjImportSyncFile(render_fd, sem->sync, fd);
536       else
537          ret = drmSyncobjImportSyncFile(render_fd, sem->temp_sync, fd);
538 
539       if (ret) {
540          result = VK_ERROR_OUT_OF_HOST_MEMORY;
541          break;
542       }
543    }
544 
545    assert(fd >= 0);
546    close(fd);
547 
548    return result;
549 }
550 
551 static VkResult
552 process_fence_to_signal(struct v3dv_device *device, VkFence _fence)
553 {
554    if (_fence == VK_NULL_HANDLE)
555       return VK_SUCCESS;
556 
557    struct v3dv_fence *fence = v3dv_fence_from_handle(_fence);
558 
559    int render_fd = device->pdevice->render_fd;
560 
561    int fd;
562    mtx_lock(&device->mutex);
563    drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd);
564    mtx_unlock(&device->mutex);
565    if (fd == -1)
566       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
567 
568    int ret;
569    if (!fence->temp_sync)
570       ret = drmSyncobjImportSyncFile(render_fd, fence->sync, fd);
571    else
572       ret = drmSyncobjImportSyncFile(render_fd, fence->temp_sync, fd);
573 
574    assert(fd >= 0);
575    close(fd);
576 
577    return ret ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS;
578 }
579 
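/* Submits a command list job to the kernel: fills a drm_v3d_submit_cl with
 * the job's binning (BCL) and render (RCL) command lists, BO handles and
 * in/out syncobjs, and calls DRM_IOCTL_V3D_SUBMIT_CL.
 */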
580 static VkResult
581 handle_cl_job(struct v3dv_queue *queue,
582               struct v3dv_job *job,
583               bool do_sem_wait)
584 {
585    struct v3dv_device *device = queue->device;
586 
587    struct drm_v3d_submit_cl submit = { 0 };
588 
589    /* Sanity check: we should only flag a bcl sync on a job that needs to be
590     * serialized.
591     */
592    assert(job->serialize || !job->needs_bcl_sync);
593 
594    /* We expect to have just one RCL per job, which should fit in just one BO.
595     * Our BCL, however, could chain multiple BOs together.
596     */
597    assert(list_length(&job->rcl.bo_list) == 1);
598    assert(list_length(&job->bcl.bo_list) >= 1);
599    struct v3dv_bo *bcl_first_bo =
600       list_first_entry(&job->bcl.bo_list, struct v3dv_bo, list_link);
601    submit.bcl_start = bcl_first_bo->offset;
602    submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl);
603    submit.rcl_start = job->rcl.bo->offset;
604    submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl);
605 
606    submit.qma = job->tile_alloc->offset;
607    submit.qms = job->tile_alloc->size;
608    submit.qts = job->tile_state->offset;
609 
610    submit.flags = 0;
611    if (job->tmu_dirty_rcl)
612       submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
613 
614    submit.bo_handle_count = job->bo_count;
615    uint32_t *bo_handles =
616       (uint32_t *) malloc(sizeof(uint32_t) * submit.bo_handle_count);
617    uint32_t bo_idx = 0;
618    set_foreach(job->bos, entry) {
619       struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
620       bo_handles[bo_idx++] = bo->handle;
621    }
622    assert(bo_idx == submit.bo_handle_count);
623    submit.bo_handles = (uintptr_t)(void *)bo_handles;
624 
625    /* We need a binning sync if we are waiting on a semaphore (do_sem_wait) or
626     * if the job comes after a pipeline barrier that involves geometry stages
627     * (needs_bcl_sync).
628     *
629     * We need a render sync if the job doesn't need a binning sync but has
630     * still been flagged for serialization. It should be noted that RCL jobs
631     * don't start until the previous RCL job has finished so we don't really
632     * need to add a fence for those, however, we might need to wait on a CSD or
633     * TFU job, which are not automatically serialized with CL jobs.
634     *
635     * FIXME: for now, if we are asked to wait on any semaphores, we just wait
636     * on the last job we submitted. In the future we might want to pass the
637     * actual syncobj of the wait semaphores so we don't block on the last RCL
638     * if we only need to wait for a previous CSD or TFU, for example, but
639     * we would have to extend our kernel interface to support the case where
640     * we have more than one semaphore to wait on.
641     */
642    const bool needs_bcl_sync = do_sem_wait || job->needs_bcl_sync;
643    const bool needs_rcl_sync = job->serialize && !needs_bcl_sync;
644 
645    mtx_lock(&queue->device->mutex);
646    submit.in_sync_bcl = needs_bcl_sync ? device->last_job_sync : 0;
647    submit.in_sync_rcl = needs_rcl_sync ? device->last_job_sync : 0;
648    submit.out_sync = device->last_job_sync;
649    v3dv_clif_dump(device, job, &submit);
650    int ret = v3dv_ioctl(device->pdevice->render_fd,
651                         DRM_IOCTL_V3D_SUBMIT_CL, &submit);
652    mtx_unlock(&queue->device->mutex);
653 
654    static bool warned = false;
655    if (ret && !warned) {
656       fprintf(stderr, "Draw call returned %s. Expect corruption.\n",
657               strerror(errno));
658       warned = true;
659    }
660 
661    free(bo_handles);
662 
663    if (ret)
664       return vk_error(device, VK_ERROR_DEVICE_LOST);
665 
666    return VK_SUCCESS;
667 }
668 
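/* Submits a TFU (texture formatting unit) job through
 * DRM_IOCTL_V3D_SUBMIT_TFU, serializing it against the last submitted job
 * when needed.
 */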
669 static VkResult
670 handle_tfu_job(struct v3dv_queue *queue,
671                struct v3dv_job *job,
672                bool do_sem_wait)
673 {
674    struct v3dv_device *device = queue->device;
675 
676    const bool needs_sync = do_sem_wait || job->serialize;
677 
678    mtx_lock(&device->mutex);
679    job->tfu.in_sync = needs_sync ? device->last_job_sync : 0;
680    job->tfu.out_sync = device->last_job_sync;
681    int ret = v3dv_ioctl(device->pdevice->render_fd,
682                         DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu);
683    mtx_unlock(&device->mutex);
684 
685    if (ret != 0) {
686       fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
687       return vk_error(device, VK_ERROR_DEVICE_LOST);
688    }
689 
690    return VK_SUCCESS;
691 }
692 
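/* Submits a compute (CSD) dispatch through DRM_IOCTL_V3D_SUBMIT_CSD with the
 * job's BO handles and the required in/out syncobjs.
 */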
693 static VkResult
694 handle_csd_job(struct v3dv_queue *queue,
695                struct v3dv_job *job,
696                bool do_sem_wait)
697 {
698    struct v3dv_device *device = queue->device;
699 
700    struct drm_v3d_submit_csd *submit = &job->csd.submit;
701 
702    submit->bo_handle_count = job->bo_count;
703    uint32_t *bo_handles =
704       (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2));
705    uint32_t bo_idx = 0;
706    set_foreach(job->bos, entry) {
707       struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
708       bo_handles[bo_idx++] = bo->handle;
709    }
710    assert(bo_idx == submit->bo_handle_count);
711    submit->bo_handles = (uintptr_t)(void *)bo_handles;
712 
713    const bool needs_sync = do_sem_wait || job->serialize;
714 
715    mtx_lock(&queue->device->mutex);
716    submit->in_sync = needs_sync ? device->last_job_sync : 0;
717    submit->out_sync = device->last_job_sync;
718    int ret = v3dv_ioctl(device->pdevice->render_fd,
719                         DRM_IOCTL_V3D_SUBMIT_CSD, submit);
720    mtx_unlock(&queue->device->mutex);
721 
722    static bool warned = false;
723    if (ret && !warned) {
724       fprintf(stderr, "Compute dispatch returned %s. Expect corruption.\n",
725               strerror(errno));
726       warned = true;
727    }
728 
729    free(bo_handles);
730 
731    if (ret)
732       return vk_error(device, VK_ERROR_DEVICE_LOST);
733 
734    return VK_SUCCESS;
735 }
736 
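/* Dispatches a single job to the appropriate GPU or CPU handler according to
 * its type.
 */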
737 static VkResult
738 queue_submit_job(struct v3dv_queue *queue,
739                  struct v3dv_job *job,
740                  bool do_sem_wait,
741                  pthread_t *wait_thread)
742 {
743    assert(job);
744 
745    switch (job->type) {
746    case V3DV_JOB_TYPE_GPU_CL:
747       return handle_cl_job(queue, job, do_sem_wait);
748    case V3DV_JOB_TYPE_GPU_TFU:
749       return handle_tfu_job(queue, job, do_sem_wait);
750    case V3DV_JOB_TYPE_GPU_CSD:
751       return handle_csd_job(queue, job, do_sem_wait);
752    case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
753       return handle_reset_query_cpu_job(job);
754    case V3DV_JOB_TYPE_CPU_END_QUERY:
755       return handle_end_query_cpu_job(job);
756    case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
757       return handle_copy_query_results_cpu_job(job);
758    case V3DV_JOB_TYPE_CPU_SET_EVENT:
759       return handle_set_event_cpu_job(job, wait_thread != NULL);
760    case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
761       return handle_wait_events_cpu_job(job, do_sem_wait, wait_thread);
762    case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
763       return handle_copy_buffer_to_image_cpu_job(job);
764    case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
765       return handle_csd_indirect_cpu_job(queue, job, do_sem_wait);
766    case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
767       return handle_timestamp_query_cpu_job(job);
768    default:
769       unreachable("Unhandled job type");
770    }
771 }
772 
773 static VkResult
774 queue_create_noop_job(struct v3dv_queue *queue)
775 {
776    struct v3dv_device *device = queue->device;
777    queue->noop_job = vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_job), 8,
778                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
779    if (!queue->noop_job)
780       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
781    v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1);
782 
783    v3dv_X(device, job_emit_noop)(queue->noop_job);
784 
785    return VK_SUCCESS;
786 }
787 
788 static VkResult
789 queue_submit_noop_job(struct v3dv_queue *queue, const VkSubmitInfo *pSubmit)
790 {
791    /* VkQueue host access is externally synchronized so we don't need to lock
792     * here for the static variable.
793     */
794    if (!queue->noop_job) {
795       VkResult result = queue_create_noop_job(queue);
796       if (result != VK_SUCCESS)
797          return result;
798    }
799 
800    return queue_submit_job(queue, queue->noop_job,
801                            pSubmit->waitSemaphoreCount > 0, NULL);
802 }
803 
804 static VkResult
805 queue_submit_cmd_buffer(struct v3dv_queue *queue,
806                         struct v3dv_cmd_buffer *cmd_buffer,
807                         const VkSubmitInfo *pSubmit,
808                         pthread_t *wait_thread)
809 {
810    assert(cmd_buffer);
811    assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_EXECUTABLE);
812 
813    if (list_is_empty(&cmd_buffer->jobs))
814       return queue_submit_noop_job(queue, pSubmit);
815 
816    list_for_each_entry_safe(struct v3dv_job, job,
817                             &cmd_buffer->jobs, list_link) {
818       VkResult result = queue_submit_job(queue, job,
819                                          pSubmit->waitSemaphoreCount > 0,
820                                          wait_thread);
821       if (result != VK_SUCCESS)
822          return result;
823    }
824 
825    return VK_SUCCESS;
826 }
827 
828 static void
829 add_wait_thread_to_list(struct v3dv_device *device,
830                         pthread_t thread,
831                         struct v3dv_queue_submit_wait_info **wait_info)
832 {
833    /* If this is the first time we spawn a wait thread for this queue
834     * submission, create a v3dv_queue_submit_wait_info to track this and
835     * any other threads in the same submission and add it to the global list
836     * in the queue.
837     */
838    if (*wait_info == NULL) {
839       *wait_info =
840          vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_queue_submit_wait_info), 8,
841                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
842       (*wait_info)->device = device;
843    }
844 
845    /* And add the thread to the list of wait threads for this submission */
846    const uint32_t thread_idx = (*wait_info)->wait_thread_count;
847    assert(thread_idx < 16);
848    (*wait_info)->wait_threads[thread_idx].thread = thread;
849    (*wait_info)->wait_threads[thread_idx].finished = false;
850    (*wait_info)->wait_thread_count++;
851 }
852 
853 static void
854 add_signal_semaphores_to_wait_list(struct v3dv_device *device,
855                                    const VkSubmitInfo *pSubmit,
856                                    struct v3dv_queue_submit_wait_info *wait_info)
857 {
858    assert(wait_info);
859 
860    if (pSubmit->signalSemaphoreCount == 0)
861       return;
862 
863    /* FIXME: We put all the semaphores in a list and we signal all of them
864     * together from the submit master thread when the last wait thread in the
865     * submit completes. We could do better though: group the semaphores per
866     * submit and signal them as soon as all wait threads for a particular
867     * submit completes. Not sure if the extra work would be worth it though,
868     * since we only spawn wait threads for event waits and only when the
869     * event is set from the host after the queue submission.
870     */
871 
872    /* Check the size of the current semaphore list */
873    const uint32_t prev_count = wait_info->signal_semaphore_count;
874    const uint32_t prev_alloc_size = prev_count * sizeof(VkSemaphore);
875    VkSemaphore *prev_list = wait_info->signal_semaphores;
876 
877    /* Resize the list to hold the additional semaphores */
878    const uint32_t extra_alloc_size =
879       pSubmit->signalSemaphoreCount * sizeof(VkSemaphore);
880    wait_info->signal_semaphore_count += pSubmit->signalSemaphoreCount;
881    wait_info->signal_semaphores =
882       vk_alloc(&device->vk.alloc, prev_alloc_size + extra_alloc_size, 8,
883                VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
884 
885    /* Copy the old list to the new allocation and free the old list */
886    if (prev_count > 0) {
887       memcpy(wait_info->signal_semaphores, prev_list, prev_alloc_size);
888       vk_free(&device->vk.alloc, prev_list);
889    }
890 
891    /* Add the new semaphores to the list */
892    memcpy(wait_info->signal_semaphores + prev_count,
893           pSubmit->pSignalSemaphores, extra_alloc_size);
894 }
895 
896 static VkResult
897 queue_submit_cmd_buffer_batch(struct v3dv_queue *queue,
898                               const VkSubmitInfo *pSubmit,
899                               struct v3dv_queue_submit_wait_info **wait_info)
900 {
901    VkResult result = VK_SUCCESS;
902    bool has_wait_threads = false;
903 
904    /* Even if we don't have any actual work to submit we still need to wait
905     * on the wait semaphores and signal the signal semaphores and fence, so
906     * in this scenario we just submit a trivial no-op job so we don't have
907     * to do anything special; it should not be a common case anyway.
908     */
909    if (pSubmit->commandBufferCount == 0) {
910       result = queue_submit_noop_job(queue, pSubmit);
911    } else {
912       for (uint32_t i = 0; i < pSubmit->commandBufferCount; i++) {
913          pthread_t wait_thread;
914          struct v3dv_cmd_buffer *cmd_buffer =
915             v3dv_cmd_buffer_from_handle(pSubmit->pCommandBuffers[i]);
916          result = queue_submit_cmd_buffer(queue, cmd_buffer, pSubmit,
917                                           &wait_thread);
918 
919          /* We get VK_NOT_READY if we had to spawn a wait thread for the
920           * command buffer. In that scenario, we want to continue submitting
921           * any pending command buffers in the batch, but we don't want to
922           * process any signal semaphores for the batch until we know we have
923           * submitted every job for every command buffer in the batch.
924           */
925          if (result == VK_NOT_READY) {
926             result = VK_SUCCESS;
927             add_wait_thread_to_list(queue->device, wait_thread, wait_info);
928             has_wait_threads = true;
929          }
930 
931          if (result != VK_SUCCESS)
932             break;
933       }
934    }
935 
936    if (result != VK_SUCCESS)
937       return result;
938 
939    /* If we had to spawn any wait threads in this submit, we need to wait for
940     * all of them to complete before we can signal any semaphores.
941     */
942    if (!has_wait_threads) {
943       return process_semaphores_to_signal(queue->device,
944                                           pSubmit->signalSemaphoreCount,
945                                           pSubmit->pSignalSemaphores);
946    } else {
947       assert(*wait_info);
948       add_signal_semaphores_to_wait_list(queue->device, pSubmit, *wait_info);
949       return VK_NOT_READY;
950    }
951 }
952 
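/* Entry point of the per-submission master wait thread: joins all command
 * buffer wait threads, then signals the accumulated semaphores and the fence
 * and releases the wait_info.
 */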
953 static void *
954 master_wait_thread_func(void *_wait_info)
955 {
956    struct v3dv_queue_submit_wait_info *wait_info =
957       (struct v3dv_queue_submit_wait_info *) _wait_info;
958 
959    struct v3dv_queue *queue = &wait_info->device->queue;
960 
961    /* Wait for all command buffer wait threads to complete */
962    for (uint32_t i = 0; i < wait_info->wait_thread_count; i++) {
963       int res = pthread_join(wait_info->wait_threads[i].thread, NULL);
964       if (res != 0)
965          fprintf(stderr, "Wait thread failed to join.\n");
966    }
967 
968    /* Signal semaphores and fences */
969    VkResult result;
970    result = process_semaphores_to_signal(wait_info->device,
971                                          wait_info->signal_semaphore_count,
972                                          wait_info->signal_semaphores);
973    if (result != VK_SUCCESS)
974       fprintf(stderr, "Wait thread semaphore signaling failed.");
975 
976    result = process_fence_to_signal(wait_info->device, wait_info->fence);
977    if (result != VK_SUCCESS)
978       fprintf(stderr, "Wait thread fence signaling failed.");
979 
980    /* Release wait_info */
981    mtx_lock(&queue->mutex);
982    list_del(&wait_info->list_link);
983    mtx_unlock(&queue->mutex);
984 
985    vk_free(&wait_info->device->vk.alloc, wait_info->signal_semaphores);
986    vk_free(&wait_info->device->vk.alloc, wait_info);
987 
988    return NULL;
989 }
990 
991 
992 static VkResult
993 spawn_master_wait_thread(struct v3dv_queue *queue,
994                          struct v3dv_queue_submit_wait_info *wait_info)
995 
996 {
997    VkResult result = VK_SUCCESS;
998 
999    mtx_lock(&queue->mutex);
1000    if (pthread_create(&wait_info->master_wait_thread, NULL,
1001                       master_wait_thread_func, wait_info)) {
1002       result = vk_error(queue, VK_ERROR_DEVICE_LOST);
1003       goto done;
1004    }
1005 
1006    list_addtail(&wait_info->list_link, &queue->submit_wait_list);
1007 
1008 done:
1009    mtx_unlock(&queue->mutex);
1010    return result;
1011 }
1012 
1013 VKAPI_ATTR VkResult VKAPI_CALL
1014 v3dv_QueueSubmit(VkQueue _queue,
1015                  uint32_t submitCount,
1016                  const VkSubmitInfo* pSubmits,
1017                  VkFence fence)
1018 {
1019    V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
1020 
1021    struct v3dv_queue_submit_wait_info *wait_info = NULL;
1022 
1023    VkResult result = VK_SUCCESS;
1024    for (uint32_t i = 0; i < submitCount; i++) {
1025       result = queue_submit_cmd_buffer_batch(queue, &pSubmits[i], &wait_info);
1026       if (result != VK_SUCCESS && result != VK_NOT_READY)
1027          goto done;
1028    }
1029 
1030    if (!wait_info) {
1031       assert(result != VK_NOT_READY);
1032       result = process_fence_to_signal(queue->device, fence);
1033       goto done;
1034    }
1035 
1036    /* We spawned wait threads, so we have to spawn a master thread for this
1037     * queue submission that waits for all other threads to complete and then
1038     * will signal any semaphores and fences.
1039     */
1040    assert(wait_info);
1041    wait_info->fence = fence;
1042    result = spawn_master_wait_thread(queue, wait_info);
1043 
1044 done:
1045    return result;
1046 }
1047 
1048 static void
1049 destroy_syncobj(uint32_t device_fd, uint32_t *sync)
1050 {
1051    assert(sync);
1052    drmSyncobjDestroy(device_fd, *sync);
1053    *sync = 0;
1054 }
1055 
1056 VKAPI_ATTR VkResult VKAPI_CALL
1057 v3dv_CreateSemaphore(VkDevice _device,
1058                      const VkSemaphoreCreateInfo *pCreateInfo,
1059                      const VkAllocationCallbacks *pAllocator,
1060                      VkSemaphore *pSemaphore)
1061 {
1062    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1063 
1064    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);
1065 
1066    struct v3dv_semaphore *sem =
1067       vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_semaphore),
1068                        VK_OBJECT_TYPE_SEMAPHORE);
1069    if (sem == NULL)
1070       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1071 
1072    int ret = drmSyncobjCreate(device->pdevice->render_fd, 0, &sem->sync);
1073    if (ret) {
1074       vk_object_free(&device->vk, pAllocator, sem);
1075       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1076    }
1077 
1078    *pSemaphore = v3dv_semaphore_to_handle(sem);
1079 
1080    return VK_SUCCESS;
1081 }
1082 
1083 VKAPI_ATTR void VKAPI_CALL
1084 v3dv_GetPhysicalDeviceExternalSemaphoreProperties(
1085     VkPhysicalDevice physicalDevice,
1086     const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
1087     VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
1088 {
1089    switch (pExternalSemaphoreInfo->handleType) {
1090    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
1091    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
1092       pExternalSemaphoreProperties->exportFromImportedHandleTypes =
1093          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
1094          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
1095       pExternalSemaphoreProperties->compatibleHandleTypes =
1096          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
1097          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
1098 
1099       /* FIXME: we can't import external semaphores until we improve the kernel
1100        * submit interface to handle multiple in syncobjs, because once we have
1101        * an imported semaphore in our list of semaphores to wait on, we can no
1102        * longer use the workaround of waiting on the last syncobj fence produced
1103        * from the device, since the imported semaphore may not (and in fact, it
1104        * would typically not) have been produced from the same device.
1105        *
1106        * This behavior is exercised via dEQP-VK.synchronization.cross_instance.*.
1107        * Particularly, this test:
1108        * dEQP-VK.synchronization.cross_instance.dedicated.
1109        * write_ssbo_compute_read_vertex_input.buffer_16384_binary_semaphore_fd
1110        * fails consistently because of this, so it'll be a good reference to
1111        * verify the implementation when the kernel bits are in place.
1112        */
1113       pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
1114 
1115       /* FIXME: See comment in GetPhysicalDeviceExternalFenceProperties
1116        * for details on why we can't export to SYNC_FD.
1117        */
1118       if (pExternalSemaphoreInfo->handleType !=
1119           VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
1120          pExternalSemaphoreProperties->externalSemaphoreFeatures |=
1121             VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT;
1122       }
1123       break;
1124    default:
1125       pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
1126       pExternalSemaphoreProperties->compatibleHandleTypes = 0;
1127       pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
1128       break;
1129    }
1130 }
1131 
1132 VKAPI_ATTR VkResult VKAPI_CALL
1133 v3dv_ImportSemaphoreFdKHR(
1134    VkDevice _device,
1135    const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
1136 {
1137    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1138    V3DV_FROM_HANDLE(v3dv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
1139 
1140    assert(pImportSemaphoreFdInfo->sType ==
1141           VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR);
1142 
1143    int fd = pImportSemaphoreFdInfo->fd;
1144    int render_fd = device->pdevice->render_fd;
1145 
1146    bool is_temporary =
1147       pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT ||
1148       (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT);
1149 
1150    uint32_t new_sync;
1151    switch (pImportSemaphoreFdInfo->handleType) {
1152    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: {
1153       /* "If handleType is VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, the
1154        *  special value -1 for fd is treated like a valid sync file descriptor
1155        *  referring to an object that has already signaled. The import
1156        *  operation will succeed and the VkSemaphore will have a temporarily
1157        *  imported payload as if a valid file descriptor had been provided."
1158        */
1159       unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0;
1160       if (drmSyncobjCreate(render_fd, flags, &new_sync))
1161          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1162 
1163       if (fd != -1) {
1164          if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) {
1165             drmSyncobjDestroy(render_fd, new_sync);
1166             return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1167          }
1168       }
1169       break;
1170    }
1171    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: {
1172       if (drmSyncobjFDToHandle(render_fd, fd, &new_sync))
1173          return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1174       break;
1175    }
1176    default:
1177       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1178    }
1179 
1180    destroy_syncobj(render_fd, &sem->temp_sync);
1181    if (is_temporary) {
1182       sem->temp_sync = new_sync;
1183    } else {
1184       destroy_syncobj(render_fd, &sem->sync);
1185       sem->sync = new_sync;
1186    }
1187 
1188    /* From the Vulkan 1.0.53 spec:
1189     *
1190     *    "Importing a semaphore payload from a file descriptor transfers
1191     *     ownership of the file descriptor from the application to the
1192     *     Vulkan implementation. The application must not perform any
1193     *     operations on the file descriptor after a successful import."
1194     *
1195     * If the import fails, we leave the file descriptor open.
1196     */
1197    if (fd != -1)
1198       close(fd);
1199 
1200    return VK_SUCCESS;
1201 }
1202 
1203 VKAPI_ATTR VkResult VKAPI_CALL
1204 v3dv_GetSemaphoreFdKHR(VkDevice _device,
1205                        const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
1206                        int *pFd)
1207 {
1208    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1209    V3DV_FROM_HANDLE(v3dv_semaphore, sem, pGetFdInfo->semaphore);
1210 
1211    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR);
1212 
1213    *pFd = -1;
1214    int render_fd = device->pdevice->render_fd;
1215    switch (pGetFdInfo->handleType) {
1216    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: {
1217       drmSyncobjExportSyncFile(render_fd, sem->sync, pFd);
1218       if (*pFd == -1)
1219          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1220       break;
1221    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
1222       drmSyncobjHandleToFD(render_fd, sem->sync, pFd);
1223       if (*pFd == -1)
1224          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1225       break;
1226    }
1227    default:
1228       unreachable("Unsupported external semaphore handle type");
1229    }
1230 
1231    return VK_SUCCESS;
1232 }
1233 
1234 VKAPI_ATTR void VKAPI_CALL
1235 v3dv_DestroySemaphore(VkDevice _device,
1236                       VkSemaphore semaphore,
1237                       const VkAllocationCallbacks *pAllocator)
1238 {
1239    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1240    V3DV_FROM_HANDLE(v3dv_semaphore, sem, semaphore);
1241 
1242    if (sem == NULL)
1243       return;
1244 
1245    destroy_syncobj(device->pdevice->render_fd, &sem->sync);
1246    destroy_syncobj(device->pdevice->render_fd, &sem->temp_sync);
1247 
1248    vk_object_free(&device->vk, pAllocator, sem);
1249 }
1250 
1251 VKAPI_ATTR VkResult VKAPI_CALL
1252 v3dv_CreateFence(VkDevice _device,
1253                  const VkFenceCreateInfo *pCreateInfo,
1254                  const VkAllocationCallbacks *pAllocator,
1255                  VkFence *pFence)
1256 {
1257    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1258 
1259    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1260 
1261    struct v3dv_fence *fence =
1262       vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_fence),
1263                        VK_OBJECT_TYPE_FENCE);
1264    if (fence == NULL)
1265       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1266 
1267    unsigned flags = 0;
1268    if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
1269       flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
1270    int ret = drmSyncobjCreate(device->pdevice->render_fd, flags, &fence->sync);
1271    if (ret) {
1272       vk_object_free(&device->vk, pAllocator, fence);
1273       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1274    }
1275 
1276    *pFence = v3dv_fence_to_handle(fence);
1277 
1278    return VK_SUCCESS;
1279 }
1280 
1281 VKAPI_ATTR void VKAPI_CALL
1282 v3dv_GetPhysicalDeviceExternalFenceProperties(
1283     VkPhysicalDevice physicalDevice,
1284     const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
1285     VkExternalFenceProperties *pExternalFenceProperties)
1286 
1287 {
1288    switch (pExternalFenceInfo->handleType) {
1289    case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
1290    case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
1291       pExternalFenceProperties->exportFromImportedHandleTypes =
1292          VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
1293          VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
1294       pExternalFenceProperties->compatibleHandleTypes =
1295          VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
1296          VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
1297       pExternalFenceProperties->externalFenceFeatures =
1298          VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
1299 
1300       /* FIXME: SYNC_FD exports the actual fence referenced by the syncobj, not
1301        * the syncobj itself, and that fence is only created after we have
1302        * submitted to the kernel and updated the syncobj for the fence to import
1303        * the actual DRM fence created with the submission. Unfortunately, if the
1304        * queue submission has a 'wait for events' we may hold any jobs after the
1305        * wait in a user-space thread until the events are signaled, and in that
1306        * case we don't update the out fence of the submit until the events are
1307        * signaled and we can submit all the jobs involved with the vkQueueSubmit
1308        * call. This means that if the application submits with an out fence and
1309        * a wait for events, trying to export the out fence to a SYNC_FD right
1310        * after the submission and before the events are signaled will fail,
1311        * because the actual DRM fence won't exist yet. This is not a problem
1312        * with OPAQUE_FD because in this case we export the entire syncobj, not
1313        * the underlying DRM fence. To fix this we need to rework our kernel
1314        * interface to be more flexible and accept multiple in/out syncobjs so
1315        * we can implement event waits as regular fence waits on the kernel side,
1316        * until then, we can only reliably export OPAQUE_FD.
1317        */
1318       if (pExternalFenceInfo->handleType !=
1319           VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
1320          pExternalFenceProperties->externalFenceFeatures |=
1321             VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT;
1322       }
1323       break;
1324    default:
1325       pExternalFenceProperties->exportFromImportedHandleTypes = 0;
1326       pExternalFenceProperties->compatibleHandleTypes = 0;
1327       pExternalFenceProperties->externalFenceFeatures = 0;
1328       break;
1329    }
1330 }
1331 
1332 VKAPI_ATTR VkResult VKAPI_CALL
1333 v3dv_ImportFenceFdKHR(VkDevice _device,
1334                       const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
1335 {
1336    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1337    V3DV_FROM_HANDLE(v3dv_fence, fence, pImportFenceFdInfo->fence);
1338 
1339    assert(pImportFenceFdInfo->sType ==
1340           VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR);
1341 
1342    int fd = pImportFenceFdInfo->fd;
1343    int render_fd = device->pdevice->render_fd;
1344 
1345    bool is_temporary =
1346       pImportFenceFdInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT ||
1347       (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT);
1348 
1349    uint32_t new_sync;
1350    switch (pImportFenceFdInfo->handleType) {
1351    case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
1352       /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the
1353        *  special value -1 for fd is treated like a valid sync file descriptor
1354        *  referring to an object that has already signaled. The import
1355        *  operation will succeed and the VkFence will have a temporarily
1356        *  imported payload as if a valid file descriptor had been provided."
1357        */
1358       unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0;
1359       if (drmSyncobjCreate(render_fd, flags, &new_sync))
1360          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1361 
1362       if (fd != -1) {
1363          if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) {
1364             drmSyncobjDestroy(render_fd, new_sync);
1365             return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1366          }
1367       }
1368       break;
1369    }
1370    case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: {
1371       if (drmSyncobjFDToHandle(render_fd, fd, &new_sync))
1372          return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1373       break;
1374    }
1375    default:
1376       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1377    }
1378 
1379    destroy_syncobj(render_fd, &fence->temp_sync);
1380    if (is_temporary) {
1381       fence->temp_sync = new_sync;
1382    } else {
1383       destroy_syncobj(render_fd, &fence->sync);
1384       fence->sync = new_sync;
1385    }
1386 
1387    /* From the Vulkan 1.0.53 spec:
1388     *
1389     *    "Importing a fence payload from a file descriptor transfers
1390     *     ownership of the file descriptor from the application to the
1391     *     Vulkan implementation. The application must not perform any
1392     *     operations on the file descriptor after a successful import."
1393     *
1394     * If the import fails, we leave the file descriptor open.
1395     */
1396    if (fd != -1)
1397       close(fd);
1398 
1399    return VK_SUCCESS;
1400 }
1401 
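/*
 * Illustrative sketch, not part of the driver: how an application would hand a
 * sync_fd to the import path above. It assumes a device created with
 * VK_KHR_external_fence_fd enabled; the helper name is hypothetical. Passing
 * fd == -1 is valid for SYNC_FD and imports an already-signaled payload, as
 * quoted in the spec excerpt above, and on success the fd is owned by the
 * implementation and must not be used or closed by the application again.
 */
static VkResult
example_import_sync_fd_into_fence(VkDevice dev, VkFence fence, int sync_fd)
{
   PFN_vkImportFenceFdKHR import_fence_fd = (PFN_vkImportFenceFdKHR)
      vkGetDeviceProcAddr(dev, "vkImportFenceFdKHR");
   if (!import_fence_fd)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   const VkImportFenceFdInfoKHR import_info = {
      .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR,
      .fence = fence,
      /* SYNC_FD has copy transference, so the import must be temporary. */
      .flags = VK_FENCE_IMPORT_TEMPORARY_BIT,
      .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT,
      .fd = sync_fd,
   };
   return import_fence_fd(dev, &import_info);
}
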
1402 VKAPI_ATTR void VKAPI_CALL
1403 v3dv_DestroyFence(VkDevice _device,
1404                   VkFence _fence,
1405                   const VkAllocationCallbacks *pAllocator)
1406 {
1407    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1408    V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);
1409 
1410    if (fence == NULL)
1411       return;
1412 
1413    destroy_syncobj(device->pdevice->render_fd, &fence->sync);
1414    destroy_syncobj(device->pdevice->render_fd, &fence->temp_sync);
1415 
1416    vk_object_free(&device->vk, pAllocator, fence);
1417 }
1418 
1419 VKAPI_ATTR VkResult VKAPI_CALL
1420 v3dv_GetFenceStatus(VkDevice _device, VkFence _fence)
1421 {
1422    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1423    V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);
1424 
1425    uint32_t sync = fence->temp_sync ? fence->temp_sync : fence->sync;
1426    int ret = drmSyncobjWait(device->pdevice->render_fd, &sync, 1, 0,
                                 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
1427    if (ret == -ETIME)
1428       return VK_NOT_READY;
1429    else if (ret)
1430       return vk_error(device, VK_ERROR_DEVICE_LOST);
1431    return VK_SUCCESS;
1432 }
1433 
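/*
 * Illustrative sketch, not part of the driver: drmSyncobjWait() with a zero
 * timeout acts as a non-blocking poll, which is how the status query above is
 * implemented. DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT makes a syncobj with no
 * fence attached yet (a fence that has not been submitted against) report
 * -ETIME instead of an error, so it maps cleanly onto VK_NOT_READY. Note that
 * the query above prefers the temporarily imported payload when one exists,
 * matching vkWaitForFences further down. The helper name is hypothetical.
 */
static bool
example_syncobj_is_signaled(int drm_fd, uint32_t syncobj)
{
   /* Zero timeout: return immediately with 0 (signaled) or -ETIME. */
   int ret = drmSyncobjWait(drm_fd, &syncobj, 1, 0,
                            DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
   return ret == 0;
}
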
1434 VKAPI_ATTR VkResult VKAPI_CALL
1435 v3dv_GetFenceFdKHR(VkDevice _device,
1436                    const VkFenceGetFdInfoKHR *pGetFdInfo,
1437                    int *pFd)
1438 {
1439    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1440    V3DV_FROM_HANDLE(v3dv_fence, fence, pGetFdInfo->fence);
1441 
1442    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR);
1443 
1444    *pFd = -1;
1445    int render_fd = device->pdevice->render_fd;
1446    switch (pGetFdInfo->handleType) {
1447    case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
1448       drmSyncobjExportSyncFile(render_fd, fence->sync, pFd);
1449       if (*pFd == -1)
1450          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1451       break;
1452    }
1453    case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
1454       drmSyncobjHandleToFD(render_fd, fence->sync, pFd);
1455       if (*pFd == -1)
1456          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1457       break;
1458    default:
1459       unreachable("Unsupported external fence handle type");
1460    }
1461 
1462    return VK_SUCCESS;
1463 }
1464 
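/*
 * Illustrative sketch, not part of the driver: the two export paths used above
 * have different transference semantics. drmSyncobjExportSyncFile() exports
 * the DRM fence currently attached to the syncobj (copy transference) and
 * fails when no fence has been attached yet, which is the limitation described
 * in the FIXME on SYNC_FD exports. drmSyncobjHandleToFD() exports a reference
 * to the syncobj itself (reference transference), so it works even before any
 * fence exists. The helper name and parameters are hypothetical.
 */
static int
example_export_syncobj(int drm_fd, uint32_t syncobj,
                       bool as_sync_file, int *out_fd)
{
   *out_fd = -1;
   if (as_sync_file) {
      /* Copy transference: a snapshot of the fence attached right now. */
      return drmSyncobjExportSyncFile(drm_fd, syncobj, out_fd);
   } else {
      /* Reference transference: the importer shares the syncobj itself. */
      return drmSyncobjHandleToFD(drm_fd, syncobj, out_fd);
   }
}
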
1465 VKAPI_ATTR VkResult VKAPI_CALL
1466 v3dv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
1467 {
1468    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1469 
1470    uint32_t *syncobjs = vk_alloc(&device->vk.alloc,
1471                                  sizeof(*syncobjs) * fenceCount, 8,
1472                                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1473    if (!syncobjs)
1474       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1475 
1476    int render_fd = device->pdevice->render_fd;
1477    uint32_t reset_count = 0;
1478    for (uint32_t i = 0; i < fenceCount; i++) {
1479       struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
1480       /* From the Vulkan spec, section 'Importing Fence Payloads':
1481        *
1482        *    "If the import is temporary, the fence will be restored to its
1483        *     permanent state the next time that fence is passed to
1484        *     vkResetFences.
1485        *
1486        *     Note: Restoring a fence to its prior permanent payload is a
1487        *     distinct operation from resetting a fence payload."
1488        *
1489        * To restore the previous state, we just need to destroy the temporary.
1490        */
1491       if (fence->temp_sync)
1492          destroy_syncobj(render_fd, &fence->temp_sync);
1493       else
1494          syncobjs[reset_count++] = fence->sync;
1495    }
1496 
1497    int ret = 0;
1498    if (reset_count > 0)
1499       ret = drmSyncobjReset(render_fd, syncobjs, reset_count);
1500 
1501    vk_free(&device->vk.alloc, syncobjs);
1502 
1503    if (ret)
1504       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1505    return VK_SUCCESS;
1506 }
1507 
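/*
 * Illustrative sketch, not part of the driver: lifecycle of a temporary import
 * with respect to vkResetFences, matching the spec excerpt quoted in the loop
 * above. After a temporary SYNC_FD import, the next vkResetFences on that
 * fence only drops the temporary payload and restores the permanent one; it
 * does not reset the permanent payload itself. The helper names are
 * hypothetical application code.
 */
static void
example_temporary_import_then_reset(VkDevice dev, VkFence fence, int sync_fd)
{
   /* Temporarily adopt an external payload (see the import sketch above). */
   example_import_sync_fd_into_fence(dev, fence, sync_fd);

   /* Wait on the temporary payload... */
   vkWaitForFences(dev, 1, &fence, VK_TRUE, UINT64_MAX);

   /* ...then restore the permanent payload. In the driver this destroys
    * fence->temp_sync instead of resetting fence->sync.
    */
   vkResetFences(dev, 1, &fence);
}
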
1508 VKAPI_ATTR VkResult VKAPI_CALL
1509 v3dv_WaitForFences(VkDevice _device,
1510                    uint32_t fenceCount,
1511                    const VkFence *pFences,
1512                    VkBool32 waitAll,
1513                    uint64_t timeout)
1514 {
1515    V3DV_FROM_HANDLE(v3dv_device, device, _device);
1516 
1517    const uint64_t abs_timeout = get_absolute_timeout(timeout);
1518 
1519    uint32_t *syncobjs = vk_alloc(&device->vk.alloc,
1520                                  sizeof(*syncobjs) * fenceCount, 8,
1521                                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1522    if (!syncobjs)
1523       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1524 
1525    for (uint32_t i = 0; i < fenceCount; i++) {
1526       struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
1527       syncobjs[i] = fence->temp_sync ? fence->temp_sync : fence->sync;
1528    }
1529 
1530    unsigned flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
1531    if (waitAll)
1532       flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
1533 
1534    int ret;
1535    do {
1536       ret = drmSyncobjWait(device->pdevice->render_fd, syncobjs, fenceCount,
1537                            abs_timeout, flags, NULL);
1538    } while (ret == -ETIME && gettime_ns() < abs_timeout);
1539 
1540    vk_free(&device->vk.alloc, syncobjs);
1541 
1542    if (ret == -ETIME)
1543       return VK_TIMEOUT;
1544    else if (ret)
1545       return vk_error(device, VK_ERROR_DEVICE_LOST);
1546    return VK_SUCCESS;
1547 }
1548 
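/*
 * Illustrative sketch, not part of the driver: how an application-side wait
 * with a finite timeout maps onto the implementation above. waitAll selects
 * DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, and the relative timeout is converted to an
 * absolute CLOCK_MONOTONIC deadline before the syncobj wait. The helper name
 * and the 100 ms timeout are arbitrary examples.
 */
static bool
example_wait_for_fence_100ms(VkDevice dev, VkFence fence)
{
   const uint64_t timeout_ns = 100ull * 1000 * 1000; /* 100 ms, relative */
   VkResult result = vkWaitForFences(dev, 1, &fence, VK_TRUE, timeout_ns);
   if (result == VK_TIMEOUT)
      return false; /* fence not signaled within 100 ms */
   return result == VK_SUCCESS;
}
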
1549 VKAPI_ATTR VkResult VKAPI_CALL
1550 v3dv_QueueBindSparse(VkQueue _queue,
1551                      uint32_t bindInfoCount,
1552                      const VkBindSparseInfo *pBindInfo,
1553                      VkFence fence)
1554 {
1555    V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
1556    return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
1557 }
1558