/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * This file implements VkQueue, VkFence, and VkSemaphore
 */

#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

#include "util/os_file.h"

#include "anv_private.h"
#include "vk_util.h"

#include "genxml/gen7_pack.h"

uint64_t
anv_gettime_ns(void)
{
   struct timespec current;
   clock_gettime(CLOCK_MONOTONIC, &current);
   return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
}

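/* Convert a relative timeout in nanoseconds into an absolute deadline on
 * CLOCK_MONOTONIC, clamped so the result never exceeds INT64_MAX and thus
 * stays representable as the kernel's signed 64-bit timeout.
 */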
uint64_t
anv_get_absolute_timeout(uint64_t timeout)
{
   if (timeout == 0)
      return 0;
   uint64_t current_time = anv_gettime_ns();
   uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;

   timeout = MIN2(max_timeout, timeout);

   return (current_time + timeout);
}

static int64_t
anv_get_relative_timeout(uint64_t abs_timeout)
{
   uint64_t now = anv_gettime_ns();

   /* We don't want negative timeouts.
    *
    * DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is
    * supposed to block indefinitely for timeouts < 0.  Unfortunately,
    * this was broken for a couple of kernel releases.  Since there's
    * no way to know whether or not the kernel we're using is one of
    * the broken ones, the best we can do is to clamp the timeout to
    * INT64_MAX.  This limits the maximum timeout from 584 years to
    * 292 years - likely not a big deal.
    */
   if (abs_timeout < now)
      return 0;

   uint64_t rel_timeout = abs_timeout - now;
   if (rel_timeout > (uint64_t) INT64_MAX)
      rel_timeout = INT64_MAX;

   return rel_timeout;
}

static struct anv_semaphore *anv_semaphore_ref(struct anv_semaphore *semaphore);
static void anv_semaphore_unref(struct anv_device *device,
                                struct anv_semaphore *semaphore);
static void anv_semaphore_impl_cleanup(struct anv_device *device,
                                       struct anv_semaphore_impl *impl);

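/* Free a queue submission along with everything it owns: temporary semaphore
 * state, references on sync-fd semaphores, both sync-file fds, and all of
 * the dynamically grown fence/timeline arrays.
 */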
static void
anv_queue_submit_free(struct anv_device *device,
                      struct anv_queue_submit *submit)
{
   const VkAllocationCallbacks *alloc = submit->alloc;

   for (uint32_t i = 0; i < submit->temporary_semaphore_count; i++)
      anv_semaphore_impl_cleanup(device, &submit->temporary_semaphores[i]);
   for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++)
      anv_semaphore_unref(device, submit->sync_fd_semaphores[i]);
   /* Execbuf does not consume the in_fence.  It's our job to close it. */
   if (submit->in_fence != -1) {
      assert(!device->has_thread_submit);
      close(submit->in_fence);
   }
   if (submit->out_fence != -1) {
      assert(!device->has_thread_submit);
      close(submit->out_fence);
   }
   vk_free(alloc, submit->fences);
   vk_free(alloc, submit->fence_values);
   vk_free(alloc, submit->temporary_semaphores);
   vk_free(alloc, submit->wait_timelines);
   vk_free(alloc, submit->wait_timeline_values);
   vk_free(alloc, submit->signal_timelines);
   vk_free(alloc, submit->signal_timeline_values);
   vk_free(alloc, submit->fence_bos);
   vk_free(alloc, submit);
}

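/* A deferred submission is ready to run once every timeline point it waits
 * on has at least been submitted (i.e. highest_pending has caught up to the
 * wait value); actual completion ordering is then left to the kernel.
 */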
static bool
anv_queue_submit_ready_locked(struct anv_queue_submit *submit)
{
   for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
      if (submit->wait_timeline_values[i] > submit->wait_timelines[i]->highest_pending)
         return false;
   }

   return true;
}

static VkResult
anv_timeline_init(struct anv_device *device,
                  struct anv_timeline *timeline,
                  uint64_t initial_value)
{
   timeline->highest_past =
      timeline->highest_pending = initial_value;
   list_inithead(&timeline->points);
   list_inithead(&timeline->free_points);

   return VK_SUCCESS;
}

static void
anv_timeline_finish(struct anv_device *device,
                    struct anv_timeline *timeline)
{
   list_for_each_entry_safe(struct anv_timeline_point, point,
                            &timeline->free_points, link) {
      list_del(&point->link);
      anv_device_release_bo(device, point->bo);
      vk_free(&device->vk.alloc, point);
   }
   list_for_each_entry_safe(struct anv_timeline_point, point,
                            &timeline->points, link) {
      list_del(&point->link);
      anv_device_release_bo(device, point->bo);
      vk_free(&device->vk.alloc, point);
   }
}

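/* Grab a timeline point for a new signal operation, preferring a recycled
 * entry from free_points and otherwise allocating a fresh BO that external
 * waiters can poll through implicit sync.
 */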
static VkResult
anv_timeline_add_point_locked(struct anv_device *device,
                              struct anv_timeline *timeline,
                              uint64_t value,
                              struct anv_timeline_point **point)
{
   VkResult result = VK_SUCCESS;

   if (list_is_empty(&timeline->free_points)) {
      *point =
         vk_zalloc(&device->vk.alloc, sizeof(**point),
                   8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!(*point))
         result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      if (result == VK_SUCCESS) {
         result = anv_device_alloc_bo(device, 4096,
                                      ANV_BO_ALLOC_EXTERNAL |
                                      ANV_BO_ALLOC_IMPLICIT_SYNC,
                                      0 /* explicit_address */,
                                      &(*point)->bo);
         if (result != VK_SUCCESS)
            vk_free(&device->vk.alloc, *point);
      }
   } else {
      *point = list_first_entry(&timeline->free_points,
                                struct anv_timeline_point, link);
      list_del(&(*point)->link);
   }

   if (result == VK_SUCCESS) {
      (*point)->serial = value;
      list_addtail(&(*point)->link, &timeline->points);
   }

   return result;
}

static VkResult
anv_timeline_gc_locked(struct anv_device *device,
                       struct anv_timeline *timeline)
{
   list_for_each_entry_safe(struct anv_timeline_point, point,
                            &timeline->points, link) {
      /* timeline->highest_pending is only incremented once submission has
       * happened.  If this point has a greater serial, it means the point
       * hasn't been submitted yet.
       */
      if (point->serial > timeline->highest_pending)
         return VK_SUCCESS;

      /* If someone is waiting on this time point, consider it busy and don't
       * try to recycle it.  There's a slim possibility that it's no longer
       * busy by the time we look at it but we would be recycling it out from
       * under a waiter and that can lead to weird races.
       *
       * We walk the list in-order so if this time point is still busy, so is
       * every following time point.
       */
      assert(point->waiting >= 0);
      if (point->waiting)
         return VK_SUCCESS;

      /* Garbage collect any signaled point. */
      VkResult result = anv_device_bo_busy(device, point->bo);
      if (result == VK_NOT_READY) {
         /* We walk the list in-order so if this time point is still busy,
          * so is every following time point.
          */
         return VK_SUCCESS;
      } else if (result != VK_SUCCESS) {
         return result;
      }

      assert(timeline->highest_past < point->serial);
      timeline->highest_past = point->serial;

      list_del(&point->link);
      list_add(&point->link, &timeline->free_points);
   }

   return VK_SUCCESS;
}

static VkResult anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
                                              struct anv_bo *bo,
                                              bool signal);

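/* Flush one submission out to the kernel: lower its timeline waits/signals
 * to fence BOs, run the execbuf, then advance highest_pending on the
 * signaled timelines (or, on failure, also highest_past so that waiters
 * wake up and observe the device-lost state).
 */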
static VkResult
anv_queue_submit_timeline_locked(struct anv_queue *queue,
                                 struct anv_queue_submit *submit)
{
   VkResult result;

   for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
      struct anv_timeline *timeline = submit->wait_timelines[i];
      uint64_t wait_value = submit->wait_timeline_values[i];

      if (timeline->highest_past >= wait_value)
         continue;

      list_for_each_entry(struct anv_timeline_point, point, &timeline->points, link) {
         if (point->serial < wait_value)
            continue;
         result = anv_queue_submit_add_fence_bo(submit, point->bo, false);
         if (result != VK_SUCCESS)
            return result;
         break;
      }
   }
   for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
      struct anv_timeline *timeline = submit->signal_timelines[i];
      uint64_t signal_value = submit->signal_timeline_values[i];
      struct anv_timeline_point *point;

      result = anv_timeline_add_point_locked(queue->device, timeline,
                                             signal_value, &point);
      if (result != VK_SUCCESS)
         return result;

      result = anv_queue_submit_add_fence_bo(submit, point->bo, true);
      if (result != VK_SUCCESS)
         return result;
   }

   result = anv_queue_execbuf_locked(queue, submit);

   if (result == VK_SUCCESS) {
      /* Update the pending values in the timeline objects. */
      for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
         struct anv_timeline *timeline = submit->signal_timelines[i];
         uint64_t signal_value = submit->signal_timeline_values[i];

         assert(signal_value > timeline->highest_pending);
         timeline->highest_pending = signal_value;
      }

      /* Update signaled semaphores backed by syncfd. */
      for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
         struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
         /* Out fences can't have temporary state because that would imply
          * that we imported a sync file and are trying to signal it.
          */
         assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
         struct anv_semaphore_impl *impl = &semaphore->permanent;

         assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
         impl->fd = os_dupfd_cloexec(submit->out_fence);
      }
   } else {
      /* Unblock any waiter by signaling the points; the application will get
       * a device lost error code.
       */
      for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
         struct anv_timeline *timeline = submit->signal_timelines[i];
         uint64_t signal_value = submit->signal_timeline_values[i];

         assert(signal_value > timeline->highest_pending);
         timeline->highest_past = timeline->highest_pending = signal_value;
      }
   }

   return result;
}

static VkResult
anv_queue_submit_deferred_locked(struct anv_queue *queue, uint32_t *advance)
{
   VkResult result = VK_SUCCESS;

   /* Go through all the queued submissions and submit them until we find one
    * that's waiting on a point that hasn't materialized yet.
    */
   list_for_each_entry_safe(struct anv_queue_submit, submit,
                            &queue->queued_submits, link) {
      if (!anv_queue_submit_ready_locked(submit))
         break;

      (*advance)++;
      list_del(&submit->link);

      result = anv_queue_submit_timeline_locked(queue, submit);

      anv_queue_submit_free(queue->device, submit);

      if (result != VK_SUCCESS)
         break;
   }

   return result;
}

static VkResult
anv_device_submit_deferred_locked(struct anv_device *device)
{
   uint32_t advance = 0;
   return anv_queue_submit_deferred_locked(&device->queue, &advance);
}

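/* Signal every syncobj attached to this submission.  This is only used on
 * the thread-submit failure path to unblock anything waiting on fences from
 * a submission whose execbuf failed.
 */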
static void
anv_queue_submit_signal_fences(struct anv_device *device,
                               struct anv_queue_submit *submit)
{
   for (uint32_t i = 0; i < submit->fence_count; i++) {
      if (submit->fences[i].flags & I915_EXEC_FENCE_SIGNAL) {
         anv_gem_syncobj_timeline_signal(device, &submit->fences[i].handle,
                                         &submit->fence_values[i], 1);
      }
   }
}

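/* Entry point of the submission thread: drain queued_submits, wait for each
 * submission's timeline dependencies to materialize before calling into
 * execbuf, and sleep on the condition variable whenever the queue is empty.
 */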
static void *
anv_queue_task(void *_queue)
{
   struct anv_queue *queue = _queue;

   pthread_mutex_lock(&queue->mutex);

   while (!queue->quit) {
      while (!list_is_empty(&queue->queued_submits)) {
         struct anv_queue_submit *submit =
            list_first_entry(&queue->queued_submits, struct anv_queue_submit, link);
         list_del(&submit->link);

         pthread_mutex_unlock(&queue->mutex);

         VkResult result = VK_ERROR_DEVICE_LOST;

         /* Wait for timeline points to materialize before submitting.  We
          * need to do this because we're using threads to do the submit to
          * i915.  We could end up in a situation where the application
          * submits to 2 queues with the first submit creating the dma-fence
          * for the second.  But because the scheduling of the submission
          * threads might wake up the second queue thread first, this would
          * make that execbuf fail because the dma-fence it depends on hasn't
          * materialized yet.
          */
         if (!queue->lost && submit->wait_timeline_count > 0) {
            int ret = queue->device->no_hw ? 0 :
               anv_gem_syncobj_timeline_wait(
                  queue->device, submit->wait_timeline_syncobjs,
                  submit->wait_timeline_values, submit->wait_timeline_count,
                  anv_get_absolute_timeout(UINT64_MAX) /* wait forever */,
                  true /* wait for all */, true /* wait for materialize */);
            if (ret) {
               result = anv_queue_set_lost(queue, "timeline timeout: %s",
                                           strerror(errno));
            }
         }

         /* Now submit */
         if (!queue->lost) {
            pthread_mutex_lock(&queue->device->mutex);
            result = anv_queue_execbuf_locked(queue, submit);
            pthread_mutex_unlock(&queue->device->mutex);
         }

         for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
            struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
            /* Out fences can't have temporary state because that would imply
             * that we imported a sync file and are trying to signal it.
             */
            assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
            struct anv_semaphore_impl *impl = &semaphore->permanent;

            assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
            impl->fd = dup(submit->out_fence);
         }

         if (result != VK_SUCCESS) {
            /* vkQueueSubmit or some other entry point will report the
             * DEVICE_LOST error at some point, but until we have emptied our
             * list of execbufs we need to wake up all the potential waiters
             * until one of them spots the error.
             */
            anv_queue_submit_signal_fences(queue->device, submit);
         }

         anv_queue_submit_free(queue->device, submit);

         pthread_mutex_lock(&queue->mutex);
      }

      if (!queue->quit)
         pthread_cond_wait(&queue->cond, &queue->mutex);
   }

   pthread_mutex_unlock(&queue->mutex);

   return NULL;
}

static VkResult
_anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit,
                  bool flush_queue)
{
   struct anv_queue_submit *submit = *_submit;

   /* Wait-before-signal behavior means we might keep alive the
    * anv_queue_submit object a bit longer, so transfer the ownership to the
    * anv_queue.
    */
   *_submit = NULL;
   if (queue->device->has_thread_submit) {
      pthread_mutex_lock(&queue->mutex);
      pthread_cond_broadcast(&queue->cond);
      list_addtail(&submit->link, &queue->queued_submits);
      pthread_mutex_unlock(&queue->mutex);
      return VK_SUCCESS;
   } else {
      pthread_mutex_lock(&queue->device->mutex);
      list_addtail(&submit->link, &queue->queued_submits);
      VkResult result = anv_device_submit_deferred_locked(queue->device);
      if (flush_queue) {
         while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
            int ret = pthread_cond_wait(&queue->device->queue_submit,
                                        &queue->device->mutex);
            if (ret != 0) {
               result = anv_device_set_lost(queue->device, "wait timeout");
               break;
            }

            result = anv_device_submit_deferred_locked(queue->device);
         }
      }
      pthread_mutex_unlock(&queue->device->mutex);
      return result;
   }
}

VkResult
anv_queue_init(struct anv_device *device, struct anv_queue *queue)
{
   VkResult result;

   queue->device = device;
   queue->flags = 0;
   queue->lost = false;
   queue->quit = false;

   list_inithead(&queue->queued_submits);

   /* We only need the additional thread/mutex when using a thread for
    * submission.
    */
   if (device->has_thread_submit) {
      if (pthread_mutex_init(&queue->mutex, NULL) != 0)
         return vk_error(VK_ERROR_INITIALIZATION_FAILED);

      if (pthread_cond_init(&queue->cond, NULL) != 0) {
         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
         goto fail_mutex;
      }
      if (pthread_create(&queue->thread, NULL, anv_queue_task, queue)) {
         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
         goto fail_cond;
      }
   }

   vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);

   return VK_SUCCESS;

 fail_cond:
   pthread_cond_destroy(&queue->cond);
 fail_mutex:
   pthread_mutex_destroy(&queue->mutex);

   return result;
}

void
anv_queue_finish(struct anv_queue *queue)
{
   vk_object_base_finish(&queue->base);

   if (!queue->device->has_thread_submit)
      return;

   pthread_mutex_lock(&queue->mutex);
   pthread_cond_broadcast(&queue->cond);
   queue->quit = true;
   pthread_mutex_unlock(&queue->mutex);

   void *ret;
   pthread_join(queue->thread, &ret);

   pthread_cond_destroy(&queue->cond);
   pthread_mutex_destroy(&queue->mutex);
}

static VkResult
anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
                              struct anv_bo *bo,
                              bool signal)
{
   if (submit->fence_bo_count >= submit->fence_bo_array_length) {
      uint32_t new_len = MAX2(submit->fence_bo_array_length * 2, 64);
      uintptr_t *new_fence_bos =
         vk_realloc(submit->alloc,
                    submit->fence_bos, new_len * sizeof(*submit->fence_bos),
                    8, submit->alloc_scope);
      if (new_fence_bos == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->fence_bos = new_fence_bos;
      submit->fence_bo_array_length = new_len;
   }

   /* Take advantage of the fact that anv_bo objects are allocated with
    * 8-byte alignment, so we can use the lowest bit to store whether this
    * is a BO we need to signal.
    */
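   /* Conceptually, the pack and the matching unpack on the execbuf side work
    * like this (a sketch; the real helpers live elsewhere in the driver):
    *
    *    entry  = (uintptr_t)bo | (signal ? 1 : 0);          // pack
    *    bo     = (struct anv_bo *)(entry & ~(uintptr_t)1);  // unpack pointer
    *    signal = entry & 1;                                 // unpack flag
    */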
   submit->fence_bos[submit->fence_bo_count++] = anv_pack_ptr(bo, 1, signal);

   return VK_SUCCESS;
}

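/* Append a syncobj to the submission's exec fence array.  With thread
 * submission, waits are also recorded in the timeline-wait arrays so the
 * submission thread can wait for the corresponding dma-fences to
 * materialize.  All of these arrays grow by doubling, which keeps the
 * amortized cost of an append constant.
 */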
static VkResult
anv_queue_submit_add_syncobj(struct anv_queue_submit *submit,
                             struct anv_device *device,
                             uint32_t handle, uint32_t flags,
                             uint64_t value)
{
   assert(flags != 0);

   if (device->has_thread_submit && (flags & I915_EXEC_FENCE_WAIT)) {
      if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
         uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);

         uint32_t *new_wait_timeline_syncobjs =
            vk_realloc(submit->alloc,
                       submit->wait_timeline_syncobjs,
                       new_len * sizeof(*submit->wait_timeline_syncobjs),
                       8, submit->alloc_scope);
         if (new_wait_timeline_syncobjs == NULL)
            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

         submit->wait_timeline_syncobjs = new_wait_timeline_syncobjs;

         uint64_t *new_wait_timeline_values =
            vk_realloc(submit->alloc,
                       submit->wait_timeline_values,
                       new_len * sizeof(*submit->wait_timeline_values),
                       8, submit->alloc_scope);
         if (new_wait_timeline_values == NULL)
            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

         submit->wait_timeline_values = new_wait_timeline_values;
         submit->wait_timeline_array_length = new_len;
      }

      submit->wait_timeline_syncobjs[submit->wait_timeline_count] = handle;
      submit->wait_timeline_values[submit->wait_timeline_count] = value;

      submit->wait_timeline_count++;
   }

   if (submit->fence_count >= submit->fence_array_length) {
      uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
      struct drm_i915_gem_exec_fence *new_fences =
         vk_realloc(submit->alloc,
                    submit->fences, new_len * sizeof(*submit->fences),
                    8, submit->alloc_scope);
      if (new_fences == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->fences = new_fences;

      uint64_t *new_fence_values =
         vk_realloc(submit->alloc,
                    submit->fence_values,
                    new_len * sizeof(*submit->fence_values),
                    8, submit->alloc_scope);
      if (new_fence_values == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->fence_values = new_fence_values;
      submit->fence_array_length = new_len;
   }

   submit->fences[submit->fence_count] = (struct drm_i915_gem_exec_fence) {
      .handle = handle,
      .flags = flags,
   };
   submit->fence_values[submit->fence_count] = value;
   submit->fence_count++;

   return VK_SUCCESS;
}

static VkResult
anv_queue_submit_add_sync_fd_fence(struct anv_queue_submit *submit,
                                   struct anv_semaphore *semaphore)
{
   if (submit->sync_fd_semaphore_count >= submit->sync_fd_semaphore_array_length) {
      uint32_t new_len = MAX2(submit->sync_fd_semaphore_array_length * 2, 64);
      struct anv_semaphore **new_semaphores =
         vk_realloc(submit->alloc, submit->sync_fd_semaphores,
                    new_len * sizeof(*submit->sync_fd_semaphores), 8,
                    submit->alloc_scope);
      if (new_semaphores == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->sync_fd_semaphores = new_semaphores;
      /* Record the new capacity; without this the array would be re-grown to
       * the same minimum size on every append and eventually overflow.
       */
      submit->sync_fd_semaphore_array_length = new_len;
   }

   submit->sync_fd_semaphores[submit->sync_fd_semaphore_count++] =
      anv_semaphore_ref(semaphore);
   submit->need_out_fence = true;

   return VK_SUCCESS;
}

static VkResult
anv_queue_submit_add_timeline_wait(struct anv_queue_submit *submit,
                                   struct anv_device *device,
                                   struct anv_timeline *timeline,
                                   uint64_t value)
{
   if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
      uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
      struct anv_timeline **new_wait_timelines =
         vk_realloc(submit->alloc,
                    submit->wait_timelines,
                    new_len * sizeof(*submit->wait_timelines),
                    8, submit->alloc_scope);
      if (new_wait_timelines == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->wait_timelines = new_wait_timelines;

      uint64_t *new_wait_timeline_values =
         vk_realloc(submit->alloc,
                    submit->wait_timeline_values,
                    new_len * sizeof(*submit->wait_timeline_values),
                    8, submit->alloc_scope);
      if (new_wait_timeline_values == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->wait_timeline_values = new_wait_timeline_values;

      submit->wait_timeline_array_length = new_len;
   }

   submit->wait_timelines[submit->wait_timeline_count] = timeline;
   submit->wait_timeline_values[submit->wait_timeline_count] = value;

   submit->wait_timeline_count++;

   return VK_SUCCESS;
}

static VkResult
anv_queue_submit_add_timeline_signal(struct anv_queue_submit *submit,
                                     struct anv_device *device,
                                     struct anv_timeline *timeline,
                                     uint64_t value)
{
   assert(timeline->highest_pending < value);

   if (submit->signal_timeline_count >= submit->signal_timeline_array_length) {
      uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 64);
      struct anv_timeline **new_signal_timelines =
         vk_realloc(submit->alloc,
                    submit->signal_timelines,
                    new_len * sizeof(*submit->signal_timelines),
                    8, submit->alloc_scope);
      if (new_signal_timelines == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->signal_timelines = new_signal_timelines;

      uint64_t *new_signal_timeline_values =
         vk_realloc(submit->alloc,
                    submit->signal_timeline_values,
                    new_len * sizeof(*submit->signal_timeline_values),
                    8, submit->alloc_scope);
      if (new_signal_timeline_values == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->signal_timeline_values = new_signal_timeline_values;

      submit->signal_timeline_array_length = new_len;
   }

   submit->signal_timelines[submit->signal_timeline_count] = timeline;
   submit->signal_timeline_values[submit->signal_timeline_count] = value;

   submit->signal_timeline_count++;

   return VK_SUCCESS;
}

static struct anv_queue_submit *
anv_queue_submit_alloc(struct anv_device *device, int perf_query_pass)
{
   const VkAllocationCallbacks *alloc = &device->vk.alloc;
   VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE;

   struct anv_queue_submit *submit = vk_zalloc(alloc, sizeof(*submit), 8, alloc_scope);
   if (!submit)
      return NULL;

   submit->alloc = alloc;
   submit->alloc_scope = alloc_scope;
   submit->in_fence = -1;
   submit->out_fence = -1;
   submit->perf_query_pass = perf_query_pass;

   return submit;
}

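/* Submit a (possibly NULL) batch and synchronously wait for it to complete,
 * using a syncobj when the kernel supports DRM_IOCTL_SYNCOBJ_WAIT and
 * falling back to waiting on a dedicated sync BO otherwise.
 */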
VkResult
anv_queue_submit_simple_batch(struct anv_queue *queue,
                              struct anv_batch *batch)
{
   if (queue->device->no_hw)
      return VK_SUCCESS;

   struct anv_device *device = queue->device;
   struct anv_queue_submit *submit = anv_queue_submit_alloc(device, -1);
   if (!submit)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   bool has_syncobj_wait = device->physical->has_syncobj_wait;
   VkResult result;
   uint32_t syncobj;
   struct anv_bo *batch_bo, *sync_bo;

   if (has_syncobj_wait) {
      syncobj = anv_gem_syncobj_create(device, 0);
      if (!syncobj) {
         result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
         goto err_free_submit;
      }

      result = anv_queue_submit_add_syncobj(submit, device, syncobj,
                                            I915_EXEC_FENCE_SIGNAL, 0);
   } else {
      result = anv_device_alloc_bo(device, 4096,
                                   ANV_BO_ALLOC_EXTERNAL |
                                   ANV_BO_ALLOC_IMPLICIT_SYNC,
                                   0 /* explicit_address */,
                                   &sync_bo);
      if (result != VK_SUCCESS)
         goto err_free_submit;

      result = anv_queue_submit_add_fence_bo(submit, sync_bo, true /* signal */);
   }

   if (result != VK_SUCCESS)
      goto err_destroy_sync_primitive;

   if (batch) {
      uint32_t size = align_u32(batch->next - batch->start, 8);
      result = anv_bo_pool_alloc(&device->batch_bo_pool, size, &batch_bo);
      if (result != VK_SUCCESS)
         goto err_destroy_sync_primitive;

      memcpy(batch_bo->map, batch->start, size);
      if (!device->info.has_llc)
         gen_flush_range(batch_bo->map, size);

      submit->simple_bo = batch_bo;
      submit->simple_bo_size = size;
   }

   result = _anv_queue_submit(queue, &submit, true);

   if (result == VK_SUCCESS) {
      if (has_syncobj_wait) {
         if (anv_gem_syncobj_wait(device, &syncobj, 1,
                                  anv_get_absolute_timeout(INT64_MAX), true))
            result = anv_device_set_lost(device, "anv_gem_syncobj_wait failed: %m");
         anv_gem_syncobj_destroy(device, syncobj);
      } else {
         result = anv_device_wait(device, sync_bo,
                                  anv_get_relative_timeout(INT64_MAX));
         anv_device_release_bo(device, sync_bo);
      }
   }

   if (batch)
      anv_bo_pool_free(&device->batch_bo_pool, batch_bo);

   if (submit)
      anv_queue_submit_free(device, submit);

   return result;

 err_destroy_sync_primitive:
   if (has_syncobj_wait)
      anv_gem_syncobj_destroy(device, syncobj);
   else
      anv_device_release_bo(device, sync_bo);
 err_free_submit:
   if (submit)
      anv_queue_submit_free(device, submit);

   return result;
}

/* Transfer ownership of temporary semaphores from the VkSemaphore object to
 * the anv_queue_submit object.  Those temporary semaphores are then freed in
 * anv_queue_submit_free() once the driver is finished with them.
 */
static VkResult
maybe_transfer_temporary_semaphore(struct anv_queue_submit *submit,
                                   struct anv_semaphore *semaphore,
                                   struct anv_semaphore_impl **out_impl)
{
   struct anv_semaphore_impl *impl = &semaphore->temporary;

   if (impl->type == ANV_SEMAPHORE_TYPE_NONE) {
      *out_impl = &semaphore->permanent;
      return VK_SUCCESS;
   }

   /* BO backed timeline semaphores cannot be temporary. */
   assert(impl->type != ANV_SEMAPHORE_TYPE_TIMELINE);

   /*
    * There is a requirement to reset semaphores to their permanent state
    * after submission.  From the Vulkan 1.0.53 spec:
    *
    *    "If the import is temporary, the implementation must restore the
    *    semaphore to its prior permanent state after submitting the next
    *    semaphore wait operation."
    *
    * In the case where we defer the actual submission to a thread because of
    * the wait-before-submit behavior required for timeline semaphores, we
    * need to make copies of the temporary syncobj to ensure they stay alive
    * until we do the actual execbuffer ioctl.
    */
   if (submit->temporary_semaphore_count >= submit->temporary_semaphore_array_length) {
      uint32_t new_len = MAX2(submit->temporary_semaphore_array_length * 2, 8);
      /* Make sure that if the realloc fails, we still have the old semaphore
       * array around to properly clean things up on failure.
       */
      struct anv_semaphore_impl *new_array =
         vk_realloc(submit->alloc,
                    submit->temporary_semaphores,
                    new_len * sizeof(*submit->temporary_semaphores),
                    8, submit->alloc_scope);
      if (new_array == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->temporary_semaphores = new_array;
      submit->temporary_semaphore_array_length = new_len;
   }

   /* Copy anv_semaphore_impl into anv_queue_submit. */
   submit->temporary_semaphores[submit->temporary_semaphore_count++] = *impl;
   *out_impl = &submit->temporary_semaphores[submit->temporary_semaphore_count - 1];

   /* Clear the incoming semaphore */
   impl->type = ANV_SEMAPHORE_TYPE_NONE;

   return VK_SUCCESS;
}

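/* Lower one execbuf's worth of Vulkan state (command buffer, wait/signal
 * semaphores, WSI buffer, and fence) into an anv_queue_submit and hand it to
 * _anv_queue_submit().  Each semaphore type is translated here into either a
 * fence BO, a sync-file fd, a syncobj, or a driver-managed timeline entry.
 */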
static VkResult
anv_queue_submit(struct anv_queue *queue,
                 struct anv_cmd_buffer *cmd_buffer,
                 const VkSemaphore *in_semaphores,
                 const uint64_t *in_values,
                 uint32_t num_in_semaphores,
                 const VkSemaphore *out_semaphores,
                 const uint64_t *out_values,
                 uint32_t num_out_semaphores,
                 struct anv_bo *wsi_signal_bo,
                 VkFence _fence,
                 int perf_query_pass)
{
   ANV_FROM_HANDLE(anv_fence, fence, _fence);
   struct anv_device *device = queue->device;
   UNUSED struct anv_physical_device *pdevice = device->physical;
   struct anv_queue_submit *submit = anv_queue_submit_alloc(device, perf_query_pass);
   if (!submit)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   submit->cmd_buffer = cmd_buffer;

   VkResult result = VK_SUCCESS;
   for (uint32_t i = 0; i < num_in_semaphores; i++) {
      ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
      struct anv_semaphore_impl *impl;

      result = maybe_transfer_temporary_semaphore(submit, semaphore, &impl);
      if (result != VK_SUCCESS)
         goto error;

      switch (impl->type) {
      case ANV_SEMAPHORE_TYPE_BO:
         assert(!pdevice->has_syncobj);
         result = anv_queue_submit_add_fence_bo(submit, impl->bo, false /* signal */);
         if (result != VK_SUCCESS)
            goto error;
         break;

      case ANV_SEMAPHORE_TYPE_WSI_BO:
         /* When using a window-system buffer as a semaphore, always enable
          * EXEC_OBJECT_WRITE.  This gives us a WaR hazard with the display or
          * compositor's read of the buffer and enforces that we don't start
          * rendering until they are finished.  This is exactly the
          * synchronization we want with vkAcquireNextImage.
          */
         result = anv_queue_submit_add_fence_bo(submit, impl->bo, true /* signal */);
         if (result != VK_SUCCESS)
            goto error;
         break;

      case ANV_SEMAPHORE_TYPE_SYNC_FILE:
         assert(!pdevice->has_syncobj);
         if (submit->in_fence == -1) {
            submit->in_fence = impl->fd;
            if (submit->in_fence == -1) {
               result = vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
               goto error;
            }
            impl->fd = -1;
         } else {
            int merge = anv_gem_sync_file_merge(device, submit->in_fence, impl->fd);
            if (merge == -1) {
               result = vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
               goto error;
            }
            close(impl->fd);
            close(submit->in_fence);
            impl->fd = -1;
            submit->in_fence = merge;
         }
         break;

      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
         result = anv_queue_submit_add_syncobj(submit, device,
                                               impl->syncobj,
                                               I915_EXEC_FENCE_WAIT,
                                               0);
         if (result != VK_SUCCESS)
            goto error;
         break;
      }

      case ANV_SEMAPHORE_TYPE_TIMELINE:
         result = anv_queue_submit_add_timeline_wait(submit, device,
                                                     &impl->timeline,
                                                     in_values ? in_values[i] : 0);
         if (result != VK_SUCCESS)
            goto error;
         break;

      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
         result = anv_queue_submit_add_syncobj(submit, device,
                                               impl->syncobj,
                                               I915_EXEC_FENCE_WAIT,
                                               in_values ? in_values[i] : 0);
         if (result != VK_SUCCESS)
            goto error;
         break;

      default:
         break;
      }
   }

   for (uint32_t i = 0; i < num_out_semaphores; i++) {
      ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);

      /* Under most circumstances, out fences won't be temporary.  However,
       * the spec does allow it for opaque_fd.  From the Vulkan 1.0.53 spec:
       *
       *    "If the import is temporary, the implementation must restore the
       *    semaphore to its prior permanent state after submitting the next
       *    semaphore wait operation."
       *
       * The spec says nothing whatsoever about signal operations on
       * temporarily imported semaphores so it appears they are allowed.
       * There are also CTS tests that require this to work.
       */
      struct anv_semaphore_impl *impl =
         semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
         &semaphore->temporary : &semaphore->permanent;

      switch (impl->type) {
      case ANV_SEMAPHORE_TYPE_BO:
         assert(!pdevice->has_syncobj);
         result = anv_queue_submit_add_fence_bo(submit, impl->bo, true /* signal */);
         if (result != VK_SUCCESS)
            goto error;
         break;

      case ANV_SEMAPHORE_TYPE_SYNC_FILE:
         assert(!pdevice->has_syncobj);
         result = anv_queue_submit_add_sync_fd_fence(submit, semaphore);
         if (result != VK_SUCCESS)
            goto error;
         break;

      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
         result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
                                               I915_EXEC_FENCE_SIGNAL,
                                               0);
         if (result != VK_SUCCESS)
            goto error;
         break;
      }

      case ANV_SEMAPHORE_TYPE_TIMELINE:
         result = anv_queue_submit_add_timeline_signal(submit, device,
                                                       &impl->timeline,
                                                       out_values ? out_values[i] : 0);
         if (result != VK_SUCCESS)
            goto error;
         break;

      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
         result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
                                               I915_EXEC_FENCE_SIGNAL,
                                               out_values ? out_values[i] : 0);
         if (result != VK_SUCCESS)
            goto error;
         break;

      default:
         break;
      }
   }

   if (wsi_signal_bo) {
      result = anv_queue_submit_add_fence_bo(submit, wsi_signal_bo, true /* signal */);
      if (result != VK_SUCCESS)
         goto error;
   }

   if (fence) {
      /* Under most circumstances, out fences won't be temporary.  However,
       * the spec does allow it for opaque_fd.  From the Vulkan 1.0.53 spec:
       *
       *    "If the import is temporary, the implementation must restore the
       *    semaphore to its prior permanent state after submitting the next
       *    semaphore wait operation."
       *
       * The spec says nothing whatsoever about signal operations on
       * temporarily imported semaphores so it appears they are allowed.
       * There are also CTS tests that require this to work.
       */
      struct anv_fence_impl *impl =
         fence->temporary.type != ANV_FENCE_TYPE_NONE ?
         &fence->temporary : &fence->permanent;

      switch (impl->type) {
      case ANV_FENCE_TYPE_BO:
         assert(!device->has_thread_submit);
         result = anv_queue_submit_add_fence_bo(submit, impl->bo.bo, true /* signal */);
         if (result != VK_SUCCESS)
            goto error;
         break;

      case ANV_FENCE_TYPE_SYNCOBJ: {
         /*
          * For the same reason we reset the signaled binary syncobj above,
          * also reset the fence's syncobj so that it doesn't contain a
          * signaled dma-fence.
          */
         anv_gem_syncobj_reset(device, impl->syncobj);

         result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
                                               I915_EXEC_FENCE_SIGNAL,
                                               0);
         if (result != VK_SUCCESS)
            goto error;
         break;
      }

      default:
         unreachable("Invalid fence type");
      }
   }

   result = _anv_queue_submit(queue, &submit, false);
   if (result != VK_SUCCESS)
      goto error;

   if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
      assert(!device->has_thread_submit);
      /* If we have a permanent BO fence, the only type of temporary possible
       * would be BO_WSI (because BO fences are not shareable).  The Vulkan
       * spec also requires that the fence passed to vkQueueSubmit() be:
       *
       *    * unsignaled
       *    * not be associated with any other queue command that has not yet
       *      completed execution on that queue
       *
       * So the only acceptable type for the temporary is NONE.
       */
      assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);

      /* Once the execbuf has returned, we need to set the fence state to
       * SUBMITTED.  We can't do this before calling execbuf because
       * anv_GetFenceStatus does take the global device lock before checking
       * fence->state.
       *
       * We set the fence state to SUBMITTED regardless of whether or not the
       * execbuf succeeds because we need to ensure that vkWaitForFences() and
       * vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or
       * VK_SUCCESS) in a finite amount of time even if execbuf fails.
       */
      fence->permanent.bo.state = ANV_BO_FENCE_STATE_SUBMITTED;
   }

 error:
   if (submit)
      anv_queue_submit_free(device, submit);

   return result;
}

VkResult anv_QueueSubmit(
    VkQueue                                     _queue,
    uint32_t                                    submitCount,
    const VkSubmitInfo*                         pSubmits,
    VkFence                                     fence)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);

   if (queue->device->no_hw)
      return VK_SUCCESS;

   /* Query for device status prior to submitting.  Technically, we don't
    * need to do this.  However, if we have a client that's submitting piles
    * of garbage, we would rather break as early as possible to keep the GPU
    * hanging contained.  If we don't check here, we'll either be waiting for
    * the kernel to kick us or we'll have to wait until the client waits on a
    * fence before we actually know whether or not we've hung.
    */
   VkResult result = anv_device_query_status(queue->device);
   if (result != VK_SUCCESS)
      return result;

   if (fence && submitCount == 0) {
      /* If we don't have any command buffers, we need to submit a dummy
       * batch to give GEM something to wait on.  We could, potentially,
       * come up with something more efficient but this shouldn't be a
       * common case.
       */
      result = anv_queue_submit(queue, NULL, NULL, NULL, 0, NULL, NULL, 0,
                                NULL, fence, -1);
      goto out;
   }

   for (uint32_t i = 0; i < submitCount; i++) {
      /* Fence for this submit.  NULL for all but the last one */
      VkFence submit_fence = (i == submitCount - 1) ? fence : VK_NULL_HANDLE;

      const struct wsi_memory_signal_submit_info *mem_signal_info =
         vk_find_struct_const(pSubmits[i].pNext,
                              WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
      struct anv_bo *wsi_signal_bo =
         mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
         anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;

      const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
         vk_find_struct_const(pSubmits[i].pNext,
                              TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
      const VkPerformanceQuerySubmitInfoKHR *perf_info =
         vk_find_struct_const(pSubmits[i].pNext,
                              PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
      const uint64_t *wait_values =
         timeline_info && timeline_info->waitSemaphoreValueCount ?
         timeline_info->pWaitSemaphoreValues : NULL;
      const uint64_t *signal_values =
         timeline_info && timeline_info->signalSemaphoreValueCount ?
         timeline_info->pSignalSemaphoreValues : NULL;

      if (pSubmits[i].commandBufferCount == 0) {
         /* If we don't have any command buffers, we need to submit a dummy
          * batch to give GEM something to wait on.  We could, potentially,
          * come up with something more efficient but this shouldn't be a
          * common case.
          */
         result = anv_queue_submit(queue, NULL,
                                   pSubmits[i].pWaitSemaphores,
                                   wait_values,
                                   pSubmits[i].waitSemaphoreCount,
                                   pSubmits[i].pSignalSemaphores,
                                   signal_values,
                                   pSubmits[i].signalSemaphoreCount,
                                   wsi_signal_bo,
                                   submit_fence,
                                   -1);
         if (result != VK_SUCCESS)
            goto out;

         continue;
      }

      for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
         ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
                         pSubmits[i].pCommandBuffers[j]);
         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
         assert(!anv_batch_has_error(&cmd_buffer->batch));

         /* Fence for this execbuf.  NULL for all but the last one */
         VkFence execbuf_fence =
            (j == pSubmits[i].commandBufferCount - 1) ?
            submit_fence : VK_NULL_HANDLE;

         const VkSemaphore *in_semaphores = NULL, *out_semaphores = NULL;
         const uint64_t *in_values = NULL, *out_values = NULL;
         uint32_t num_in_semaphores = 0, num_out_semaphores = 0;
         if (j == 0) {
            /* Only the first batch gets the in semaphores */
            in_semaphores = pSubmits[i].pWaitSemaphores;
            in_values = wait_values;
            num_in_semaphores = pSubmits[i].waitSemaphoreCount;
         }

         if (j == pSubmits[i].commandBufferCount - 1) {
            /* Only the last batch gets the out semaphores */
            out_semaphores = pSubmits[i].pSignalSemaphores;
            out_values = signal_values;
            num_out_semaphores = pSubmits[i].signalSemaphoreCount;
         }

         result = anv_queue_submit(queue, cmd_buffer,
                                   in_semaphores, in_values, num_in_semaphores,
                                   out_semaphores, out_values, num_out_semaphores,
                                   wsi_signal_bo, execbuf_fence,
                                   perf_info ? perf_info->counterPassIndex : 0);
         if (result != VK_SUCCESS)
            goto out;
      }
   }

 out:
   if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
      /* In the case that something has gone wrong we may end up with an
       * inconsistent state from which it may not be trivial to recover.
       * For example, we might have computed address relocations and
       * any future attempt to re-submit this job will need to know about
       * this and avoid computing relocation addresses again.
       *
       * To avoid this sort of issue, we assume that if something was
       * wrong during submission we must already be in a really bad situation
       * anyway (such as being out of memory) and return
       * VK_ERROR_DEVICE_LOST to ensure that clients do not attempt to
       * submit the same job again to this device.
       *
       * We skip doing this on VK_ERROR_DEVICE_LOST because
       * anv_device_set_lost() would have been called already by a callee of
       * anv_queue_submit().
       */
      result = anv_device_set_lost(queue->device, "vkQueueSubmit() failed");
   }

   return result;
}

VkResult anv_QueueWaitIdle(
    VkQueue                                     _queue)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);

   if (anv_device_is_lost(queue->device))
      return VK_ERROR_DEVICE_LOST;

   return anv_queue_submit_simple_batch(queue, NULL);
}

VkResult anv_CreateFence(
    VkDevice                                    _device,
    const VkFenceCreateInfo*                    pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkFence*                                    pFence)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_fence *fence;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);

   fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (fence == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);

   if (device->physical->has_syncobj_wait) {
      fence->permanent.type = ANV_FENCE_TYPE_SYNCOBJ;

      uint32_t create_flags = 0;
      if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
         create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;

      fence->permanent.syncobj = anv_gem_syncobj_create(device, create_flags);
      if (!fence->permanent.syncobj)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   } else {
      fence->permanent.type = ANV_FENCE_TYPE_BO;

      VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, 4096,
                                          &fence->permanent.bo.bo);
      if (result != VK_SUCCESS)
         return result;

      if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
         fence->permanent.bo.state = ANV_BO_FENCE_STATE_SIGNALED;
      } else {
         fence->permanent.bo.state = ANV_BO_FENCE_STATE_RESET;
      }
   }

   *pFence = anv_fence_to_handle(fence);

   return VK_SUCCESS;
}

static void
anv_fence_impl_cleanup(struct anv_device *device,
                       struct anv_fence_impl *impl)
{
   switch (impl->type) {
   case ANV_FENCE_TYPE_NONE:
      /* Dummy.  Nothing to do */
      break;

   case ANV_FENCE_TYPE_BO:
      anv_bo_pool_free(&device->batch_bo_pool, impl->bo.bo);
      break;

   case ANV_FENCE_TYPE_WSI_BO:
      anv_device_release_bo(device, impl->bo.bo);
      break;

   case ANV_FENCE_TYPE_SYNCOBJ:
      anv_gem_syncobj_destroy(device, impl->syncobj);
      break;

   case ANV_FENCE_TYPE_WSI:
      impl->fence_wsi->destroy(impl->fence_wsi);
      break;

   default:
      unreachable("Invalid fence type");
   }

   impl->type = ANV_FENCE_TYPE_NONE;
}

void
anv_fence_reset_temporary(struct anv_device *device,
                          struct anv_fence *fence)
{
   if (fence->temporary.type == ANV_FENCE_TYPE_NONE)
      return;

   anv_fence_impl_cleanup(device, &fence->temporary);
}

void anv_DestroyFence(
    VkDevice                                    _device,
    VkFence                                     _fence,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_fence, fence, _fence);

   if (!fence)
      return;

   anv_fence_impl_cleanup(device, &fence->temporary);
   anv_fence_impl_cleanup(device, &fence->permanent);

   vk_object_base_finish(&fence->base);
   vk_free2(&device->vk.alloc, pAllocator, fence);
}

VkResult anv_ResetFences(
    VkDevice                                    _device,
    uint32_t                                    fenceCount,
    const VkFence*                              pFences)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   for (uint32_t i = 0; i < fenceCount; i++) {
      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);

      /* From the Vulkan 1.0.53 spec:
       *
       *    "If any member of pFences currently has its payload imported with
       *    temporary permanence, that fence’s prior permanent payload is
       *    first restored.  The remaining operations described therefore
       *    operate on the restored payload."
       */
      anv_fence_reset_temporary(device, fence);

      struct anv_fence_impl *impl = &fence->permanent;

      switch (impl->type) {
      case ANV_FENCE_TYPE_BO:
         impl->bo.state = ANV_BO_FENCE_STATE_RESET;
         break;

      case ANV_FENCE_TYPE_SYNCOBJ:
         anv_gem_syncobj_reset(device, impl->syncobj);
         break;

      default:
         unreachable("Invalid fence type");
      }
   }

   return VK_SUCCESS;
}

VkResult anv_GetFenceStatus(
    VkDevice                                    _device,
    VkFence                                     _fence)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_fence, fence, _fence);

   if (anv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   struct anv_fence_impl *impl =
      fence->temporary.type != ANV_FENCE_TYPE_NONE ?
      &fence->temporary : &fence->permanent;

   switch (impl->type) {
   case ANV_FENCE_TYPE_BO:
   case ANV_FENCE_TYPE_WSI_BO:
      switch (impl->bo.state) {
      case ANV_BO_FENCE_STATE_RESET:
         /* If it hasn't even been sent off to the GPU yet, it's not ready */
         return VK_NOT_READY;

      case ANV_BO_FENCE_STATE_SIGNALED:
         /* It's been signaled, return success */
         return VK_SUCCESS;

      case ANV_BO_FENCE_STATE_SUBMITTED: {
         VkResult result = anv_device_bo_busy(device, impl->bo.bo);
         if (result == VK_SUCCESS) {
            impl->bo.state = ANV_BO_FENCE_STATE_SIGNALED;
            return VK_SUCCESS;
         } else {
            return result;
         }
      }
      default:
         unreachable("Invalid fence status");
      }

   case ANV_FENCE_TYPE_SYNCOBJ: {
      if (device->has_thread_submit) {
         uint64_t binary_value = 0;
         int ret = anv_gem_syncobj_timeline_wait(device, &impl->syncobj,
                                                 &binary_value, 1, 0,
                                                 true /* wait_all */,
                                                 false /* wait_materialize */);
         if (ret == -1) {
            if (errno == ETIME) {
               return VK_NOT_READY;
            } else {
               /* We don't know the real error. */
               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
            }
         } else {
            return VK_SUCCESS;
         }
      } else {
         int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, false);
         if (ret == -1) {
            if (errno == ETIME) {
               return VK_NOT_READY;
            } else {
               /* We don't know the real error. */
               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
            }
         } else {
            return VK_SUCCESS;
         }
      }
   }

   default:
      unreachable("Invalid fence type");
   }
}

static VkResult
anv_wait_for_syncobj_fences(struct anv_device *device,
                            uint32_t fenceCount,
                            const VkFence *pFences,
                            bool waitAll,
                            uint64_t abs_timeout_ns)
{
   uint32_t *syncobjs = vk_zalloc(&device->vk.alloc,
                                  sizeof(*syncobjs) * fenceCount, 8,
                                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!syncobjs)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < fenceCount; i++) {
      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
      assert(fence->permanent.type == ANV_FENCE_TYPE_SYNCOBJ);

      struct anv_fence_impl *impl =
         fence->temporary.type != ANV_FENCE_TYPE_NONE ?
         &fence->temporary : &fence->permanent;

      assert(impl->type == ANV_FENCE_TYPE_SYNCOBJ);
      syncobjs[i] = impl->syncobj;
   }

   int ret = 0;
   /* The gem_syncobj_wait ioctl may return early due to an inherent
    * limitation in the way it computes timeouts.  Loop until we've actually
    * passed the timeout.
    */
   do {
      ret = anv_gem_syncobj_wait(device, syncobjs, fenceCount,
                                 abs_timeout_ns, waitAll);
   } while (ret == -1 && errno == ETIME && anv_gettime_ns() < abs_timeout_ns);

   vk_free(&device->vk.alloc, syncobjs);

   if (ret == -1) {
      if (errno == ETIME) {
         return VK_TIMEOUT;
      } else {
         /* We don't know the real error. */
         return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
      }
   } else {
      return VK_SUCCESS;
   }
}
1591
1592 static VkResult
anv_wait_for_bo_fences(struct anv_device * device,uint32_t fenceCount,const VkFence * pFences,bool waitAll,uint64_t abs_timeout_ns)1593 anv_wait_for_bo_fences(struct anv_device *device,
1594 uint32_t fenceCount,
1595 const VkFence *pFences,
1596 bool waitAll,
1597 uint64_t abs_timeout_ns)
1598 {
1599 VkResult result = VK_SUCCESS;
1600 uint32_t pending_fences = fenceCount;
1601 while (pending_fences) {
1602 pending_fences = 0;
1603 bool signaled_fences = false;
1604 for (uint32_t i = 0; i < fenceCount; i++) {
1605 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1606
1607 struct anv_fence_impl *impl =
1608 fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1609 &fence->temporary : &fence->permanent;
1610 assert(impl->type == ANV_FENCE_TYPE_BO ||
1611 impl->type == ANV_FENCE_TYPE_WSI_BO);
1612
1613 switch (impl->bo.state) {
1614 case ANV_BO_FENCE_STATE_RESET:
1615 /* This fence hasn't been submitted yet, we'll catch it the next
1616 * time around. Yes, this may mean we dead-loop but, short of
1617 * lots of locking and a condition variable, there's not much that
1618 * we can do about that.
1619 */
1620 pending_fences++;
1621 continue;
1622
1623 case ANV_BO_FENCE_STATE_SIGNALED:
1624 /* This fence is not pending. If waitAll isn't set, we can return
1625 * early. Otherwise, we have to keep going.
1626 */
1627 if (!waitAll) {
1628 result = VK_SUCCESS;
1629 goto done;
1630 }
1631 continue;
1632
1633 case ANV_BO_FENCE_STATE_SUBMITTED:
1634 /* These are the fences we really care about. Go ahead and wait
1635 * on it until we hit a timeout.
1636 */
1637 result = anv_device_wait(device, impl->bo.bo,
1638 anv_get_relative_timeout(abs_timeout_ns));
1639 switch (result) {
1640 case VK_SUCCESS:
1641 impl->bo.state = ANV_BO_FENCE_STATE_SIGNALED;
1642 signaled_fences = true;
1643 if (!waitAll)
1644 goto done;
1645 break;
1646
1647 case VK_TIMEOUT:
1648 goto done;
1649
1650 default:
1651 return result;
1652 }
1653 }
1654 }
1655
1656 if (pending_fences && !signaled_fences) {
1657 /* If we've hit this then someone decided to vkWaitForFences before
1658 * they've actually submitted any of them to a queue. This is a
1659 * fairly pessimal case, so it's ok to lock here and use a standard
1660 * pthreads condition variable.
1661 */
1662 pthread_mutex_lock(&device->mutex);
1663
1664 /* It's possible that some of the fences have changed state since the
1665 * last time we checked. Now that we have the lock, check for
1666 * pending fences again and don't wait if it's changed.
1667 */
1668 uint32_t now_pending_fences = 0;
1669 for (uint32_t i = 0; i < fenceCount; i++) {
1670 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1671 if (fence->permanent.bo.state == ANV_BO_FENCE_STATE_RESET)
1672 now_pending_fences++;
1673 }
1674 assert(now_pending_fences <= pending_fences);
1675
1676 if (now_pending_fences == pending_fences) {
1677 struct timespec abstime = {
1678 .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
1679 .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
1680 };
1681
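         /* pthread_cond_timedwait() measures abstime against the clock the
          * condition variable was created with; this assumes
          * device->queue_submit was initialized with
          * pthread_condattr_setclock(CLOCK_MONOTONIC) so that it agrees with
          * anv_gettime_ns().
          */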
1682 ASSERTED int ret;
1683 ret = pthread_cond_timedwait(&device->queue_submit,
1684 &device->mutex, &abstime);
1685 assert(ret != EINVAL);
1686 if (anv_gettime_ns() >= abs_timeout_ns) {
1687 pthread_mutex_unlock(&device->mutex);
1688 result = VK_TIMEOUT;
1689 goto done;
1690 }
1691 }
1692
1693 pthread_mutex_unlock(&device->mutex);
1694 }
1695 }
1696
1697 done:
1698 if (anv_device_is_lost(device))
1699 return VK_ERROR_DEVICE_LOST;
1700
1701 return result;
1702 }
1703
1704 static VkResult
anv_wait_for_wsi_fence(struct anv_device *device,
1706 struct anv_fence_impl *impl,
1707 uint64_t abs_timeout)
1708 {
1709 return impl->fence_wsi->wait(impl->fence_wsi, abs_timeout);
1710 }
1711
1712 static VkResult
anv_wait_for_fences(struct anv_device *device,
1714 uint32_t fenceCount,
1715 const VkFence *pFences,
1716 bool waitAll,
1717 uint64_t abs_timeout)
1718 {
1719 VkResult result = VK_SUCCESS;
1720
1721 if (fenceCount <= 1 || waitAll) {
1722 for (uint32_t i = 0; i < fenceCount; i++) {
1723 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1724 struct anv_fence_impl *impl =
1725 fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1726 &fence->temporary : &fence->permanent;
1727
1728 switch (impl->type) {
1729 case ANV_FENCE_TYPE_BO:
1730 assert(!device->physical->has_syncobj_wait);
1731 /* fall-through */
1732 case ANV_FENCE_TYPE_WSI_BO:
1733 result = anv_wait_for_bo_fences(device, 1, &pFences[i],
1734 true, abs_timeout);
1735 break;
1736 case ANV_FENCE_TYPE_SYNCOBJ:
1737 result = anv_wait_for_syncobj_fences(device, 1, &pFences[i],
1738 true, abs_timeout);
1739 break;
1740 case ANV_FENCE_TYPE_WSI:
1741 result = anv_wait_for_wsi_fence(device, impl, abs_timeout);
1742 break;
1743 case ANV_FENCE_TYPE_NONE:
1744 result = VK_SUCCESS;
1745 break;
1746 }
1747 if (result != VK_SUCCESS)
1748 return result;
1749 }
1750 } else {
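      /* A heterogeneous mix of fence types with waitAll == false cannot be
       * handled by a single kernel wait, so poll each fence with a zero
       * absolute timeout (which returns immediately) until the deadline
       * expires.
       */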
1751 do {
1752 for (uint32_t i = 0; i < fenceCount; i++) {
1753 if (anv_wait_for_fences(device, 1, &pFences[i], true, 0) == VK_SUCCESS)
1754 return VK_SUCCESS;
1755 }
1756 } while (anv_gettime_ns() < abs_timeout);
1757 result = VK_TIMEOUT;
1758 }
1759 return result;
1760 }
1761
static bool anv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
1763 {
1764 for (uint32_t i = 0; i < fenceCount; ++i) {
1765 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1766 struct anv_fence_impl *impl =
1767 fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1768 &fence->temporary : &fence->permanent;
1769 if (impl->type != ANV_FENCE_TYPE_SYNCOBJ)
1770 return false;
1771 }
1772 return true;
1773 }
1774
static bool anv_all_fences_bo(uint32_t fenceCount, const VkFence *pFences)
1776 {
1777 for (uint32_t i = 0; i < fenceCount; ++i) {
1778 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1779 struct anv_fence_impl *impl =
1780 fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1781 &fence->temporary : &fence->permanent;
1782 if (impl->type != ANV_FENCE_TYPE_BO &&
1783 impl->type != ANV_FENCE_TYPE_WSI_BO)
1784 return false;
1785 }
1786 return true;
1787 }
1788
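/* vkWaitForFences entry point: when every fence is backed by the same
 * primitive we can issue a single kernel wait (all syncobjs or all BOs);
 * otherwise we fall back to the generic per-fence loop above.
 */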
VkResult anv_WaitForFences(
1790 VkDevice _device,
1791 uint32_t fenceCount,
1792 const VkFence* pFences,
1793 VkBool32 waitAll,
1794 uint64_t timeout)
1795 {
1796 ANV_FROM_HANDLE(anv_device, device, _device);
1797
1798 if (device->no_hw)
1799 return VK_SUCCESS;
1800
1801 if (anv_device_is_lost(device))
1802 return VK_ERROR_DEVICE_LOST;
1803
1804 uint64_t abs_timeout = anv_get_absolute_timeout(timeout);
1805 if (anv_all_fences_syncobj(fenceCount, pFences)) {
1806 return anv_wait_for_syncobj_fences(device, fenceCount, pFences,
1807 waitAll, abs_timeout);
1808 } else if (anv_all_fences_bo(fenceCount, pFences)) {
1809 return anv_wait_for_bo_fences(device, fenceCount, pFences,
1810 waitAll, abs_timeout);
1811 } else {
1812 return anv_wait_for_fences(device, fenceCount, pFences,
1813 waitAll, abs_timeout);
1814 }
1815 }
1816
void anv_GetPhysicalDeviceExternalFenceProperties(
1818 VkPhysicalDevice physicalDevice,
1819 const VkPhysicalDeviceExternalFenceInfo* pExternalFenceInfo,
1820 VkExternalFenceProperties* pExternalFenceProperties)
1821 {
1822 ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
1823
1824 switch (pExternalFenceInfo->handleType) {
1825 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
1826 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
1827 if (device->has_syncobj_wait) {
1828 pExternalFenceProperties->exportFromImportedHandleTypes =
1829 VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
1830 VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
1831 pExternalFenceProperties->compatibleHandleTypes =
1832 VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
1833 VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
1834 pExternalFenceProperties->externalFenceFeatures =
1835 VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
1836 VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
1837 return;
1838 }
1839 break;
1840
1841 default:
1842 break;
1843 }
1844
1845 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
1846 pExternalFenceProperties->compatibleHandleTypes = 0;
1847 pExternalFenceProperties->externalFenceFeatures = 0;
1848 }
1849
VkResult anv_ImportFenceFdKHR(
1851 VkDevice _device,
1852 const VkImportFenceFdInfoKHR* pImportFenceFdInfo)
1853 {
1854 ANV_FROM_HANDLE(anv_device, device, _device);
1855 ANV_FROM_HANDLE(anv_fence, fence, pImportFenceFdInfo->fence);
1856 int fd = pImportFenceFdInfo->fd;
1857
1858 assert(pImportFenceFdInfo->sType ==
1859 VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR);
1860
1861 struct anv_fence_impl new_impl = {
1862 .type = ANV_FENCE_TYPE_NONE,
1863 };
1864
1865 switch (pImportFenceFdInfo->handleType) {
1866 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
1867 new_impl.type = ANV_FENCE_TYPE_SYNCOBJ;
1868
1869 new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
1870 if (!new_impl.syncobj)
1871 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
1872
1873 break;
1874
1875 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
1876 /* Sync files are a bit tricky. Because we want to continue using the
1877 * syncobj implementation of WaitForFences, we don't use the sync file
1878 * directly but instead import it into a syncobj.
1879 */
1880 new_impl.type = ANV_FENCE_TYPE_SYNCOBJ;
1881
1882 /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the
1883 * special value -1 for fd is treated like a valid sync file descriptor
1884 * referring to an object that has already signaled. The import
1885 * operation will succeed and the VkFence will have a temporarily
1886 * imported payload as if a valid file descriptor had been provided."
1887 */
1888 uint32_t create_flags = 0;
1889 if (fd == -1)
1890 create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
1891
1892 new_impl.syncobj = anv_gem_syncobj_create(device, create_flags);
1893 if (!new_impl.syncobj)
1894 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1895
1896 if (fd != -1 &&
1897 anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
1898 anv_gem_syncobj_destroy(device, new_impl.syncobj);
1899 return vk_errorf(device, NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE,
1900 "syncobj sync file import failed: %m");
1901 }
1902 break;
1903 }
1904
1905 default:
1906 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
1907 }
1908
1909 /* From the Vulkan 1.0.53 spec:
1910 *
1911 * "Importing a fence payload from a file descriptor transfers
1912 * ownership of the file descriptor from the application to the
1913 * Vulkan implementation. The application must not perform any
1914 * operations on the file descriptor after a successful import."
1915 *
1916 * If the import fails, we leave the file descriptor open.
1917 */
1918 if (fd != -1)
1919 close(fd);
1920
1921 if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
1922 anv_fence_impl_cleanup(device, &fence->temporary);
1923 fence->temporary = new_impl;
1924 } else {
1925 anv_fence_impl_cleanup(device, &fence->permanent);
1926 fence->permanent = new_impl;
1927 }
1928
1929 return VK_SUCCESS;
1930 }
1931
1932 /* The sideband payload of the DRM syncobj was incremented when the
1933 * application called vkQueueSubmit(). Here we wait for a fence with the same
 * value to materialize so that we can export it (typically as a sync FD).
1935 */
1936 static VkResult
wait_syncobj_materialize(struct anv_device *device,
1938 uint32_t syncobj,
1939 int *fd)
1940 {
1941 if (!device->has_thread_submit)
1942 return VK_SUCCESS;
1943
1944 uint64_t binary_value = 0;
1945 /* We might need to wait until the fence materializes before we can
1946 * export to a sync FD when we use a thread for submission.
1947 */
1948 if (anv_gem_syncobj_timeline_wait(device, &syncobj, &binary_value, 1,
1949 anv_get_absolute_timeout(5ull * NSEC_PER_SEC),
1950 true /* wait_all */,
1951 true /* wait_materialize */))
1952 return anv_device_set_lost(device, "anv_gem_syncobj_timeline_wait failed: %m");
1953
1954 return VK_SUCCESS;
1955 }
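/* The 5 second timeout above is a safety net: if the submission thread has
 * not materialized the fence by then, we declare the device lost rather
 * than block the export indefinitely.
 */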
1956
VkResult anv_GetFenceFdKHR(
1958 VkDevice _device,
1959 const VkFenceGetFdInfoKHR* pGetFdInfo,
1960 int* pFd)
1961 {
1962 ANV_FROM_HANDLE(anv_device, device, _device);
1963 ANV_FROM_HANDLE(anv_fence, fence, pGetFdInfo->fence);
1964
1965 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR);
1966
1967 struct anv_fence_impl *impl =
1968 fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1969 &fence->temporary : &fence->permanent;
1970
1971 assert(impl->type == ANV_FENCE_TYPE_SYNCOBJ);
1972 switch (pGetFdInfo->handleType) {
1973 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: {
1974 int fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
1975 if (fd < 0)
1976 return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
1977
1978 *pFd = fd;
1979 break;
1980 }
1981
1982 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
1983 VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
1984 if (result != VK_SUCCESS)
1985 return result;
1986
1987 int fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
1988 if (fd < 0)
1989 return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
1990
1991 *pFd = fd;
1992 break;
1993 }
1994
1995 default:
1996 unreachable("Invalid fence export handle type");
1997 }
1998
1999 /* From the Vulkan 1.0.53 spec:
2000 *
2001 * "Export operations have the same transference as the specified handle
2002 * type’s import operations. [...] If the fence was using a
2003 * temporarily imported payload, the fence’s prior permanent payload
    * will be restored."
2005 */
2006 if (impl == &fence->temporary)
2007 anv_fence_impl_cleanup(device, impl);
2008
2009 return VK_SUCCESS;
2010 }
2011
/* Queue semaphore functions */
2013
2014 static VkSemaphoreTypeKHR
get_semaphore_type(const void *pNext, uint64_t *initial_value)
2016 {
2017 const VkSemaphoreTypeCreateInfoKHR *type_info =
2018 vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR);
2019
2020 if (!type_info)
2021 return VK_SEMAPHORE_TYPE_BINARY_KHR;
2022
2023 if (initial_value)
2024 *initial_value = type_info->initialValue;
2025 return type_info->semaphoreType;
2026 }
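/* Illustrative only: an application selects a timeline semaphore by chaining
 * VkSemaphoreTypeCreateInfoKHR into vkCreateSemaphore (sketch, not driver
 * code):
 *
 *    VkSemaphoreTypeCreateInfoKHR type_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
 *       .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
 *       .initialValue = 0,
 *    };
 *    VkSemaphoreCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
 *       .pNext = &type_info,
 *    };
 *    vkCreateSemaphore(device, &info, NULL, &semaphore);
 *
 * Without the chained struct, get_semaphore_type() above defaults to
 * VK_SEMAPHORE_TYPE_BINARY_KHR.
 */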
2027
2028 static VkResult
binary_semaphore_create(struct anv_device *device,
2030 struct anv_semaphore_impl *impl,
2031 bool exportable)
2032 {
2033 if (device->physical->has_syncobj) {
2034 impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
2035 impl->syncobj = anv_gem_syncobj_create(device, 0);
2036 if (!impl->syncobj)
2037 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2038 return VK_SUCCESS;
2039 } else {
2040 impl->type = ANV_SEMAPHORE_TYPE_BO;
2041 VkResult result =
2042 anv_device_alloc_bo(device, 4096,
2043 ANV_BO_ALLOC_EXTERNAL |
2044 ANV_BO_ALLOC_IMPLICIT_SYNC,
2045 0 /* explicit_address */,
2046 &impl->bo);
      if (result != VK_SUCCESS)
         return result;

      /* If we're going to use this as a fence, we need to *not* have the
       * EXEC_OBJECT_ASYNC bit set.
       */
      assert(!(impl->bo->flags & EXEC_OBJECT_ASYNC));
      return VK_SUCCESS;
2052 }
2053 }
2054
2055 static VkResult
timeline_semaphore_create(struct anv_device *device,
2057 struct anv_semaphore_impl *impl,
2058 uint64_t initial_value)
2059 {
2060 if (device->has_thread_submit) {
2061 impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE;
2062 impl->syncobj = anv_gem_syncobj_create(device, 0);
2063 if (!impl->syncobj)
2064 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2065 if (initial_value) {
2066 if (anv_gem_syncobj_timeline_signal(device,
2067 &impl->syncobj,
2068 &initial_value, 1)) {
2069 anv_gem_syncobj_destroy(device, impl->syncobj);
2070 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2071 }
2072 }
2073 } else {
2074 impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
2075 anv_timeline_init(device, &impl->timeline, initial_value);
2076 }
2077
2078 return VK_SUCCESS;
2079 }
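/* Note: on the thread-submit path a non-zero initial value is published by
 * signaling the timeline syncobj right away, whereas the software
 * anv_timeline simply starts at initial_value.
 */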
2080
VkResult anv_CreateSemaphore(
2082 VkDevice _device,
2083 const VkSemaphoreCreateInfo* pCreateInfo,
2084 const VkAllocationCallbacks* pAllocator,
2085 VkSemaphore* pSemaphore)
2086 {
2087 ANV_FROM_HANDLE(anv_device, device, _device);
2088 struct anv_semaphore *semaphore;
2089
2090 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);
2091
2092 uint64_t timeline_value = 0;
2093 VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value);
2094
2095 semaphore = vk_alloc(&device->vk.alloc, sizeof(*semaphore), 8,
2096 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2097 if (semaphore == NULL)
2098 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2099
2100 vk_object_base_init(&device->vk, &semaphore->base, VK_OBJECT_TYPE_SEMAPHORE);
2101
2102 p_atomic_set(&semaphore->refcount, 1);
2103
2104 const VkExportSemaphoreCreateInfo *export =
2105 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
2106 VkExternalSemaphoreHandleTypeFlags handleTypes =
2107 export ? export->handleTypes : 0;
2108 VkResult result;
2109
2110 if (handleTypes == 0) {
2111 if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
2112 result = binary_semaphore_create(device, &semaphore->permanent, false);
2113 else
2114 result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
2115 if (result != VK_SUCCESS) {
2116 vk_free2(&device->vk.alloc, pAllocator, semaphore);
2117 return result;
2118 }
2119 } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
2120 assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
2121 if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
2122 result = binary_semaphore_create(device, &semaphore->permanent, true);
2123 else
2124 result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
2125 if (result != VK_SUCCESS) {
2126 vk_free2(&device->vk.alloc, pAllocator, semaphore);
2127 return result;
2128 }
2129 } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
2130 assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT);
2131 assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR);
2132 if (device->physical->has_syncobj) {
2133 semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
2134 semaphore->permanent.syncobj = anv_gem_syncobj_create(device, 0);
2135 if (!semaphore->permanent.syncobj) {
2136 vk_free2(&device->vk.alloc, pAllocator, semaphore);
2137 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2138 }
2139 } else {
2140 semaphore->permanent.type = ANV_SEMAPHORE_TYPE_SYNC_FILE;
2141 semaphore->permanent.fd = -1;
2142 }
2143 } else {
2144 assert(!"Unknown handle type");
2145 vk_free2(&device->vk.alloc, pAllocator, semaphore);
2146 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
2147 }
2148
2149 semaphore->temporary.type = ANV_SEMAPHORE_TYPE_NONE;
2150
2151 *pSemaphore = anv_semaphore_to_handle(semaphore);
2152
2153 return VK_SUCCESS;
2154 }
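/* Illustrative only (sketch): exporting requires opting in at creation time
 * by chaining VkExportSemaphoreCreateInfo into VkSemaphoreCreateInfo::pNext:
 *
 *    VkExportSemaphoreCreateInfo export_info = {
 *       .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
 *       .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
 *    };
 */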
2155
2156 static void
anv_semaphore_impl_cleanup(struct anv_device *device,
2158 struct anv_semaphore_impl *impl)
2159 {
2160 switch (impl->type) {
2161 case ANV_SEMAPHORE_TYPE_NONE:
2162 case ANV_SEMAPHORE_TYPE_DUMMY:
2163 /* Dummy. Nothing to do */
2164 break;
2165
2166 case ANV_SEMAPHORE_TYPE_BO:
2167 case ANV_SEMAPHORE_TYPE_WSI_BO:
2168 anv_device_release_bo(device, impl->bo);
2169 break;
2170
2171 case ANV_SEMAPHORE_TYPE_SYNC_FILE:
2172 if (impl->fd >= 0)
2173 close(impl->fd);
2174 break;
2175
2176 case ANV_SEMAPHORE_TYPE_TIMELINE:
2177 anv_timeline_finish(device, &impl->timeline);
2178 break;
2179
2180 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
2181 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
2182 anv_gem_syncobj_destroy(device, impl->syncobj);
2183 break;
2184
2185 default:
2186 unreachable("Invalid semaphore type");
2187 }
2188
2189 impl->type = ANV_SEMAPHORE_TYPE_NONE;
2190 }
2191
2192 void
anv_semaphore_reset_temporary(struct anv_device *device,
2194 struct anv_semaphore *semaphore)
2195 {
2196 if (semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE)
2197 return;
2198
2199 anv_semaphore_impl_cleanup(device, &semaphore->temporary);
2200 }
2201
2202 static struct anv_semaphore *
anv_semaphore_ref(struct anv_semaphore *semaphore)
2204 {
2205 assert(semaphore->refcount);
2206 p_atomic_inc(&semaphore->refcount);
2207 return semaphore;
2208 }
2209
2210 static void
anv_semaphore_unref(struct anv_device *device, struct anv_semaphore *semaphore)
2212 {
2213 if (!p_atomic_dec_zero(&semaphore->refcount))
2214 return;
2215
2216 anv_semaphore_impl_cleanup(device, &semaphore->temporary);
2217 anv_semaphore_impl_cleanup(device, &semaphore->permanent);
2218
2219 vk_object_base_finish(&semaphore->base);
2220 vk_free(&device->vk.alloc, semaphore);
2221 }
2222
void anv_DestroySemaphore(
2224 VkDevice _device,
2225 VkSemaphore _semaphore,
2226 const VkAllocationCallbacks* pAllocator)
2227 {
2228 ANV_FROM_HANDLE(anv_device, device, _device);
2229 ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
2230
2231 if (semaphore == NULL)
2232 return;
2233
2234 anv_semaphore_unref(device, semaphore);
2235 }
2236
void anv_GetPhysicalDeviceExternalSemaphoreProperties(
2238 VkPhysicalDevice physicalDevice,
2239 const VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo,
2240 VkExternalSemaphoreProperties* pExternalSemaphoreProperties)
2241 {
2242 ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2243
2244 VkSemaphoreTypeKHR sem_type =
2245 get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
2246
2247 switch (pExternalSemaphoreInfo->handleType) {
2248 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
2249 /* Timeline semaphores are not exportable, unless we have threaded
2250 * submission.
2251 */
2252 if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR && !device->has_thread_submit)
2253 break;
2254 pExternalSemaphoreProperties->exportFromImportedHandleTypes =
2255 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
2256 pExternalSemaphoreProperties->compatibleHandleTypes =
2257 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
2258 pExternalSemaphoreProperties->externalSemaphoreFeatures =
2259 VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
2260 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
2261 return;
2262
2263 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
2264 if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
2265 break;
2266 if (!device->has_exec_fence)
2267 break;
2268 pExternalSemaphoreProperties->exportFromImportedHandleTypes =
2269 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
2270 pExternalSemaphoreProperties->compatibleHandleTypes =
2271 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
2272 pExternalSemaphoreProperties->externalSemaphoreFeatures =
2273 VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
2274 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
2275 return;
2276
2277 default:
2278 break;
2279 }
2280
2281 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
2282 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
2283 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
2284 }
2285
VkResult anv_ImportSemaphoreFdKHR(
2287 VkDevice _device,
2288 const VkImportSemaphoreFdInfoKHR* pImportSemaphoreFdInfo)
2289 {
2290 ANV_FROM_HANDLE(anv_device, device, _device);
2291 ANV_FROM_HANDLE(anv_semaphore, semaphore, pImportSemaphoreFdInfo->semaphore);
2292 int fd = pImportSemaphoreFdInfo->fd;
2293
2294 struct anv_semaphore_impl new_impl = {
2295 .type = ANV_SEMAPHORE_TYPE_NONE,
2296 };
2297
2298 switch (pImportSemaphoreFdInfo->handleType) {
2299 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
2300 if (device->physical->has_syncobj) {
         /* When not importing temporarily, reuse the semaphore's existing
          * type. The Linux/DRM implementation allows binary & timeline
          * semaphores to be used interchangeably and we have no way to
          * differentiate them.
          */
2306 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT)
2307 new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
2308 else
2309 new_impl.type = semaphore->permanent.type;
2310
2311 new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
2312 if (!new_impl.syncobj)
2313 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
2314 } else {
2315 new_impl.type = ANV_SEMAPHORE_TYPE_BO;
2316
2317 VkResult result = anv_device_import_bo(device, fd,
2318 ANV_BO_ALLOC_EXTERNAL |
2319 ANV_BO_ALLOC_IMPLICIT_SYNC,
2320 0 /* client_address */,
2321 &new_impl.bo);
2322 if (result != VK_SUCCESS)
2323 return result;
2324
2325 if (new_impl.bo->size < 4096) {
2326 anv_device_release_bo(device, new_impl.bo);
2327 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
2328 }
2329
2330 /* If we're going to use this as a fence, we need to *not* have the
2331 * EXEC_OBJECT_ASYNC bit set.
2332 */
2333 assert(!(new_impl.bo->flags & EXEC_OBJECT_ASYNC));
2334 }
2335
2336 /* From the Vulkan spec:
2337 *
2338 * "Importing semaphore state from a file descriptor transfers
2339 * ownership of the file descriptor from the application to the
2340 * Vulkan implementation. The application must not perform any
2341 * operations on the file descriptor after a successful import."
2342 *
2343 * If the import fails, we leave the file descriptor open.
2344 */
2345 close(fd);
2346 break;
2347
2348 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
2349 if (device->physical->has_syncobj) {
2350 uint32_t create_flags = 0;
2351
2352 if (fd == -1)
2353 create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
2354
2355 new_impl = (struct anv_semaphore_impl) {
2356 .type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
2357 .syncobj = anv_gem_syncobj_create(device, create_flags),
2358 };
2359
2360 if (!new_impl.syncobj)
2361 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2362
2363 if (fd != -1) {
2364 if (anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
2365 anv_gem_syncobj_destroy(device, new_impl.syncobj);
2366 return vk_errorf(device, NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE,
2367 "syncobj sync file import failed: %m");
2368 }
            /* Ownership of the FD is transferred to Anv. Since we don't need it
2370 * anymore because the associated fence has been put into a syncobj,
2371 * we must close the FD.
2372 */
2373 close(fd);
2374 }
2375 } else {
2376 new_impl = (struct anv_semaphore_impl) {
2377 .type = ANV_SEMAPHORE_TYPE_SYNC_FILE,
2378 .fd = fd,
2379 };
2380 }
2381 break;
2382
2383 default:
2384 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
2385 }
2386
2387 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
2388 anv_semaphore_impl_cleanup(device, &semaphore->temporary);
2389 semaphore->temporary = new_impl;
2390 } else {
2391 anv_semaphore_impl_cleanup(device, &semaphore->permanent);
2392 semaphore->permanent = new_impl;
2393 }
2394
2395 return VK_SUCCESS;
2396 }
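/* Illustrative only: a typical temporary sync-FD import as performed by a
 * client of this entry point (sketch; error handling omitted). Note that the
 * spec requires VK_SEMAPHORE_IMPORT_TEMPORARY_BIT for sync FD imports:
 *
 *    VkImportSemaphoreFdInfoKHR info = {
 *       .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
 *       .semaphore = sem,
 *       .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
 *       .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
 *       .fd = fence_fd,
 *    };
 *    vkImportSemaphoreFdKHR(device, &info);
 */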
2397
VkResult anv_GetSemaphoreFdKHR(
2399 VkDevice _device,
2400 const VkSemaphoreGetFdInfoKHR* pGetFdInfo,
2401 int* pFd)
2402 {
2403 ANV_FROM_HANDLE(anv_device, device, _device);
2404 ANV_FROM_HANDLE(anv_semaphore, semaphore, pGetFdInfo->semaphore);
2405 VkResult result;
2406 int fd;
2407
2408 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR);
2409
2410 struct anv_semaphore_impl *impl =
2411 semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2412 &semaphore->temporary : &semaphore->permanent;
2413
2414 switch (impl->type) {
2415 case ANV_SEMAPHORE_TYPE_BO:
2416 result = anv_device_export_bo(device, impl->bo, pFd);
2417 if (result != VK_SUCCESS)
2418 return result;
2419 break;
2420
2421 case ANV_SEMAPHORE_TYPE_SYNC_FILE: {
2422 /* There's a potential race here with vkQueueSubmit if you are trying
       * to export a semaphore FD while the queue submit is still happening.
2424 * This can happen if we see all dependencies get resolved via timeline
2425 * semaphore waits completing before the execbuf completes and we
2426 * process the resulting out fence. To work around this, take a lock
2427 * around grabbing the fd.
2428 */
2429 pthread_mutex_lock(&device->mutex);
2430
2431 /* From the Vulkan 1.0.53 spec:
2432 *
2433 * "...exporting a semaphore payload to a handle with copy
2434 * transference has the same side effects on the source
2435 * semaphore’s payload as executing a semaphore wait operation."
2436 *
2437 * In other words, it may still be a SYNC_FD semaphore, but it's now
2438 * considered to have been waited on and no longer has a sync file
2439 * attached.
2440 */
2441 int fd = impl->fd;
2442 impl->fd = -1;
2443
2444 pthread_mutex_unlock(&device->mutex);
2445
2446 /* There are two reasons why this could happen:
2447 *
2448 * 1) The user is trying to export without submitting something that
2449 * signals the semaphore. If this is the case, it's their bug so
2450 * what we return here doesn't matter.
2451 *
2452 * 2) The kernel didn't give us a file descriptor. The most likely
2453 * reason for this is running out of file descriptors.
2454 */
2455 if (fd < 0)
2456 return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
2457
2458 *pFd = fd;
2459 return VK_SUCCESS;
2460 }
2461
2462 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
2463 if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
2464 VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
2465 if (result != VK_SUCCESS)
2466 return result;
2467
2468 fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
2469 } else {
2470 assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
2471 fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
2472 }
2473 if (fd < 0)
2474 return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
2475 *pFd = fd;
2476 break;
2477
2478 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
2479 assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
2480 fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
2481 if (fd < 0)
2482 return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
2483 *pFd = fd;
2484 break;
2485
2486 default:
2487 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
2488 }
2489
2490 /* From the Vulkan 1.0.53 spec:
2491 *
2492 * "Export operations have the same transference as the specified handle
2493 * type’s import operations. [...] If the semaphore was using a
2494 * temporarily imported payload, the semaphore’s prior permanent payload
    * will be restored."
2496 */
2497 if (impl == &semaphore->temporary)
2498 anv_semaphore_impl_cleanup(device, impl);
2499
2500 return VK_SUCCESS;
2501 }
2502
VkResult anv_GetSemaphoreCounterValue(
2504 VkDevice _device,
2505 VkSemaphore _semaphore,
2506 uint64_t* pValue)
2507 {
2508 ANV_FROM_HANDLE(anv_device, device, _device);
2509 ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
2510
2511 struct anv_semaphore_impl *impl =
2512 semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2513 &semaphore->temporary : &semaphore->permanent;
2514
2515 switch (impl->type) {
2516 case ANV_SEMAPHORE_TYPE_TIMELINE: {
2517 pthread_mutex_lock(&device->mutex);
2518 anv_timeline_gc_locked(device, &impl->timeline);
2519 *pValue = impl->timeline.highest_past;
2520 pthread_mutex_unlock(&device->mutex);
2521 return VK_SUCCESS;
2522 }
2523
2524 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
2525 int ret = anv_gem_syncobj_timeline_query(device, &impl->syncobj, pValue, 1);
2526
2527 if (ret != 0)
2528 return anv_device_set_lost(device, "unable to query timeline syncobj");
2529
2530 return VK_SUCCESS;
2531 }
2532
2533 default:
2534 unreachable("Invalid semaphore type");
2535 }
2536 }
2537
2538 static VkResult
anv_timeline_wait_locked(struct anv_device *device,
2540 struct anv_timeline *timeline,
2541 uint64_t serial, uint64_t abs_timeout_ns)
2542 {
2543 /* Wait on the queue_submit condition variable until the timeline has a
2544 * time point pending that's at least as high as serial.
2545 */
2546 while (timeline->highest_pending < serial) {
2547 struct timespec abstime = {
2548 .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
2549 .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
2550 };
2551
2552 UNUSED int ret = pthread_cond_timedwait(&device->queue_submit,
2553 &device->mutex, &abstime);
2554 assert(ret != EINVAL);
2555 if (anv_gettime_ns() >= abs_timeout_ns &&
2556 timeline->highest_pending < serial)
2557 return VK_TIMEOUT;
2558 }
2559
2560 while (1) {
2561 VkResult result = anv_timeline_gc_locked(device, timeline);
2562 if (result != VK_SUCCESS)
2563 return result;
2564
2565 if (timeline->highest_past >= serial)
2566 return VK_SUCCESS;
2567
2568 /* If we got here, our earliest time point has a busy BO */
2569 struct anv_timeline_point *point =
2570 list_first_entry(&timeline->points,
2571 struct anv_timeline_point, link);
2572
2573 /* Drop the lock while we wait. */
2574 point->waiting++;
2575 pthread_mutex_unlock(&device->mutex);
2576
2577 result = anv_device_wait(device, point->bo,
2578 anv_get_relative_timeout(abs_timeout_ns));
2579
2580 /* Pick the mutex back up */
2581 pthread_mutex_lock(&device->mutex);
2582 point->waiting--;
2583
2584 /* This covers both VK_TIMEOUT and VK_ERROR_DEVICE_LOST */
2585 if (result != VK_SUCCESS)
2586 return result;
2587 }
2588 }
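/* anv_timeline_wait_locked() above proceeds in two phases: it first waits,
 * under the device mutex, for a time point >= serial to be submitted, then
 * waits on the BO backing the earliest point, dropping the mutex so that
 * submission can make progress in the meantime.
 */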
2589
2590 static VkResult
anv_timelines_wait(struct anv_device *device,
2592 struct anv_timeline **timelines,
2593 const uint64_t *serials,
2594 uint32_t n_timelines,
2595 bool wait_all,
2596 uint64_t abs_timeout_ns)
2597 {
2598 if (!wait_all && n_timelines > 1) {
2599 pthread_mutex_lock(&device->mutex);
2600
2601 while (1) {
2602 VkResult result;
2603 for (uint32_t i = 0; i < n_timelines; i++) {
2604 result =
2605 anv_timeline_wait_locked(device, timelines[i], serials[i], 0);
2606 if (result != VK_TIMEOUT)
2607 break;
2608 }
2609
2610 if (result != VK_TIMEOUT ||
2611 anv_gettime_ns() >= abs_timeout_ns) {
2612 pthread_mutex_unlock(&device->mutex);
2613 return result;
2614 }
2615
2616 /* If none of them are ready do a short wait so we don't completely
2617 * spin while holding the lock. The 10us is completely arbitrary.
2618 */
2619 uint64_t abs_short_wait_ns =
2620 anv_get_absolute_timeout(
               MIN2((abs_timeout_ns - anv_gettime_ns()) / 10, 10 * 1000));
2622 struct timespec abstime = {
2623 .tv_sec = abs_short_wait_ns / NSEC_PER_SEC,
2624 .tv_nsec = abs_short_wait_ns % NSEC_PER_SEC,
2625 };
2626 ASSERTED int ret;
2627 ret = pthread_cond_timedwait(&device->queue_submit,
2628 &device->mutex, &abstime);
2629 assert(ret != EINVAL);
2630 }
2631 } else {
2632 VkResult result = VK_SUCCESS;
2633 pthread_mutex_lock(&device->mutex);
2634 for (uint32_t i = 0; i < n_timelines; i++) {
2635 result =
2636 anv_timeline_wait_locked(device, timelines[i],
2637 serials[i], abs_timeout_ns);
2638 if (result != VK_SUCCESS)
2639 break;
2640 }
2641 pthread_mutex_unlock(&device->mutex);
2642 return result;
2643 }
2644 }
2645
VkResult anv_WaitSemaphores(
2647 VkDevice _device,
2648 const VkSemaphoreWaitInfoKHR* pWaitInfo,
2649 uint64_t timeout)
2650 {
2651 ANV_FROM_HANDLE(anv_device, device, _device);
2652 uint32_t *handles;
2653 struct anv_timeline **timelines;
2654 uint64_t *values;
2655
2656 ANV_MULTIALLOC(ma);
2657
2658 anv_multialloc_add(&ma, &values, pWaitInfo->semaphoreCount);
2659 if (device->has_thread_submit) {
2660 anv_multialloc_add(&ma, &handles, pWaitInfo->semaphoreCount);
2661 } else {
2662 anv_multialloc_add(&ma, &timelines, pWaitInfo->semaphoreCount);
2663 }
2664
2665 if (!anv_multialloc_alloc(&ma, &device->vk.alloc,
2666 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND))
2667 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2668
2669 uint32_t handle_count = 0;
2670 for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {
2671 ANV_FROM_HANDLE(anv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
2672 struct anv_semaphore_impl *impl =
2673 semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2674 &semaphore->temporary : &semaphore->permanent;
2675
2676 if (pWaitInfo->pValues[i] == 0)
2677 continue;
2678
2679 if (device->has_thread_submit) {
2680 assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE);
2681 handles[handle_count] = impl->syncobj;
2682 } else {
2683 assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
2684 timelines[handle_count] = &impl->timeline;
2685 }
2686 values[handle_count] = pWaitInfo->pValues[i];
2687 handle_count++;
2688 }
2689
2690 VkResult result = VK_SUCCESS;
2691 if (handle_count > 0) {
2692 if (device->has_thread_submit) {
2693 int ret =
2694 anv_gem_syncobj_timeline_wait(device,
2695 handles, values, handle_count,
2696 anv_get_absolute_timeout(timeout),
2697 !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
2698 false);
2699 if (ret != 0)
2700 result = errno == ETIME ? VK_TIMEOUT :
2701 anv_device_set_lost(device, "unable to wait on timeline syncobj");
2702 } else {
2703 result =
2704 anv_timelines_wait(device, timelines, values, handle_count,
2705 !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
2706 anv_get_absolute_timeout(timeout));
2707 }
2708 }
2709
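   /* "values" was the first pointer registered with anv_multialloc_add(), so
    * it also serves as the base of the combined allocation; freeing it
    * releases the handles/timelines arrays as well.
    */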
2710 vk_free(&device->vk.alloc, values);
2711
2712 return result;
2713 }
2714
VkResult anv_SignalSemaphore(
2716 VkDevice _device,
2717 const VkSemaphoreSignalInfoKHR* pSignalInfo)
2718 {
2719 ANV_FROM_HANDLE(anv_device, device, _device);
2720 ANV_FROM_HANDLE(anv_semaphore, semaphore, pSignalInfo->semaphore);
2721
2722 struct anv_semaphore_impl *impl =
2723 semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2724 &semaphore->temporary : &semaphore->permanent;
2725
2726 switch (impl->type) {
2727 case ANV_SEMAPHORE_TYPE_TIMELINE: {
2728 pthread_mutex_lock(&device->mutex);
2729
2730 VkResult result = anv_timeline_gc_locked(device, &impl->timeline);
2731
2732 assert(pSignalInfo->value > impl->timeline.highest_pending);
2733
2734 impl->timeline.highest_pending = impl->timeline.highest_past = pSignalInfo->value;
2735
2736 if (result == VK_SUCCESS)
2737 result = anv_device_submit_deferred_locked(device);
2738
2739 pthread_cond_broadcast(&device->queue_submit);
2740 pthread_mutex_unlock(&device->mutex);
2741 return result;
2742 }
2743
2744 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
2745 /* Timeline semaphores are created with a value of 0, so signaling on 0
2746 * is a waste of time.
2747 */
2748 if (pSignalInfo->value == 0)
2749 return VK_SUCCESS;
2750
2751 int ret = anv_gem_syncobj_timeline_signal(device, &impl->syncobj,
2752 &pSignalInfo->value, 1);
2753
2754 return ret == 0 ? VK_SUCCESS :
2755 anv_device_set_lost(device, "unable to signal timeline syncobj");
2756 }
2757
2758 default:
2759 unreachable("Invalid semaphore type");
2760 }
2761 }
2762