/*
 * Copyright 2020 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <xf86drm.h>

#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

#include "drm-uapi/virtgpu_drm.h"
#include "util/sparse_array.h"
#define VIRGL_RENDERER_UNSTABLE_APIS
#include "virtio-gpu/virglrenderer_hw.h"

#include "vn_renderer_internal.h"

#ifndef VIRTGPU_PARAM_GUEST_VRAM
/* All guest allocations happen via virtgpu dedicated heap. */
#define VIRTGPU_PARAM_GUEST_VRAM 9
#endif

#ifndef VIRTGPU_BLOB_MEM_GUEST_VRAM
#define VIRTGPU_BLOB_MEM_GUEST_VRAM 0x0004
#endif

/* XXX comment these out to really use kernel uapi */
#define SIMULATE_BO_SIZE_FIX 1
#define SIMULATE_SYNCOBJ 1
#define SIMULATE_SUBMIT 1

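/* 0x1af4 is the virtio PCI vendor id; 0x1050 is the modern virtio-gpu PCI
 * device id (0x1040 plus virtio device id 16).
 */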
#define VIRTGPU_PCI_VENDOR_ID 0x1af4
#define VIRTGPU_PCI_DEVICE_ID 0x1050

struct virtgpu;

struct virtgpu_shmem {
   struct vn_renderer_shmem base;
   uint32_t gem_handle;
};

struct virtgpu_bo {
   struct vn_renderer_bo base;
   uint32_t gem_handle;
   uint32_t blob_flags;
};

struct virtgpu_sync {
   struct vn_renderer_sync base;

   /*
    * drm_syncobj is in one of these states
    *
    * - value N:      drm_syncobj has a signaled fence chain with seqno N
    * - pending N->M: drm_syncobj has an unsignaled fence chain with seqno M
    *                 (which may point to another unsignaled fence chain with
    *                  seqno between N and M, and so on)
    *
    * TODO Do we want to use binary drm_syncobjs? They would be
    *
    * - value 0: drm_syncobj has no fence
    * - value 1: drm_syncobj has a signaled fence with seqno 0
    *
    * They are cheaper but require special care.
    */
   uint32_t syncobj_handle;
};

struct virtgpu {
   struct vn_renderer base;

   struct vn_instance *instance;

   int fd;

   bool has_primary;
   int primary_major;
   int primary_minor;
   int render_major;
   int render_minor;

   int bustype;
   drmPciBusInfo pci_bus_info;

   uint32_t max_timeline_count;

   struct {
      enum virgl_renderer_capset id;
      uint32_t version;
      struct virgl_renderer_capset_venus data;
   } capset;

   uint32_t shmem_blob_mem;
   uint32_t bo_blob_mem;

   /* note that we use gem_handle instead of res_id to index because
    * res_id is monotonically increasing by default (see
    * virtio_gpu_resource_id_get)
    */
   struct util_sparse_array shmem_array;
   struct util_sparse_array bo_array;

   mtx_t dma_buf_import_mutex;

   struct vn_renderer_shmem_cache shmem_cache;
};

#ifdef SIMULATE_SYNCOBJ

#include "util/hash_table.h"
#include "util/u_idalloc.h"

static struct {
   mtx_t mutex;
   struct hash_table *syncobjs;
   struct util_idalloc ida;

   int signaled_fd;
} sim;

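/* Simulated timeline syncobj state: pending_fd, when >= 0, is a sync_file fd
 * exported from an earlier execbuffer; once it polls as signaled, point is
 * advanced to pending_point. pending_cpu marks fences from the CPU ring
 * (ring_idx 0), which are waited on with a bounded timeout.
 */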
struct sim_syncobj {
   mtx_t mutex;
   uint64_t point;

   int pending_fd;
   uint64_t pending_point;
   bool pending_cpu;
};

static uint32_t
sim_syncobj_create(struct virtgpu *gpu, bool signaled)
{
   struct sim_syncobj *syncobj = calloc(1, sizeof(*syncobj));
   if (!syncobj)
      return 0;

   mtx_init(&syncobj->mutex, mtx_plain);
   syncobj->pending_fd = -1;

   mtx_lock(&sim.mutex);

   /* initialize lazily */
   if (!sim.syncobjs) {
      sim.syncobjs = _mesa_pointer_hash_table_create(NULL);
      if (!sim.syncobjs) {
         mtx_unlock(&sim.mutex);
         return 0;
      }

      util_idalloc_init(&sim.ida, 32);

      struct drm_virtgpu_execbuffer args = {
         .flags = VIRTGPU_EXECBUF_FENCE_FD_OUT |
                  (gpu->base.info.supports_multiple_timelines
                      ? VIRTGPU_EXECBUF_RING_IDX
                      : 0),
         .ring_idx = 0, /* CPU ring */
      };
      int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
      if (ret || args.fence_fd < 0) {
         _mesa_hash_table_destroy(sim.syncobjs, NULL);
         sim.syncobjs = NULL;
         mtx_unlock(&sim.mutex);
         return 0;
      }

      sim.signaled_fd = args.fence_fd;
   }

   const unsigned syncobj_handle = util_idalloc_alloc(&sim.ida) + 1;
   _mesa_hash_table_insert(sim.syncobjs,
                           (const void *)(uintptr_t)syncobj_handle, syncobj);

   mtx_unlock(&sim.mutex);

   return syncobj_handle;
}

static void
sim_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = NULL;

   mtx_lock(&sim.mutex);

   struct hash_entry *entry = _mesa_hash_table_search(
      sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
   if (entry) {
      syncobj = entry->data;
      _mesa_hash_table_remove(sim.syncobjs, entry);
      util_idalloc_free(&sim.ida, syncobj_handle - 1);
   }

   mtx_unlock(&sim.mutex);

   if (syncobj) {
      if (syncobj->pending_fd >= 0)
         close(syncobj->pending_fd);
      mtx_destroy(&syncobj->mutex);
      free(syncobj);
   }
}

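/* Poll a sync_file fd: POLLIN becoming readable means the underlying fence
 * has signaled. Returns VK_SUCCESS, VK_TIMEOUT, or an error when poll fails.
 */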
static VkResult
sim_syncobj_poll(int fd, int poll_timeout)
{
   struct pollfd pollfd = {
      .fd = fd,
      .events = POLLIN,
   };
   int ret;
   do {
      ret = poll(&pollfd, 1, poll_timeout);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   if (ret < 0 || (ret > 0 && !(pollfd.revents & POLLIN))) {
      return (ret < 0 && errno == ENOMEM) ? VK_ERROR_OUT_OF_HOST_MEMORY
                                          : VK_ERROR_DEVICE_LOST;
   }

   return ret ? VK_SUCCESS : VK_TIMEOUT;
}

static void
sim_syncobj_set_point_locked(struct sim_syncobj *syncobj, uint64_t point)
{
   syncobj->point = point;

   if (syncobj->pending_fd >= 0) {
      close(syncobj->pending_fd);
      syncobj->pending_fd = -1;
      syncobj->pending_point = point;
   }
}

static void
sim_syncobj_update_point_locked(struct sim_syncobj *syncobj, int poll_timeout)
{
   if (syncobj->pending_fd >= 0) {
      VkResult result;
      if (syncobj->pending_cpu) {
         if (poll_timeout == -1) {
            const int max_cpu_timeout = 2000;
            poll_timeout = max_cpu_timeout;
            result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
            if (result == VK_TIMEOUT) {
               vn_log(NULL, "cpu sync timed out after %dms; ignoring",
                      poll_timeout);
               result = VK_SUCCESS;
            }
         } else {
            result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
         }
      } else {
         result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
      }
      if (result == VK_SUCCESS) {
         close(syncobj->pending_fd);
         syncobj->pending_fd = -1;
         syncobj->point = syncobj->pending_point;
      }
   }
}

static struct sim_syncobj *
sim_syncobj_lookup(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = NULL;

   mtx_lock(&sim.mutex);
   struct hash_entry *entry = _mesa_hash_table_search(
      sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
   if (entry)
      syncobj = entry->data;
   mtx_unlock(&sim.mutex);

   return syncobj;
}

static int
sim_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_set_point_locked(syncobj, 0);
   mtx_unlock(&syncobj->mutex);

   return 0;
}

static int
sim_syncobj_query(struct virtgpu *gpu,
                  uint32_t syncobj_handle,
                  uint64_t *point)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_update_point_locked(syncobj, 0);
   *point = syncobj->point;
   mtx_unlock(&syncobj->mutex);

   return 0;
}

static int
sim_syncobj_signal(struct virtgpu *gpu,
                   uint32_t syncobj_handle,
                   uint64_t point)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_set_point_locked(syncobj, point);
   mtx_unlock(&syncobj->mutex);

   return 0;
}

static int
sim_syncobj_submit(struct virtgpu *gpu,
                   uint32_t syncobj_handle,
                   int sync_fd,
                   uint64_t point,
                   bool cpu)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   int pending_fd = dup(sync_fd);
   if (pending_fd < 0) {
      vn_log(gpu->instance, "failed to dup sync fd");
      return -1;
   }

   mtx_lock(&syncobj->mutex);

   if (syncobj->pending_fd >= 0) {
      mtx_unlock(&syncobj->mutex);

      /* TODO */
      vn_log(gpu->instance, "sorry, no simulated timeline semaphore");
      close(pending_fd);
      return -1;
   }
   if (syncobj->point >= point)
      vn_log(gpu->instance, "non-monotonic signaling");

   syncobj->pending_fd = pending_fd;
   syncobj->pending_point = point;
   syncobj->pending_cpu = cpu;

   mtx_unlock(&syncobj->mutex);

   return 0;
}

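/* Convert a timeout in nanoseconds to a poll(2) timeout in milliseconds,
 * rounding up; -1 (block indefinitely) is returned when the value does not
 * fit in an int.
 */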
static int
timeout_to_poll_timeout(uint64_t timeout)
{
   const uint64_t ns_per_ms = 1000000;
   const uint64_t ms = (timeout + ns_per_ms - 1) / ns_per_ms;
   if (!ms && timeout)
      return -1;
   return ms <= INT_MAX ? ms : -1;
}

static int
sim_syncobj_wait(struct virtgpu *gpu,
                 const struct vn_renderer_wait *wait,
                 bool wait_avail)
{
   if (wait_avail)
      return -1;

   const int poll_timeout = timeout_to_poll_timeout(wait->timeout);

   /* TODO poll all fds at the same time */
   for (uint32_t i = 0; i < wait->sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
      const uint64_t point = wait->sync_values[i];

      struct sim_syncobj *syncobj =
         sim_syncobj_lookup(gpu, sync->syncobj_handle);
      if (!syncobj)
         return -1;

      mtx_lock(&syncobj->mutex);

      if (syncobj->point < point)
         sim_syncobj_update_point_locked(syncobj, poll_timeout);

      if (syncobj->point < point) {
         if (wait->wait_any && i < wait->sync_count - 1 &&
             syncobj->pending_fd < 0) {
            mtx_unlock(&syncobj->mutex);
            continue;
         }
         errno = ETIME;
         mtx_unlock(&syncobj->mutex);
         return -1;
      }

      mtx_unlock(&syncobj->mutex);

      if (wait->wait_any)
         break;

      /* TODO adjust poll_timeout */
   }

   return 0;
}

static int
sim_syncobj_export(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   int fd = -1;
   mtx_lock(&syncobj->mutex);
   if (syncobj->pending_fd >= 0)
      fd = dup(syncobj->pending_fd);
   else
      fd = dup(sim.signaled_fd);
   mtx_unlock(&syncobj->mutex);

   return fd;
}

static uint32_t
sim_syncobj_import(struct virtgpu *gpu, uint32_t syncobj_handle, int fd)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return 0;

   if (sim_syncobj_submit(gpu, syncobj_handle, fd, 1, false))
      return 0;

   return syncobj_handle;
}

#endif /* SIMULATE_SYNCOBJ */

#ifdef SIMULATE_SUBMIT

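/* The simulated submit path issues one DRM_IOCTL_VIRTGPU_EXECBUFFER per
 * batch. When a batch carries syncs, a fence fd is exported from the
 * execbuffer and handed to the simulated syncobjs above as their pending
 * fence.
 */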
static int
sim_submit_signal_syncs(struct virtgpu *gpu,
                        int sync_fd,
                        struct vn_renderer_sync *const *syncs,
                        const uint64_t *sync_values,
                        uint32_t sync_count,
                        bool cpu)
{
   for (uint32_t i = 0; i < sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)syncs[i];
      const uint64_t pending_point = sync_values[i];

#ifdef SIMULATE_SYNCOBJ
      int ret = sim_syncobj_submit(gpu, sync->syncobj_handle, sync_fd,
                                   pending_point, cpu);
      if (ret)
         return ret;
#else
      /* we can in theory do a DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE followed by a
       * DRM_IOCTL_SYNCOBJ_TRANSFER
       */
      return -1;
#endif
   }

   return 0;
}

static uint32_t *
sim_submit_alloc_gem_handles(struct vn_renderer_bo *const *bos,
                             uint32_t bo_count)
{
   uint32_t *gem_handles = malloc(sizeof(*gem_handles) * bo_count);
   if (!gem_handles)
      return NULL;

   for (uint32_t i = 0; i < bo_count; i++) {
      struct virtgpu_bo *bo = (struct virtgpu_bo *)bos[i];
      gem_handles[i] = bo->gem_handle;
   }

   return gem_handles;
}

static int
sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit)
{
   const bool use_ring_idx = gpu->base.info.supports_multiple_timelines;

   /* TODO replace submit->bos by submit->gem_handles to avoid malloc/loop */
   uint32_t *gem_handles = NULL;
   if (submit->bo_count) {
      gem_handles =
         sim_submit_alloc_gem_handles(submit->bos, submit->bo_count);
      if (!gem_handles)
         return -1;
   }

   assert(submit->batch_count);

   int ret = 0;
   for (uint32_t i = 0; i < submit->batch_count; i++) {
      const struct vn_renderer_submit_batch *batch = &submit->batches[i];

      struct drm_virtgpu_execbuffer args = {
         .flags = (batch->sync_count ? VIRTGPU_EXECBUF_FENCE_FD_OUT : 0) |
                  (use_ring_idx ? VIRTGPU_EXECBUF_RING_IDX : 0),
         .size = batch->cs_size,
         .command = (uintptr_t)batch->cs_data,
         .bo_handles = (uintptr_t)gem_handles,
         .num_bo_handles = submit->bo_count,
         .ring_idx = (use_ring_idx ? batch->ring_idx : 0),
      };

      ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
      if (ret) {
         vn_log(gpu->instance, "failed to execbuffer: %s", strerror(errno));
         break;
      }

      if (batch->sync_count) {
         ret = sim_submit_signal_syncs(gpu, args.fence_fd, batch->syncs,
                                       batch->sync_values, batch->sync_count,
                                       batch->ring_idx == 0);
         close(args.fence_fd);
         if (ret)
            break;
      }
   }

   free(gem_handles);
   return ret;
}

#endif /* SIMULATE_SUBMIT */

static int
virtgpu_ioctl(struct virtgpu *gpu, unsigned long request, void *args)
{
   return drmIoctl(gpu->fd, request, args);
}

static uint64_t
virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param)
{
   /* val must be zeroed because kernel only writes the lower 32 bits */
   uint64_t val = 0;
   struct drm_virtgpu_getparam args = {
      .param = param,
      .value = (uintptr_t)&val,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args);
   return ret ? 0 : val;
}

static int
virtgpu_ioctl_get_caps(struct virtgpu *gpu,
                       enum virgl_renderer_capset id,
                       uint32_t version,
                       void *capset,
                       size_t capset_size)
{
   struct drm_virtgpu_get_caps args = {
      .cap_set_id = id,
      .cap_set_ver = version,
      .addr = (uintptr_t)capset,
      .size = capset_size,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
}

static int
virtgpu_ioctl_context_init(struct virtgpu *gpu,
                           enum virgl_renderer_capset capset_id)
{
   struct drm_virtgpu_context_set_param ctx_set_params[3] = {
      {
         .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID,
         .value = capset_id,
      },
      {
         .param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS,
         .value = 64,
      },
      {
         .param = VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK,
         .value = 0, /* don't generate drm_events on fence signaling */
      },
   };

   struct drm_virtgpu_context_init args = {
      .num_params = ARRAY_SIZE(ctx_set_params),
      .ctx_set_params = (uintptr_t)&ctx_set_params,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
}

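/* Create a blob resource. blob_id names the host-side allocation: shmem
 * creation passes 0 (see virtgpu_init_shmem_blob_mem below) while bo
 * allocations pass the VkDeviceMemory object id. Returns the gem handle and
 * writes the resource id to res_id.
 */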
static uint32_t
virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu,
                                   uint32_t blob_mem,
                                   uint32_t blob_flags,
                                   size_t blob_size,
                                   uint64_t blob_id,
                                   uint32_t *res_id)
{
#ifdef SIMULATE_BO_SIZE_FIX
   blob_size = align64(blob_size, 4096);
#endif

   struct drm_virtgpu_resource_create_blob args = {
      .blob_mem = blob_mem,
      .blob_flags = blob_flags,
      .size = blob_size,
      .blob_id = blob_id,
   };

   if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args))
      return 0;

   *res_id = args.res_handle;
   return args.bo_handle;
}

static int
virtgpu_ioctl_resource_info(struct virtgpu *gpu,
                            uint32_t gem_handle,
                            struct drm_virtgpu_resource_info *info)
{
   *info = (struct drm_virtgpu_resource_info){
      .bo_handle = gem_handle,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_INFO, info);
}

static void
virtgpu_ioctl_gem_close(struct virtgpu *gpu, uint32_t gem_handle)
{
   struct drm_gem_close args = {
      .handle = gem_handle,
   };

   ASSERTED const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args);
   assert(!ret);
}

static int
virtgpu_ioctl_prime_handle_to_fd(struct virtgpu *gpu,
                                 uint32_t gem_handle,
                                 bool mappable)
{
   struct drm_prime_handle args = {
      .handle = gem_handle,
      .flags = DRM_CLOEXEC | (mappable ? DRM_RDWR : 0),
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
   return ret ? -1 : args.fd;
}

static uint32_t
virtgpu_ioctl_prime_fd_to_handle(struct virtgpu *gpu, int fd)
{
   struct drm_prime_handle args = {
      .fd = fd,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
   return ret ? 0 : args.handle;
}

static void *
virtgpu_ioctl_map(struct virtgpu *gpu, uint32_t gem_handle, size_t size)
{
   struct drm_virtgpu_map args = {
      .handle = gem_handle,
   };

   if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args))
      return NULL;

   void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd,
                    args.offset);
   if (ptr == MAP_FAILED)
      return NULL;

   return ptr;
}

static uint32_t
virtgpu_ioctl_syncobj_create(struct virtgpu *gpu, bool signaled)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_create(gpu, signaled);
#endif

   struct drm_syncobj_create args = {
      .flags = signaled ? DRM_SYNCOBJ_CREATE_SIGNALED : 0,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_CREATE, &args);
   return ret ? 0 : args.handle;
}

static void
virtgpu_ioctl_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   sim_syncobj_destroy(gpu, syncobj_handle);
   return;
#endif

   struct drm_syncobj_destroy args = {
      .handle = syncobj_handle,
   };

   ASSERTED const int ret =
      virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_DESTROY, &args);
   assert(!ret);
}

static int
virtgpu_ioctl_syncobj_handle_to_fd(struct virtgpu *gpu,
                                   uint32_t syncobj_handle,
                                   bool sync_file)
{
#ifdef SIMULATE_SYNCOBJ
   return sync_file ? sim_syncobj_export(gpu, syncobj_handle) : -1;
#endif

   struct drm_syncobj_handle args = {
      .handle = syncobj_handle,
      .flags =
         sync_file ? DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE : 0,
   };

   int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
   if (ret)
      return -1;

   return args.fd;
}

static uint32_t
virtgpu_ioctl_syncobj_fd_to_handle(struct virtgpu *gpu,
                                   int fd,
                                   uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   return syncobj_handle ? sim_syncobj_import(gpu, syncobj_handle, fd) : 0;
#endif

   struct drm_syncobj_handle args = {
      .handle = syncobj_handle,
      .flags =
         syncobj_handle ? DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE : 0,
      .fd = fd,
   };

   int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args);
   if (ret)
      return 0;

   return args.handle;
}

static int
virtgpu_ioctl_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_reset(gpu, syncobj_handle);
#endif

   struct drm_syncobj_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_RESET, &args);
}

static int
virtgpu_ioctl_syncobj_query(struct virtgpu *gpu,
                            uint32_t syncobj_handle,
                            uint64_t *point)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_query(gpu, syncobj_handle, point);
#endif

   struct drm_syncobj_timeline_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .points = (uintptr_t)point,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_QUERY, &args);
}

static int
virtgpu_ioctl_syncobj_timeline_signal(struct virtgpu *gpu,
                                      uint32_t syncobj_handle,
                                      uint64_t point)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_signal(gpu, syncobj_handle, point);
#endif

   struct drm_syncobj_timeline_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .points = (uintptr_t)&point,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &args);
}

static int
virtgpu_ioctl_syncobj_timeline_wait(struct virtgpu *gpu,
                                    const struct vn_renderer_wait *wait,
                                    bool wait_avail)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_wait(gpu, wait, wait_avail);
#endif

   /* always enable wait-before-submit */
   uint32_t flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (!wait->wait_any)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
   /* wait for fences to appear instead of signaling */
   if (wait_avail)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE;

   /* TODO replace wait->syncs by wait->sync_handles to avoid malloc/loop */
   uint32_t *syncobj_handles =
      malloc(sizeof(*syncobj_handles) * wait->sync_count);
   if (!syncobj_handles)
      return -1;
   for (uint32_t i = 0; i < wait->sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
      syncobj_handles[i] = sync->syncobj_handle;
   }

   struct drm_syncobj_timeline_wait args = {
      .handles = (uintptr_t)syncobj_handles,
      .points = (uintptr_t)wait->sync_values,
      .timeout_nsec = os_time_get_absolute_timeout(wait->timeout),
      .count_handles = wait->sync_count,
      .flags = flags,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &args);

   free(syncobj_handles);

   return ret;
}

static int
virtgpu_ioctl_submit(struct virtgpu *gpu,
                     const struct vn_renderer_submit *submit)
{
#ifdef SIMULATE_SUBMIT
   return sim_submit(gpu, submit);
#endif
   return -1;
}

static VkResult
virtgpu_sync_write(struct vn_renderer *renderer,
                   struct vn_renderer_sync *_sync,
                   uint64_t val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   const int ret =
      virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle, val);

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static VkResult
virtgpu_sync_read(struct vn_renderer *renderer,
                  struct vn_renderer_sync *_sync,
                  uint64_t *val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   const int ret =
      virtgpu_ioctl_syncobj_query(gpu, sync->syncobj_handle, val);

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static VkResult
virtgpu_sync_reset(struct vn_renderer *renderer,
                   struct vn_renderer_sync *_sync,
                   uint64_t initial_val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   int ret = virtgpu_ioctl_syncobj_reset(gpu, sync->syncobj_handle);
   if (!ret) {
      ret = virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle,
                                                  initial_val);
   }

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static int
virtgpu_sync_export_syncobj(struct vn_renderer *renderer,
                            struct vn_renderer_sync *_sync,
                            bool sync_file)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   return virtgpu_ioctl_syncobj_handle_to_fd(gpu, sync->syncobj_handle,
                                             sync_file);
}

static void
virtgpu_sync_destroy(struct vn_renderer *renderer,
                     struct vn_renderer_sync *_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   virtgpu_ioctl_syncobj_destroy(gpu, sync->syncobj_handle);

   free(sync);
}

static VkResult
virtgpu_sync_create_from_syncobj(struct vn_renderer *renderer,
                                 int fd,
                                 bool sync_file,
                                 struct vn_renderer_sync **out_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   uint32_t syncobj_handle;
   if (sync_file) {
      syncobj_handle = virtgpu_ioctl_syncobj_create(gpu, false);
      if (!syncobj_handle)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      if (!virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, syncobj_handle)) {
         virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;
      }
   } else {
      syncobj_handle = virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, 0);
      if (!syncobj_handle)
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   }

   struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
   if (!sync) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   sync->syncobj_handle = syncobj_handle;
   sync->base.sync_id = 0; /* TODO */

   *out_sync = &sync->base;

   return VK_SUCCESS;
}

static VkResult
virtgpu_sync_create(struct vn_renderer *renderer,
                    uint64_t initial_val,
                    uint32_t flags,
                    struct vn_renderer_sync **out_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   /* TODO */
   if (flags & VN_RENDERER_SYNC_SHAREABLE)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* always false because we don't use binary drm_syncobjs */
   const bool signaled = false;
   const uint32_t syncobj_handle =
      virtgpu_ioctl_syncobj_create(gpu, signaled);
   if (!syncobj_handle)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* add a signaled fence chain with seqno initial_val */
   const int ret =
      virtgpu_ioctl_syncobj_timeline_signal(gpu, syncobj_handle, initial_val);
   if (ret) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
   if (!sync) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   sync->syncobj_handle = syncobj_handle;
   /* we will have a sync_id when shareable is true and virtio-gpu associates
    * a host sync object with guest drm_syncobj
    */
   sync->base.sync_id = 0;

   *out_sync = &sync->base;

   return VK_SUCCESS;
}

static void
virtgpu_bo_invalidate(struct vn_renderer *renderer,
                      struct vn_renderer_bo *bo,
                      VkDeviceSize offset,
                      VkDeviceSize size)
{
   /* nop because kernel makes every mapping coherent */
}

static void
virtgpu_bo_flush(struct vn_renderer *renderer,
                 struct vn_renderer_bo *bo,
                 VkDeviceSize offset,
                 VkDeviceSize size)
{
   /* nop because kernel makes every mapping coherent */
}

static void *
virtgpu_bo_map(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
   const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;

   /* not thread-safe but is fine */
   if (!bo->base.mmap_ptr && mappable) {
      bo->base.mmap_ptr =
         virtgpu_ioctl_map(gpu, bo->gem_handle, bo->base.mmap_size);
   }

   return bo->base.mmap_ptr;
}

static int
virtgpu_bo_export_dma_buf(struct vn_renderer *renderer,
                          struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
   const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
   const bool shareable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_SHAREABLE;

   return shareable
             ? virtgpu_ioctl_prime_handle_to_fd(gpu, bo->gem_handle, mappable)
             : -1;
}

static bool
virtgpu_bo_destroy(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;

   mtx_lock(&gpu->dma_buf_import_mutex);

   /* Check the refcount again after the import lock is grabbed. Yes, we use
    * the double-checked locking anti-pattern.
    */
   if (vn_refcount_is_valid(&bo->base.refcount)) {
      mtx_unlock(&gpu->dma_buf_import_mutex);
      return false;
   }

   if (bo->base.mmap_ptr)
      munmap(bo->base.mmap_ptr, bo->base.mmap_size);
   virtgpu_ioctl_gem_close(gpu, bo->gem_handle);

   /* set gem_handle to 0 to indicate that the bo is invalid */
   bo->gem_handle = 0;

   mtx_unlock(&gpu->dma_buf_import_mutex);

   return true;
}

static uint32_t
virtgpu_bo_blob_flags(VkMemoryPropertyFlags flags,
                      VkExternalMemoryHandleTypeFlags external_handles)
{
   uint32_t blob_flags = 0;
   if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
   if (external_handles)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
   if (external_handles & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE;

   return blob_flags;
}

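/* DRM_IOCTL_PRIME_FD_TO_HANDLE returns the same gem handle when the dma-buf
 * has already been imported, so the bo looked up from bo_array may be live.
 * In that case only the refcount is bumped; dma_buf_import_mutex serializes
 * this against virtgpu_bo_destroy dropping the last reference.
 */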
static VkResult
virtgpu_bo_create_from_dma_buf(struct vn_renderer *renderer,
                               VkDeviceSize size,
                               int fd,
                               VkMemoryPropertyFlags flags,
                               struct vn_renderer_bo **out_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct drm_virtgpu_resource_info info;
   uint32_t gem_handle = 0;
   struct virtgpu_bo *bo = NULL;

   mtx_lock(&gpu->dma_buf_import_mutex);

   gem_handle = virtgpu_ioctl_prime_fd_to_handle(gpu, fd);
   if (!gem_handle)
      goto fail;
   bo = util_sparse_array_get(&gpu->bo_array, gem_handle);

   if (virtgpu_ioctl_resource_info(gpu, gem_handle, &info))
      goto fail;

   /* Upon import, blob_flags is not passed to the kernel and is only for
    * internal use. Set it to what works best for us.
    * - blob mem: SHAREABLE + conditional MAPPABLE per VkMemoryPropertyFlags
    * - classic 3d: SHAREABLE only for export and to fail the map
    */
   uint32_t blob_flags = VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
   size_t mmap_size = 0;
   if (info.blob_mem) {
      /* must be VIRTGPU_BLOB_MEM_HOST3D or VIRTGPU_BLOB_MEM_GUEST_VRAM */
      if (info.blob_mem != gpu->bo_blob_mem)
         goto fail;

      blob_flags |= virtgpu_bo_blob_flags(flags, 0);

      /* mmap_size is only used when mappable */
      mmap_size = 0;
      if (blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE) {
         if (info.size < size)
            goto fail;

         mmap_size = size;
      }
   }

   /* we check bo->gem_handle instead of bo->refcount because bo->refcount
    * might only be memset to 0 and is not considered initialized in theory
    */
   if (bo->gem_handle == gem_handle) {
      if (bo->base.mmap_size < mmap_size)
         goto fail;
      if (blob_flags & ~bo->blob_flags)
         goto fail;

      /* we can't use vn_renderer_bo_ref as the refcount may drop to 0
       * temporarily before virtgpu_bo_destroy grabs the lock
       */
      vn_refcount_fetch_add_relaxed(&bo->base.refcount, 1);
   } else {
      *bo = (struct virtgpu_bo){
         .base = {
            .refcount = VN_REFCOUNT_INIT(1),
            .res_id = info.res_handle,
            .mmap_size = mmap_size,
         },
         .gem_handle = gem_handle,
         .blob_flags = blob_flags,
      };
   }

   mtx_unlock(&gpu->dma_buf_import_mutex);

   *out_bo = &bo->base;

   return VK_SUCCESS;

fail:
   if (gem_handle && bo->gem_handle != gem_handle)
      virtgpu_ioctl_gem_close(gpu, gem_handle);
   mtx_unlock(&gpu->dma_buf_import_mutex);
   return VK_ERROR_INVALID_EXTERNAL_HANDLE;
}

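/* mem_id, the venus object id of the VkDeviceMemory, is passed as blob_id;
 * the host renderer is expected to resolve it to the matching host
 * allocation.
 */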
static VkResult
virtgpu_bo_create_from_device_memory(
   struct vn_renderer *renderer,
   VkDeviceSize size,
   vn_object_id mem_id,
   VkMemoryPropertyFlags flags,
   VkExternalMemoryHandleTypeFlags external_handles,
   struct vn_renderer_bo **out_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   const uint32_t blob_flags = virtgpu_bo_blob_flags(flags, external_handles);

   uint32_t res_id;
   uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
      gpu, gpu->bo_blob_mem, blob_flags, size, mem_id, &res_id);
   if (!gem_handle)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct virtgpu_bo *bo = util_sparse_array_get(&gpu->bo_array, gem_handle);
   *bo = (struct virtgpu_bo){
      .base = {
         .refcount = VN_REFCOUNT_INIT(1),
         .res_id = res_id,
         .mmap_size = size,
      },
      .gem_handle = gem_handle,
      .blob_flags = blob_flags,
   };

   *out_bo = &bo->base;

   return VK_SUCCESS;
}

static void
virtgpu_shmem_destroy_now(struct vn_renderer *renderer,
                          struct vn_renderer_shmem *_shmem)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_shmem *shmem = (struct virtgpu_shmem *)_shmem;

   munmap(shmem->base.mmap_ptr, shmem->base.mmap_size);
   virtgpu_ioctl_gem_close(gpu, shmem->gem_handle);
}

static void
virtgpu_shmem_destroy(struct vn_renderer *renderer,
                      struct vn_renderer_shmem *shmem)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   if (vn_renderer_shmem_cache_add(&gpu->shmem_cache, shmem))
      return;

   virtgpu_shmem_destroy_now(&gpu->base, shmem);
}

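/* shmem allocations go through a small cache: create first tries
 * vn_renderer_shmem_cache_get, and destroy returns entries to the cache
 * instead of freeing them immediately.
 */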
static struct vn_renderer_shmem *
virtgpu_shmem_create(struct vn_renderer *renderer, size_t size)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   struct vn_renderer_shmem *cached_shmem =
      vn_renderer_shmem_cache_get(&gpu->shmem_cache, size);
   if (cached_shmem) {
      cached_shmem->refcount = VN_REFCOUNT_INIT(1);
      return cached_shmem;
   }

   uint32_t res_id;
   uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
      gpu, gpu->shmem_blob_mem, VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0,
      &res_id);
   if (!gem_handle)
      return NULL;

   void *ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
   if (!ptr) {
      virtgpu_ioctl_gem_close(gpu, gem_handle);
      return NULL;
   }

   struct virtgpu_shmem *shmem =
      util_sparse_array_get(&gpu->shmem_array, gem_handle);
   *shmem = (struct virtgpu_shmem){
      .base = {
         .refcount = VN_REFCOUNT_INIT(1),
         .res_id = res_id,
         .mmap_size = size,
         .mmap_ptr = ptr,
      },
      .gem_handle = gem_handle,
   };

   return &shmem->base;
}

static VkResult
virtgpu_wait(struct vn_renderer *renderer,
             const struct vn_renderer_wait *wait)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   const int ret = virtgpu_ioctl_syncobj_timeline_wait(gpu, wait, false);
   if (ret && errno != ETIME)
      return VK_ERROR_DEVICE_LOST;

   return ret ? VK_TIMEOUT : VK_SUCCESS;
}

static VkResult
virtgpu_submit(struct vn_renderer *renderer,
               const struct vn_renderer_submit *submit)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   const int ret = virtgpu_ioctl_submit(gpu, submit);
   return ret ? VK_ERROR_DEVICE_LOST : VK_SUCCESS;
}

static void
virtgpu_init_renderer_info(struct virtgpu *gpu)
{
   struct vn_renderer_info *info = &gpu->base.info;

   info->drm.has_primary = gpu->has_primary;
   info->drm.primary_major = gpu->primary_major;
   info->drm.primary_minor = gpu->primary_minor;
   info->drm.has_render = true;
   info->drm.render_major = gpu->render_major;
   info->drm.render_minor = gpu->render_minor;

   info->pci.vendor_id = VIRTGPU_PCI_VENDOR_ID;
   info->pci.device_id = VIRTGPU_PCI_DEVICE_ID;

   if (gpu->bustype == DRM_BUS_PCI) {
      info->pci.has_bus_info = true;
      info->pci.domain = gpu->pci_bus_info.domain;
      info->pci.bus = gpu->pci_bus_info.bus;
      info->pci.device = gpu->pci_bus_info.dev;
      info->pci.function = gpu->pci_bus_info.func;
   } else {
      info->pci.has_bus_info = false;
   }

   info->has_dma_buf_import = true;
   /* TODO switch from emulation to drm_syncobj */
   info->has_external_sync = true;

   info->has_implicit_fencing = false;

   const struct virgl_renderer_capset_venus *capset = &gpu->capset.data;
   info->wire_format_version = capset->wire_format_version;
   info->vk_xml_version = capset->vk_xml_version;
   info->vk_ext_command_serialization_spec_version =
      capset->vk_ext_command_serialization_spec_version;
   info->vk_mesa_venus_protocol_spec_version =
      capset->vk_mesa_venus_protocol_spec_version;
   info->supports_blob_id_0 = capset->supports_blob_id_0;

   /* ensure vk_extension_mask is large enough to hold all capset masks */
   STATIC_ASSERT(sizeof(info->vk_extension_mask) >=
                 sizeof(capset->vk_extension_mask1));
   memcpy(info->vk_extension_mask, capset->vk_extension_mask1,
          sizeof(capset->vk_extension_mask1));

   info->allow_vk_wait_syncs = capset->allow_vk_wait_syncs;

   info->supports_multiple_timelines = capset->supports_multiple_timelines;
   info->max_timeline_count = gpu->max_timeline_count;

   if (gpu->bo_blob_mem == VIRTGPU_BLOB_MEM_GUEST_VRAM)
      info->has_guest_vram = true;

   /* Use guest blob allocations from dedicated heap (Host visible memory) */
   if (gpu->bo_blob_mem == VIRTGPU_BLOB_MEM_HOST3D && capset->use_guest_vram)
      info->has_guest_vram = true;
}

static void
virtgpu_destroy(struct vn_renderer *renderer,
                const VkAllocationCallbacks *alloc)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   vn_renderer_shmem_cache_fini(&gpu->shmem_cache);

   if (gpu->fd >= 0)
      close(gpu->fd);

   mtx_destroy(&gpu->dma_buf_import_mutex);

   util_sparse_array_finish(&gpu->shmem_array);
   util_sparse_array_finish(&gpu->bo_array);

   vk_free(alloc, gpu);
}

static inline void
virtgpu_init_shmem_blob_mem(ASSERTED struct virtgpu *gpu)
{
   /* VIRTGPU_BLOB_MEM_GUEST allocates from the guest system memory. They are
    * logically contiguous in the guest but are sglists (iovecs) in the host.
    * That makes them slower to process in the host. With host process
    * isolation, it also becomes impossible for the host to access sglists
    * directly.
    *
    * While there are ideas (and shipped code in some cases) such as creating
    * udmabufs from sglists, or having a dedicated guest heap, it seems the
    * easiest way is to reuse VIRTGPU_BLOB_MEM_HOST3D. That is, when the
    * renderer sees a request to export a blob where
    *
    * - blob_mem is VIRTGPU_BLOB_MEM_HOST3D
    * - blob_flags is VIRTGPU_BLOB_FLAG_USE_MAPPABLE
    * - blob_id is 0
    *
    * it allocates a host shmem.
    *
    * supports_blob_id_0 has been enforced by mandated render server config.
    */
   assert(gpu->capset.data.supports_blob_id_0);
   gpu->shmem_blob_mem = VIRTGPU_BLOB_MEM_HOST3D;
}

static VkResult
virtgpu_init_context(struct virtgpu *gpu)
{
   assert(!gpu->capset.version);
   const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
   if (ret) {
      if (VN_DEBUG(INIT)) {
         vn_log(gpu->instance, "failed to initialize context: %s",
                strerror(errno));
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   return VK_SUCCESS;
}

static VkResult
virtgpu_init_capset(struct virtgpu *gpu)
{
   gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS;
   gpu->capset.version = 0;

   const int ret =
      virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version,
                             &gpu->capset.data, sizeof(gpu->capset.data));
   if (ret) {
      if (VN_DEBUG(INIT)) {
         vn_log(gpu->instance, "failed to get venus v%d capset: %s",
                gpu->capset.version, strerror(errno));
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   return VK_SUCCESS;
}

static VkResult
virtgpu_init_params(struct virtgpu *gpu)
{
   const uint64_t required_params[] = {
      VIRTGPU_PARAM_3D_FEATURES,   VIRTGPU_PARAM_CAPSET_QUERY_FIX,
      VIRTGPU_PARAM_RESOURCE_BLOB, VIRTGPU_PARAM_CROSS_DEVICE,
      VIRTGPU_PARAM_CONTEXT_INIT,
   };
   uint64_t val;
   for (uint32_t i = 0; i < ARRAY_SIZE(required_params); i++) {
      val = virtgpu_ioctl_getparam(gpu, required_params[i]);
      if (!val) {
         if (VN_DEBUG(INIT)) {
            vn_log(gpu->instance, "required kernel param %d is missing",
                   (int)required_params[i]);
         }
         return VK_ERROR_INITIALIZATION_FAILED;
      }
   }

   val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_HOST_VISIBLE);
   if (val) {
      gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_HOST3D;
   } else {
      val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_GUEST_VRAM);
      if (val) {
         gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_GUEST_VRAM;
      }
   }

   if (!val) {
      vn_log(gpu->instance,
             "one of required kernel params (%d or %d) is missing",
             (int)VIRTGPU_PARAM_HOST_VISIBLE, (int)VIRTGPU_PARAM_GUEST_VRAM);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   /* implied by CONTEXT_INIT uapi */
   gpu->max_timeline_count = 64;

   return VK_SUCCESS;
}

static VkResult
virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev)
{
   bool supported_bus = false;

   switch (dev->bustype) {
   case DRM_BUS_PCI:
      if (dev->deviceinfo.pci->vendor_id == VIRTGPU_PCI_VENDOR_ID &&
          dev->deviceinfo.pci->device_id == VIRTGPU_PCI_DEVICE_ID)
         supported_bus = true;
      break;
   case DRM_BUS_PLATFORM:
      supported_bus = true;
      break;
   default:
      break;
   }

   if (!supported_bus || !(dev->available_nodes & (1 << DRM_NODE_RENDER))) {
      if (VN_DEBUG(INIT)) {
         const char *name = "unknown";
         for (uint32_t i = 0; i < DRM_NODE_MAX; i++) {
            if (dev->available_nodes & (1 << i)) {
               name = dev->nodes[i];
               break;
            }
         }
         vn_log(gpu->instance, "skipping DRM device %s", name);
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   const char *primary_path = dev->nodes[DRM_NODE_PRIMARY];
   const char *node_path = dev->nodes[DRM_NODE_RENDER];

   int fd = open(node_path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      if (VN_DEBUG(INIT))
         vn_log(gpu->instance, "failed to open %s", node_path);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   drmVersionPtr version = drmGetVersion(fd);
   if (!version || strcmp(version->name, "virtio_gpu") ||
       version->version_major != 0) {
      if (VN_DEBUG(INIT)) {
         if (version) {
            vn_log(gpu->instance, "unknown DRM driver %s version %d",
                   version->name, version->version_major);
         } else {
            vn_log(gpu->instance, "failed to get DRM driver version");
         }
      }
      if (version)
         drmFreeVersion(version);
      close(fd);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   gpu->fd = fd;

   struct stat st;
   if (stat(primary_path, &st) == 0) {
      gpu->has_primary = true;
      gpu->primary_major = major(st.st_rdev);
      gpu->primary_minor = minor(st.st_rdev);
   } else {
      gpu->has_primary = false;
      gpu->primary_major = 0;
      gpu->primary_minor = 0;
   }
   stat(node_path, &st);
   gpu->render_major = major(st.st_rdev);
   gpu->render_minor = minor(st.st_rdev);

   gpu->bustype = dev->bustype;
   if (dev->bustype == DRM_BUS_PCI)
      gpu->pci_bus_info = *dev->businfo.pci;

   drmFreeVersion(version);

   if (VN_DEBUG(INIT))
      vn_log(gpu->instance, "using DRM device %s", node_path);

   return VK_SUCCESS;
}

static VkResult
virtgpu_open(struct virtgpu *gpu)
{
   drmDevicePtr devs[8];
   int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
   if (count < 0) {
      if (VN_DEBUG(INIT))
         vn_log(gpu->instance, "failed to enumerate DRM devices");
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   VkResult result = VK_ERROR_INITIALIZATION_FAILED;
   for (int i = 0; i < count; i++) {
      result = virtgpu_open_device(gpu, devs[i]);
      if (result == VK_SUCCESS)
         break;
   }

   drmFreeDevices(devs, count);

   return result;
}

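/* Initialization order: open a render node, check kernel params, query the
 * venus capset, then initialize the per-fd virtgpu context before the ops
 * tables are wired up.
 */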
static VkResult
virtgpu_init(struct virtgpu *gpu)
{
   util_sparse_array_init(&gpu->shmem_array, sizeof(struct virtgpu_shmem),
                          1024);
   util_sparse_array_init(&gpu->bo_array, sizeof(struct virtgpu_bo), 1024);

   mtx_init(&gpu->dma_buf_import_mutex, mtx_plain);

   VkResult result = virtgpu_open(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_params(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_capset(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_context(gpu);
   if (result != VK_SUCCESS)
      return result;

   virtgpu_init_shmem_blob_mem(gpu);

   vn_renderer_shmem_cache_init(&gpu->shmem_cache, &gpu->base,
                                virtgpu_shmem_destroy_now);

   virtgpu_init_renderer_info(gpu);

   gpu->base.ops.destroy = virtgpu_destroy;
   gpu->base.ops.submit = virtgpu_submit;
   gpu->base.ops.wait = virtgpu_wait;

   gpu->base.shmem_ops.create = virtgpu_shmem_create;
   gpu->base.shmem_ops.destroy = virtgpu_shmem_destroy;

   gpu->base.bo_ops.create_from_device_memory =
      virtgpu_bo_create_from_device_memory;
   gpu->base.bo_ops.create_from_dma_buf = virtgpu_bo_create_from_dma_buf;
   gpu->base.bo_ops.destroy = virtgpu_bo_destroy;
   gpu->base.bo_ops.export_dma_buf = virtgpu_bo_export_dma_buf;
   gpu->base.bo_ops.map = virtgpu_bo_map;
   gpu->base.bo_ops.flush = virtgpu_bo_flush;
   gpu->base.bo_ops.invalidate = virtgpu_bo_invalidate;

   gpu->base.sync_ops.create = virtgpu_sync_create;
   gpu->base.sync_ops.create_from_syncobj = virtgpu_sync_create_from_syncobj;
   gpu->base.sync_ops.destroy = virtgpu_sync_destroy;
   gpu->base.sync_ops.export_syncobj = virtgpu_sync_export_syncobj;
   gpu->base.sync_ops.reset = virtgpu_sync_reset;
   gpu->base.sync_ops.read = virtgpu_sync_read;
   gpu->base.sync_ops.write = virtgpu_sync_write;

   return VK_SUCCESS;
}

VkResult
vn_renderer_create_virtgpu(struct vn_instance *instance,
                           const VkAllocationCallbacks *alloc,
                           struct vn_renderer **renderer)
{
   struct virtgpu *gpu = vk_zalloc(alloc, sizeof(*gpu), VN_DEFAULT_ALIGN,
                                   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!gpu)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   gpu->instance = instance;
   gpu->fd = -1;

   VkResult result = virtgpu_init(gpu);
   if (result != VK_SUCCESS) {
      virtgpu_destroy(&gpu->base, alloc);
      return result;
   }

   *renderer = &gpu->base;

   return VK_SUCCESS;
}