/*
 * Copyright 2020 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <xf86drm.h>

#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

#include "drm-uapi/virtgpu_drm.h"
#include "util/sparse_array.h"
#define VIRGL_RENDERER_UNSTABLE_APIS
#include "virtio-gpu/virglrenderer_hw.h"

#include "vn_renderer_internal.h"

/* XXX WIP kernel uapi */
#ifndef VIRTGPU_PARAM_CONTEXT_INIT
#define VIRTGPU_PARAM_CONTEXT_INIT 6
#define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001
struct drm_virtgpu_context_set_param {
   __u64 param;
   __u64 value;
};
struct drm_virtgpu_context_init {
   __u32 num_params;
   __u32 pad;
   __u64 ctx_set_params;
};
#define DRM_VIRTGPU_CONTEXT_INIT 0xb
#define DRM_IOCTL_VIRTGPU_CONTEXT_INIT                                       \
   DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_CONTEXT_INIT,                     \
            struct drm_virtgpu_context_init)
#endif /* VIRTGPU_PARAM_CONTEXT_INIT */
#ifndef VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT
#define VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT 100
#endif /* VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT */

#ifndef VIRTGPU_PARAM_GUEST_VRAM
/* All guest allocations happen via virtgpu dedicated heap. */
#define VIRTGPU_PARAM_GUEST_VRAM 9
#endif

#ifndef VIRTGPU_BLOB_MEM_GUEST_VRAM
#define VIRTGPU_BLOB_MEM_GUEST_VRAM 0x0004
#endif

/* XXX comment these out to really use kernel uapi */
#define SIMULATE_BO_SIZE_FIX 1
//#define SIMULATE_CONTEXT_INIT 1
#define SIMULATE_SYNCOBJ 1
#define SIMULATE_SUBMIT 1
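/* The SIMULATE_* paths above: SIMULATE_SYNCOBJ emulates timeline drm_syncobjs
 * in userspace by tracking sync_file fds and polling them; SIMULATE_SUBMIT
 * routes submissions through plain DRM_IOCTL_VIRTGPU_EXECBUFFER calls;
 * SIMULATE_BO_SIZE_FIX pads blob sizes to 4K before resource creation.
 */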

#define VIRTGPU_PCI_VENDOR_ID 0x1af4
#define VIRTGPU_PCI_DEVICE_ID 0x1050

struct virtgpu;

struct virtgpu_shmem {
   struct vn_renderer_shmem base;
   uint32_t gem_handle;
};

struct virtgpu_bo {
   struct vn_renderer_bo base;
   uint32_t gem_handle;
   uint32_t blob_flags;
};

struct virtgpu_sync {
   struct vn_renderer_sync base;

   /*
    * drm_syncobj is in one of these states
    *
    * - value N:      drm_syncobj has a signaled fence chain with seqno N
    * - pending N->M: drm_syncobj has an unsignaled fence chain with seqno M
    *                 (which may point to another unsignaled fence chain with
    *                  seqno between N and M, and so on)
    *
    * TODO Do we want to use binary drm_syncobjs?  They would be
    *
    * - value 0: drm_syncobj has no fence
    * - value 1: drm_syncobj has a signaled fence with seqno 0
    *
    * They are cheaper but require special care.
    */
   uint32_t syncobj_handle;
};

struct virtgpu {
   struct vn_renderer base;

   struct vn_instance *instance;

   int fd;

   bool has_primary;
   int primary_major;
   int primary_minor;
   int render_major;
   int render_minor;

   int bustype;
   drmPciBusInfo pci_bus_info;

   uint32_t max_sync_queue_count;

   struct {
      enum virgl_renderer_capset id;
      uint32_t version;
      struct virgl_renderer_capset_venus data;
   } capset;

   uint32_t shmem_blob_mem;
   uint32_t bo_blob_mem;

   /* note that we use gem_handle instead of res_id to index because
    * res_id is monotonically increasing by default (see
    * virtio_gpu_resource_id_get)
    */
   struct util_sparse_array shmem_array;
   struct util_sparse_array bo_array;

   mtx_t dma_buf_import_mutex;

   struct vn_renderer_shmem_cache shmem_cache;
};

#ifdef SIMULATE_SYNCOBJ

#include "util/hash_table.h"
#include "util/u_idalloc.h"

static struct {
   mtx_t mutex;
   struct hash_table *syncobjs;
   struct util_idalloc ida;

   int signaled_fd;
} sim;

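/* State of a simulated timeline syncobj: "point" is the last signaled value;
 * "pending_fd" is a sync_file that, once signaled, advances the syncobj to
 * "pending_point".  Only one pending fence is tracked at a time (see
 * sim_syncobj_submit).
 */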
struct sim_syncobj {
   mtx_t mutex;
   uint64_t point;

   int pending_fd;
   uint64_t pending_point;
   bool pending_cpu;
};

static uint32_t
sim_syncobj_create(struct virtgpu *gpu, bool signaled)
{
   struct sim_syncobj *syncobj = calloc(1, sizeof(*syncobj));
   if (!syncobj)
      return 0;

   mtx_init(&syncobj->mutex, mtx_plain);
   syncobj->pending_fd = -1;

   mtx_lock(&sim.mutex);

   /* initialize lazily */
   if (!sim.syncobjs) {
      sim.syncobjs = _mesa_pointer_hash_table_create(NULL);
      if (!sim.syncobjs) {
         mtx_unlock(&sim.mutex);
         return 0;
      }

      util_idalloc_init(&sim.ida, 32);

      struct drm_virtgpu_execbuffer args = {
         .flags = VIRTGPU_EXECBUF_FENCE_FD_OUT,
      };
      int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
      if (ret || args.fence_fd < 0) {
         _mesa_hash_table_destroy(sim.syncobjs, NULL);
         sim.syncobjs = NULL;
         mtx_unlock(&sim.mutex);
         return 0;
      }

      sim.signaled_fd = args.fence_fd;
   }

   const unsigned syncobj_handle = util_idalloc_alloc(&sim.ida) + 1;
   _mesa_hash_table_insert(sim.syncobjs,
                           (const void *)(uintptr_t)syncobj_handle, syncobj);

   mtx_unlock(&sim.mutex);

   return syncobj_handle;
}

static void
sim_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = NULL;

   mtx_lock(&sim.mutex);

   struct hash_entry *entry = _mesa_hash_table_search(
      sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
   if (entry) {
      syncobj = entry->data;
      _mesa_hash_table_remove(sim.syncobjs, entry);
      util_idalloc_free(&sim.ida, syncobj_handle - 1);
   }

   mtx_unlock(&sim.mutex);

   if (syncobj) {
      if (syncobj->pending_fd >= 0)
         close(syncobj->pending_fd);
      mtx_destroy(&syncobj->mutex);
      free(syncobj);
   }
}

static VkResult
sim_syncobj_poll(int fd, int poll_timeout)
{
   struct pollfd pollfd = {
      .fd = fd,
      .events = POLLIN,
   };
   int ret;
   do {
      ret = poll(&pollfd, 1, poll_timeout);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   if (ret < 0 || (ret > 0 && !(pollfd.revents & POLLIN))) {
      return (ret < 0 && errno == ENOMEM) ? VK_ERROR_OUT_OF_HOST_MEMORY
                                          : VK_ERROR_DEVICE_LOST;
   }

   return ret ? VK_SUCCESS : VK_TIMEOUT;
}

static void
sim_syncobj_set_point_locked(struct sim_syncobj *syncobj, uint64_t point)
{
   syncobj->point = point;

   if (syncobj->pending_fd >= 0) {
      close(syncobj->pending_fd);
      syncobj->pending_fd = -1;
      syncobj->pending_point = point;
   }
}

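/* Poll the pending sync_file, if any, and advance the syncobj to the pending
 * point once it has signaled.  A CPU sync asked to wait forever is capped at
 * a finite timeout and then treated as signaled.
 */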
static void
sim_syncobj_update_point_locked(struct sim_syncobj *syncobj, int poll_timeout)
{
   if (syncobj->pending_fd >= 0) {
      VkResult result;
      if (syncobj->pending_cpu) {
         if (poll_timeout == -1) {
            const int max_cpu_timeout = 2000;
            poll_timeout = max_cpu_timeout;
            result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
            if (result == VK_TIMEOUT) {
               vn_log(NULL, "cpu sync timed out after %dms; ignoring",
                      poll_timeout);
               result = VK_SUCCESS;
            }
         } else {
            result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
         }
      } else {
         result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
      }
      if (result == VK_SUCCESS) {
         close(syncobj->pending_fd);
         syncobj->pending_fd = -1;
         syncobj->point = syncobj->pending_point;
      }
   }
}

static struct sim_syncobj *
sim_syncobj_lookup(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = NULL;

   mtx_lock(&sim.mutex);
   struct hash_entry *entry = _mesa_hash_table_search(
      sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
   if (entry)
      syncobj = entry->data;
   mtx_unlock(&sim.mutex);

   return syncobj;
}

static int
sim_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_set_point_locked(syncobj, 0);
   mtx_unlock(&syncobj->mutex);

   return 0;
}

static int
sim_syncobj_query(struct virtgpu *gpu,
                  uint32_t syncobj_handle,
                  uint64_t *point)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_update_point_locked(syncobj, 0);
   *point = syncobj->point;
   mtx_unlock(&syncobj->mutex);

   return 0;
}

static int
sim_syncobj_signal(struct virtgpu *gpu,
                   uint32_t syncobj_handle,
                   uint64_t point)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_set_point_locked(syncobj, point);
   mtx_unlock(&syncobj->mutex);

   return 0;
}

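/* Attach a pending fence to a simulated syncobj: dup the sync_file fd and
 * record the point it will signal.  Fails if another fence is already
 * pending, since only one in-flight point is supported.
 */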
static int
sim_syncobj_submit(struct virtgpu *gpu,
                   uint32_t syncobj_handle,
                   int sync_fd,
                   uint64_t point,
                   bool cpu)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   int pending_fd = dup(sync_fd);
   if (pending_fd < 0) {
      vn_log(gpu->instance, "failed to dup sync fd");
      return -1;
   }

   mtx_lock(&syncobj->mutex);

   if (syncobj->pending_fd >= 0) {
      mtx_unlock(&syncobj->mutex);

      /* TODO */
      vn_log(gpu->instance, "sorry, no simulated timeline semaphore");
      close(pending_fd);
      return -1;
   }
   if (syncobj->point >= point)
      vn_log(gpu->instance, "non-monotonic signaling");

   syncobj->pending_fd = pending_fd;
   syncobj->pending_point = point;
   syncobj->pending_cpu = cpu;

   mtx_unlock(&syncobj->mutex);

   return 0;
}

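/* Convert a timeout in nanoseconds to a poll(2) timeout in milliseconds,
 * rounding up; -1 (wait indefinitely) is returned when the result does not
 * fit in an int.
 */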
static int
timeout_to_poll_timeout(uint64_t timeout)
{
   const uint64_t ns_per_ms = 1000000;
   const uint64_t ms = (timeout + ns_per_ms - 1) / ns_per_ms;
   if (!ms && timeout)
      return -1;
   return ms <= INT_MAX ? ms : -1;
}

static int
sim_syncobj_wait(struct virtgpu *gpu,
                 const struct vn_renderer_wait *wait,
                 bool wait_avail)
{
   if (wait_avail)
      return -1;

   const int poll_timeout = timeout_to_poll_timeout(wait->timeout);

   /* TODO poll all fds at the same time */
   for (uint32_t i = 0; i < wait->sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
      const uint64_t point = wait->sync_values[i];

      struct sim_syncobj *syncobj =
         sim_syncobj_lookup(gpu, sync->syncobj_handle);
      if (!syncobj)
         return -1;

      mtx_lock(&syncobj->mutex);

      if (syncobj->point < point)
         sim_syncobj_update_point_locked(syncobj, poll_timeout);

      if (syncobj->point < point) {
         if (wait->wait_any && i < wait->sync_count - 1 &&
             syncobj->pending_fd < 0) {
            mtx_unlock(&syncobj->mutex);
            continue;
         }
         errno = ETIME;
         mtx_unlock(&syncobj->mutex);
         return -1;
      }

      mtx_unlock(&syncobj->mutex);

      if (wait->wait_any)
         break;

      /* TODO adjust poll_timeout */
   }

   return 0;
}

static int
sim_syncobj_export(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   int fd = -1;
   mtx_lock(&syncobj->mutex);
   if (syncobj->pending_fd >= 0)
      fd = dup(syncobj->pending_fd);
   else
      fd = dup(sim.signaled_fd);
   mtx_unlock(&syncobj->mutex);

   return fd;
}

static uint32_t
sim_syncobj_import(struct virtgpu *gpu, uint32_t syncobj_handle, int fd)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return 0;

   if (sim_syncobj_submit(gpu, syncobj_handle, fd, 1, false))
      return 0;

   return syncobj_handle;
}

#endif /* SIMULATE_SYNCOBJ */

#ifdef SIMULATE_SUBMIT

static int
sim_submit_signal_syncs(struct virtgpu *gpu,
                        int sync_fd,
                        struct vn_renderer_sync *const *syncs,
                        const uint64_t *sync_values,
                        uint32_t sync_count,
                        bool cpu)
{
   for (uint32_t i = 0; i < sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)syncs[i];
      const uint64_t pending_point = sync_values[i];

#ifdef SIMULATE_SYNCOBJ
      int ret = sim_syncobj_submit(gpu, sync->syncobj_handle, sync_fd,
                                   pending_point, cpu);
      if (ret)
         return ret;
#else
      /* we can in theory do a DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE followed by a
       * DRM_IOCTL_SYNCOBJ_TRANSFER
       */
      return -1;
#endif
   }

   return 0;
}

static uint32_t *
sim_submit_alloc_gem_handles(struct vn_renderer_bo *const *bos,
                             uint32_t bo_count)
{
   uint32_t *gem_handles = malloc(sizeof(*gem_handles) * bo_count);
   if (!gem_handles)
      return NULL;

   for (uint32_t i = 0; i < bo_count; i++) {
      struct virtgpu_bo *bo = (struct virtgpu_bo *)bos[i];
      gem_handles[i] = bo->gem_handle;
   }

   return gem_handles;
}

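/* Submit each batch with DRM_IOCTL_VIRTGPU_EXECBUFFER.  When a batch signals
 * syncs, a fence fd is requested from the kernel and handed to the simulated
 * syncobjs; a submit with only bos and no batches still goes through
 * execbuffer once.
 */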
static int
sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit)
{
   /* TODO replace submit->bos by submit->gem_handles to avoid malloc/loop */
   uint32_t *gem_handles = NULL;
   if (submit->bo_count) {
      gem_handles =
         sim_submit_alloc_gem_handles(submit->bos, submit->bo_count);
      if (!gem_handles)
         return -1;
   }

   int ret = 0;
   for (uint32_t i = 0; i < submit->batch_count; i++) {
      const struct vn_renderer_submit_batch *batch = &submit->batches[i];

      struct drm_virtgpu_execbuffer args = {
         .flags = batch->sync_count ? VIRTGPU_EXECBUF_FENCE_FD_OUT : 0,
         .size = batch->cs_size,
         .command = (uintptr_t)batch->cs_data,
         .bo_handles = (uintptr_t)gem_handles,
         .num_bo_handles = submit->bo_count,
      };

      ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
      if (ret) {
         vn_log(gpu->instance, "failed to execbuffer: %s", strerror(errno));
         break;
      }

      if (batch->sync_count) {
         ret = sim_submit_signal_syncs(gpu, args.fence_fd, batch->syncs,
                                       batch->sync_values, batch->sync_count,
                                       batch->sync_queue_cpu);
         close(args.fence_fd);
         if (ret)
            break;
      }
   }

   if (!submit->batch_count && submit->bo_count) {
      struct drm_virtgpu_execbuffer args = {
         .bo_handles = (uintptr_t)gem_handles,
         .num_bo_handles = submit->bo_count,
      };

      ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
      if (ret)
         vn_log(gpu->instance, "failed to execbuffer: %s", strerror(errno));
   }

   free(gem_handles);

   return ret;
}

#endif /* SIMULATE_SUBMIT */

static int
virtgpu_ioctl(struct virtgpu *gpu, unsigned long request, void *args)
{
   return drmIoctl(gpu->fd, request, args);
}

static uint64_t
virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param)
{
#ifdef SIMULATE_CONTEXT_INIT
   if (param == VIRTGPU_PARAM_CONTEXT_INIT)
      return 1;
#endif
#ifdef SIMULATE_SUBMIT
   if (param == VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT)
      return 16;
#endif

   /* val must be zeroed because kernel only writes the lower 32 bits */
   uint64_t val = 0;
   struct drm_virtgpu_getparam args = {
      .param = param,
      .value = (uintptr_t)&val,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args);
   return ret ? 0 : val;
}

static int
virtgpu_ioctl_get_caps(struct virtgpu *gpu,
                       enum virgl_renderer_capset id,
                       uint32_t version,
                       void *capset,
                       size_t capset_size)
{
#ifdef SIMULATE_CONTEXT_INIT
   if (id == VIRGL_RENDERER_CAPSET_VENUS && version == 0)
      return 0;
#endif

   struct drm_virtgpu_get_caps args = {
      .cap_set_id = id,
      .cap_set_ver = version,
      .addr = (uintptr_t)capset,
      .size = capset_size,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
}

static int
virtgpu_ioctl_context_init(struct virtgpu *gpu,
                           enum virgl_renderer_capset capset_id)
{
#ifdef SIMULATE_CONTEXT_INIT
   if (capset_id == VIRGL_RENDERER_CAPSET_VENUS)
      return 0;
#endif

   struct drm_virtgpu_context_init args = {
      .num_params = 1,
      .ctx_set_params = (uintptr_t) &
                        (struct drm_virtgpu_context_set_param){
                           .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID,
                           .value = capset_id,
                        },
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
}

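/* Create a blob resource and return its gem handle, writing the host
 * resource id to res_id.  With SIMULATE_BO_SIZE_FIX, the requested size is
 * padded to 4K before the ioctl.
 */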
static uint32_t
virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu,
                                   uint32_t blob_mem,
                                   uint32_t blob_flags,
                                   size_t blob_size,
                                   uint64_t blob_id,
                                   uint32_t *res_id)
{
#ifdef SIMULATE_BO_SIZE_FIX
   blob_size = align64(blob_size, 4096);
#endif

   struct drm_virtgpu_resource_create_blob args = {
      .blob_mem = blob_mem,
      .blob_flags = blob_flags,
      .size = blob_size,
      .blob_id = blob_id,
   };

   if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args))
      return 0;

   *res_id = args.res_handle;
   return args.bo_handle;
}

static int
virtgpu_ioctl_resource_info(struct virtgpu *gpu,
                            uint32_t gem_handle,
                            struct drm_virtgpu_resource_info *info)
{
   *info = (struct drm_virtgpu_resource_info){
      .bo_handle = gem_handle,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_INFO, info);
}

static void
virtgpu_ioctl_gem_close(struct virtgpu *gpu, uint32_t gem_handle)
{
   struct drm_gem_close args = {
      .handle = gem_handle,
   };

   ASSERTED const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args);
   assert(!ret);
}

static int
virtgpu_ioctl_prime_handle_to_fd(struct virtgpu *gpu,
                                 uint32_t gem_handle,
                                 bool mappable)
{
   struct drm_prime_handle args = {
      .handle = gem_handle,
      .flags = DRM_CLOEXEC | (mappable ? DRM_RDWR : 0),
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
   return ret ? -1 : args.fd;
}

static uint32_t
virtgpu_ioctl_prime_fd_to_handle(struct virtgpu *gpu, int fd)
{
   struct drm_prime_handle args = {
      .fd = fd,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
   return ret ? 0 : args.handle;
}

static void *
virtgpu_ioctl_map(struct virtgpu *gpu, uint32_t gem_handle, size_t size)
{
   struct drm_virtgpu_map args = {
      .handle = gem_handle,
   };

   if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args))
      return NULL;

   void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd,
                    args.offset);
   if (ptr == MAP_FAILED)
      return NULL;

   return ptr;
}

static uint32_t
virtgpu_ioctl_syncobj_create(struct virtgpu *gpu, bool signaled)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_create(gpu, signaled);
#endif

   struct drm_syncobj_create args = {
      .flags = signaled ? DRM_SYNCOBJ_CREATE_SIGNALED : 0,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_CREATE, &args);
   return ret ? 0 : args.handle;
}

static void
virtgpu_ioctl_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   sim_syncobj_destroy(gpu, syncobj_handle);
   return;
#endif

   struct drm_syncobj_destroy args = {
      .handle = syncobj_handle,
   };

   ASSERTED const int ret =
      virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_DESTROY, &args);
   assert(!ret);
}

static int
virtgpu_ioctl_syncobj_handle_to_fd(struct virtgpu *gpu,
                                   uint32_t syncobj_handle,
                                   bool sync_file)
{
#ifdef SIMULATE_SYNCOBJ
   return sync_file ? sim_syncobj_export(gpu, syncobj_handle) : -1;
#endif

   struct drm_syncobj_handle args = {
      .handle = syncobj_handle,
      .flags =
         sync_file ? DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE : 0,
   };

   int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
   if (ret)
      return -1;

   return args.fd;
}

static uint32_t
virtgpu_ioctl_syncobj_fd_to_handle(struct virtgpu *gpu,
                                   int fd,
                                   uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   return syncobj_handle ? sim_syncobj_import(gpu, syncobj_handle, fd) : 0;
#endif

   struct drm_syncobj_handle args = {
      .handle = syncobj_handle,
      .flags =
         syncobj_handle ? DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE : 0,
      .fd = fd,
   };

   int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args);
   if (ret)
      return 0;

   return args.handle;
}

static int
virtgpu_ioctl_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_reset(gpu, syncobj_handle);
#endif

   struct drm_syncobj_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_RESET, &args);
}

static int
virtgpu_ioctl_syncobj_query(struct virtgpu *gpu,
                            uint32_t syncobj_handle,
                            uint64_t *point)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_query(gpu, syncobj_handle, point);
#endif

   struct drm_syncobj_timeline_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .points = (uintptr_t)point,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_QUERY, &args);
}

static int
virtgpu_ioctl_syncobj_timeline_signal(struct virtgpu *gpu,
                                      uint32_t syncobj_handle,
                                      uint64_t point)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_signal(gpu, syncobj_handle, point);
#endif

   struct drm_syncobj_timeline_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .points = (uintptr_t)&point,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &args);
}

static int
virtgpu_ioctl_syncobj_timeline_wait(struct virtgpu *gpu,
                                    const struct vn_renderer_wait *wait,
                                    bool wait_avail)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_wait(gpu, wait, wait_avail);
#endif

   /* always enable wait-before-submit */
   uint32_t flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (!wait->wait_any)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
   /* wait for fences to appear instead of signaling */
   if (wait_avail)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE;

   /* TODO replace wait->syncs by wait->sync_handles to avoid malloc/loop */
   uint32_t *syncobj_handles =
      malloc(sizeof(*syncobj_handles) * wait->sync_count);
   if (!syncobj_handles)
      return -1;
   for (uint32_t i = 0; i < wait->sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
      syncobj_handles[i] = sync->syncobj_handle;
   }

   struct drm_syncobj_timeline_wait args = {
      .handles = (uintptr_t)syncobj_handles,
      .points = (uintptr_t)wait->sync_values,
      .timeout_nsec = os_time_get_absolute_timeout(wait->timeout),
      .count_handles = wait->sync_count,
      .flags = flags,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &args);

   free(syncobj_handles);

   return ret;
}

static int
virtgpu_ioctl_submit(struct virtgpu *gpu,
                     const struct vn_renderer_submit *submit)
{
#ifdef SIMULATE_SUBMIT
   return sim_submit(gpu, submit);
#endif
   return -1;
}

static VkResult
virtgpu_sync_write(struct vn_renderer *renderer,
                   struct vn_renderer_sync *_sync,
                   uint64_t val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   const int ret =
      virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle, val);

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static VkResult
virtgpu_sync_read(struct vn_renderer *renderer,
                  struct vn_renderer_sync *_sync,
                  uint64_t *val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   const int ret =
      virtgpu_ioctl_syncobj_query(gpu, sync->syncobj_handle, val);

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static VkResult
virtgpu_sync_reset(struct vn_renderer *renderer,
                   struct vn_renderer_sync *_sync,
                   uint64_t initial_val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   int ret = virtgpu_ioctl_syncobj_reset(gpu, sync->syncobj_handle);
   if (!ret) {
      ret = virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle,
                                                  initial_val);
   }

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static int
virtgpu_sync_export_syncobj(struct vn_renderer *renderer,
                            struct vn_renderer_sync *_sync,
                            bool sync_file)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   return virtgpu_ioctl_syncobj_handle_to_fd(gpu, sync->syncobj_handle,
                                             sync_file);
}

static void
virtgpu_sync_destroy(struct vn_renderer *renderer,
                     struct vn_renderer_sync *_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   virtgpu_ioctl_syncobj_destroy(gpu, sync->syncobj_handle);

   free(sync);
}

static VkResult
virtgpu_sync_create_from_syncobj(struct vn_renderer *renderer,
                                 int fd,
                                 bool sync_file,
                                 struct vn_renderer_sync **out_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   uint32_t syncobj_handle;
   if (sync_file) {
      syncobj_handle = virtgpu_ioctl_syncobj_create(gpu, false);
      if (!syncobj_handle)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      if (!virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, syncobj_handle)) {
         virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;
      }
   } else {
      syncobj_handle = virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, 0);
      if (!syncobj_handle)
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   }

   struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
   if (!sync) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   sync->syncobj_handle = syncobj_handle;
   sync->base.sync_id = 0; /* TODO */

   *out_sync = &sync->base;

   return VK_SUCCESS;
}

static VkResult
virtgpu_sync_create(struct vn_renderer *renderer,
                    uint64_t initial_val,
                    uint32_t flags,
                    struct vn_renderer_sync **out_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   /* TODO */
   if (flags & VN_RENDERER_SYNC_SHAREABLE)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* always false because we don't use binary drm_syncobjs */
   const bool signaled = false;
   const uint32_t syncobj_handle =
      virtgpu_ioctl_syncobj_create(gpu, signaled);
   if (!syncobj_handle)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* add a signaled fence chain with seqno initial_val */
   const int ret =
      virtgpu_ioctl_syncobj_timeline_signal(gpu, syncobj_handle, initial_val);
   if (ret) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
   if (!sync) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   sync->syncobj_handle = syncobj_handle;
   /* we will have a sync_id when shareable is true and virtio-gpu associates
    * a host sync object with guest drm_syncobj
    */
   sync->base.sync_id = 0;

   *out_sync = &sync->base;

   return VK_SUCCESS;
}

static void
virtgpu_bo_invalidate(struct vn_renderer *renderer,
                      struct vn_renderer_bo *bo,
                      VkDeviceSize offset,
                      VkDeviceSize size)
{
   /* nop because kernel makes every mapping coherent */
}

static void
virtgpu_bo_flush(struct vn_renderer *renderer,
                 struct vn_renderer_bo *bo,
                 VkDeviceSize offset,
                 VkDeviceSize size)
{
   /* nop because kernel makes every mapping coherent */
}

static void *
virtgpu_bo_map(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
   const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;

   /* not thread-safe but is fine */
   if (!bo->base.mmap_ptr && mappable) {
      bo->base.mmap_ptr =
         virtgpu_ioctl_map(gpu, bo->gem_handle, bo->base.mmap_size);
   }

   return bo->base.mmap_ptr;
}

static int
virtgpu_bo_export_dma_buf(struct vn_renderer *renderer,
                          struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
   const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
   const bool shareable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_SHAREABLE;

   return shareable
             ? virtgpu_ioctl_prime_handle_to_fd(gpu, bo->gem_handle, mappable)
             : -1;
}

static bool
virtgpu_bo_destroy(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;

   mtx_lock(&gpu->dma_buf_import_mutex);

   /* Check the refcount again after the import lock is grabbed.  Yes, we use
    * the double-checked locking anti-pattern.
    */
   if (vn_refcount_is_valid(&bo->base.refcount)) {
      mtx_unlock(&gpu->dma_buf_import_mutex);
      return false;
   }

   if (bo->base.mmap_ptr)
      munmap(bo->base.mmap_ptr, bo->base.mmap_size);
   virtgpu_ioctl_gem_close(gpu, bo->gem_handle);

   /* set gem_handle to 0 to indicate that the bo is invalid */
   bo->gem_handle = 0;

   mtx_unlock(&gpu->dma_buf_import_mutex);

   return true;
}

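/* Map Vulkan memory properties to virtgpu blob flags: HOST_VISIBLE memory
 * becomes MAPPABLE, any external handle type makes the blob SHAREABLE, and
 * dma-buf export additionally requests CROSS_DEVICE.
 */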
static uint32_t
virtgpu_bo_blob_flags(VkMemoryPropertyFlags flags,
                      VkExternalMemoryHandleTypeFlags external_handles)
{
   uint32_t blob_flags = 0;
   if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
   if (external_handles)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
   if (external_handles & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE;

   return blob_flags;
}

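/* Import a dma-buf.  dma_buf_import_mutex serializes this against
 * virtgpu_bo_destroy: PRIME import may return a gem handle that is already
 * tracked in bo_array, in which case the existing bo is revived by bumping
 * its refcount instead of being reinitialized.
 */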
static VkResult
virtgpu_bo_create_from_dma_buf(struct vn_renderer *renderer,
                               VkDeviceSize size,
                               int fd,
                               VkMemoryPropertyFlags flags,
                               struct vn_renderer_bo **out_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct drm_virtgpu_resource_info info;
   uint32_t gem_handle = 0;
   struct virtgpu_bo *bo = NULL;

   mtx_lock(&gpu->dma_buf_import_mutex);

   gem_handle = virtgpu_ioctl_prime_fd_to_handle(gpu, fd);
   if (!gem_handle)
      goto fail;
   bo = util_sparse_array_get(&gpu->bo_array, gem_handle);

   if (virtgpu_ioctl_resource_info(gpu, gem_handle, &info))
      goto fail;

   uint32_t blob_flags;
   size_t mmap_size;
   if (info.blob_mem) {
      /* must be VIRTGPU_BLOB_MEM_HOST3D or VIRTGPU_BLOB_MEM_GUEST_VRAM */
      if (info.blob_mem != gpu->bo_blob_mem)
         goto fail;

      /* blob_flags is not passed to the kernel and is only for internal use
       * on imports.  Set it to what works best for us.
       */
      blob_flags = virtgpu_bo_blob_flags(flags, 0);
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;

      /* mmap_size is only used when mappable */
      mmap_size = 0;
      if (blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE) {
         if (info.size < size)
            goto fail;

         mmap_size = size;
      }
   } else {
      /* must be classic resource here
       * set blob_flags to 0 to fail virtgpu_bo_map
       * set mmap_size to 0 since mapping is not allowed
       */
      blob_flags = 0;
      mmap_size = 0;
   }

   /* we check bo->gem_handle instead of bo->refcount because bo->refcount
    * might only be memset to 0 and is not considered initialized in theory
    */
   if (bo->gem_handle == gem_handle) {
      if (bo->base.mmap_size < mmap_size)
         goto fail;
      if (blob_flags & ~bo->blob_flags)
         goto fail;

      /* we can't use vn_renderer_bo_ref as the refcount may drop to 0
       * temporarily before virtgpu_bo_destroy grabs the lock
       */
      vn_refcount_fetch_add_relaxed(&bo->base.refcount, 1);
   } else {
      *bo = (struct virtgpu_bo){
         .base = {
            .refcount = VN_REFCOUNT_INIT(1),
            .res_id = info.res_handle,
            .mmap_size = mmap_size,
         },
         .gem_handle = gem_handle,
         .blob_flags = blob_flags,
      };
   }

   mtx_unlock(&gpu->dma_buf_import_mutex);

   *out_bo = &bo->base;

   return VK_SUCCESS;

fail:
   if (gem_handle && bo->gem_handle != gem_handle)
      virtgpu_ioctl_gem_close(gpu, gem_handle);
   mtx_unlock(&gpu->dma_buf_import_mutex);
   return VK_ERROR_INVALID_EXTERNAL_HANDLE;
}

static VkResult
virtgpu_bo_create_from_device_memory(
   struct vn_renderer *renderer,
   VkDeviceSize size,
   vn_object_id mem_id,
   VkMemoryPropertyFlags flags,
   VkExternalMemoryHandleTypeFlags external_handles,
   struct vn_renderer_bo **out_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   const uint32_t blob_flags = virtgpu_bo_blob_flags(flags, external_handles);

   uint32_t res_id;
   uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
      gpu, gpu->bo_blob_mem, blob_flags, size, mem_id, &res_id);
   if (!gem_handle)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct virtgpu_bo *bo = util_sparse_array_get(&gpu->bo_array, gem_handle);
   *bo = (struct virtgpu_bo){
      .base = {
         .refcount = VN_REFCOUNT_INIT(1),
         .res_id = res_id,
         .mmap_size = size,
      },
      .gem_handle = gem_handle,
      .blob_flags = blob_flags,
   };

   *out_bo = &bo->base;

   return VK_SUCCESS;
}

static void
virtgpu_shmem_destroy_now(struct vn_renderer *renderer,
                          struct vn_renderer_shmem *_shmem)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_shmem *shmem = (struct virtgpu_shmem *)_shmem;

   munmap(shmem->base.mmap_ptr, shmem->base.mmap_size);
   virtgpu_ioctl_gem_close(gpu, shmem->gem_handle);
}

static void
virtgpu_shmem_destroy(struct vn_renderer *renderer,
                      struct vn_renderer_shmem *shmem)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   if (vn_renderer_shmem_cache_add(&gpu->shmem_cache, shmem))
      return;

   virtgpu_shmem_destroy_now(&gpu->base, shmem);
}

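/* Allocate a mappable shmem: reuse a cached entry when possible, otherwise
 * create a MAPPABLE blob with blob_id 0 (see virtgpu_init_shmem_blob_mem)
 * and map it right away.
 */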
static struct vn_renderer_shmem *
virtgpu_shmem_create(struct vn_renderer *renderer, size_t size)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   struct vn_renderer_shmem *cached_shmem =
      vn_renderer_shmem_cache_get(&gpu->shmem_cache, size);
   if (cached_shmem) {
      cached_shmem->refcount = VN_REFCOUNT_INIT(1);
      return cached_shmem;
   }

   uint32_t res_id;
   uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
      gpu, gpu->shmem_blob_mem, VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0,
      &res_id);
   if (!gem_handle)
      return NULL;

   void *ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
   if (!ptr) {
      virtgpu_ioctl_gem_close(gpu, gem_handle);
      return NULL;
   }

   struct virtgpu_shmem *shmem =
      util_sparse_array_get(&gpu->shmem_array, gem_handle);
   *shmem = (struct virtgpu_shmem){
      .base = {
         .refcount = VN_REFCOUNT_INIT(1),
         .res_id = res_id,
         .mmap_size = size,
         .mmap_ptr = ptr,
      },
      .gem_handle = gem_handle,
   };

   return &shmem->base;
}

static VkResult
virtgpu_wait(struct vn_renderer *renderer,
             const struct vn_renderer_wait *wait)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   const int ret = virtgpu_ioctl_syncobj_timeline_wait(gpu, wait, false);
   if (ret && errno != ETIME)
      return VK_ERROR_DEVICE_LOST;

   return ret ? VK_TIMEOUT : VK_SUCCESS;
}

static VkResult
virtgpu_submit(struct vn_renderer *renderer,
               const struct vn_renderer_submit *submit)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   const int ret = virtgpu_ioctl_submit(gpu, submit);
   return ret ? VK_ERROR_DEVICE_LOST : VK_SUCCESS;
}

static void
virtgpu_init_renderer_info(struct virtgpu *gpu)
{
   struct vn_renderer_info *info = &gpu->base.info;

   info->drm.has_primary = gpu->has_primary;
   info->drm.primary_major = gpu->primary_major;
   info->drm.primary_minor = gpu->primary_minor;
   info->drm.has_render = true;
   info->drm.render_major = gpu->render_major;
   info->drm.render_minor = gpu->render_minor;

   info->pci.vendor_id = VIRTGPU_PCI_VENDOR_ID;
   info->pci.device_id = VIRTGPU_PCI_DEVICE_ID;

   if (gpu->bustype == DRM_BUS_PCI) {
      info->pci.has_bus_info = true;
      info->pci.domain = gpu->pci_bus_info.domain;
      info->pci.bus = gpu->pci_bus_info.bus;
      info->pci.device = gpu->pci_bus_info.dev;
      info->pci.function = gpu->pci_bus_info.func;
   } else {
      info->pci.has_bus_info = false;
   }

   info->has_dma_buf_import = true;
   /* Kernel makes every mapping coherent.  We are better off filtering
    * incoherent memory types out than silently making them coherent.
    */
   info->has_cache_management = false;
   /* TODO drm_syncobj */
   info->has_external_sync = false;

   info->has_implicit_fencing = false;

   info->max_sync_queue_count = gpu->max_sync_queue_count;

   const struct virgl_renderer_capset_venus *capset = &gpu->capset.data;
   info->wire_format_version = capset->wire_format_version;
   info->vk_xml_version = capset->vk_xml_version;
   info->vk_ext_command_serialization_spec_version =
      capset->vk_ext_command_serialization_spec_version;
   info->vk_mesa_venus_protocol_spec_version =
      capset->vk_mesa_venus_protocol_spec_version;
   info->supports_blob_id_0 = capset->supports_blob_id_0;

   /* ensure vk_extension_mask is large enough to hold all capset masks */
   STATIC_ASSERT(sizeof(info->vk_extension_mask) >=
                 sizeof(capset->vk_extension_mask1));
   memcpy(info->vk_extension_mask, capset->vk_extension_mask1,
          sizeof(capset->vk_extension_mask1));

   info->allow_vk_wait_syncs = capset->allow_vk_wait_syncs;

   if (gpu->bo_blob_mem == VIRTGPU_BLOB_MEM_GUEST_VRAM)
      info->has_guest_vram = true;
}

static void
virtgpu_destroy(struct vn_renderer *renderer,
                const VkAllocationCallbacks *alloc)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   vn_renderer_shmem_cache_fini(&gpu->shmem_cache);

   if (gpu->fd >= 0)
      close(gpu->fd);

   mtx_destroy(&gpu->dma_buf_import_mutex);

   util_sparse_array_finish(&gpu->shmem_array);
   util_sparse_array_finish(&gpu->bo_array);

   vk_free(alloc, gpu);
}

static void
virtgpu_init_shmem_blob_mem(struct virtgpu *gpu)
{
   /* VIRTGPU_BLOB_MEM_GUEST allocates from the guest system memory.  They are
    * logically contiguous in the guest but are sglists (iovecs) in the host.
    * That makes them slower to process in the host.  With host process
    * isolation, it also becomes impossible for the host to access sglists
    * directly.
    *
    * While there are ideas (and shipped code in some cases) such as creating
    * udmabufs from sglists, or having a dedicated guest heap, it seems the
    * easiest way is to reuse VIRTGPU_BLOB_MEM_HOST3D.  That is, when the
    * renderer sees a request to export a blob where
    *
    *  - blob_mem is VIRTGPU_BLOB_MEM_HOST3D
    *  - blob_flags is VIRTGPU_BLOB_FLAG_USE_MAPPABLE
    *  - blob_id is 0
    *
    * it allocates a host shmem.
    *
    * TODO cache shmems as they are costly to set up and usually require syncs
    */
   gpu->shmem_blob_mem = gpu->capset.data.supports_blob_id_0
                            ? VIRTGPU_BLOB_MEM_HOST3D
                            : VIRTGPU_BLOB_MEM_GUEST;
}

static VkResult
virtgpu_init_context(struct virtgpu *gpu)
{
   assert(!gpu->capset.version);
   const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
   if (ret) {
      if (VN_DEBUG(INIT)) {
         vn_log(gpu->instance, "failed to initialize context: %s",
                strerror(errno));
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   return VK_SUCCESS;
}

static VkResult
virtgpu_init_capset(struct virtgpu *gpu)
{
   gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS;
   gpu->capset.version = 0;

   const int ret =
      virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version,
                             &gpu->capset.data, sizeof(gpu->capset.data));
   if (ret) {
      if (VN_DEBUG(INIT)) {
         vn_log(gpu->instance, "failed to get venus v%d capset: %s",
                gpu->capset.version, strerror(errno));
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   return VK_SUCCESS;
}

static VkResult
virtgpu_init_params(struct virtgpu *gpu)
{
   const uint64_t required_params[] = {
      VIRTGPU_PARAM_3D_FEATURES, VIRTGPU_PARAM_CAPSET_QUERY_FIX,
      VIRTGPU_PARAM_RESOURCE_BLOB, VIRTGPU_PARAM_CROSS_DEVICE,
      VIRTGPU_PARAM_CONTEXT_INIT,
   };
   uint64_t val;
   for (uint32_t i = 0; i < ARRAY_SIZE(required_params); i++) {
      val = virtgpu_ioctl_getparam(gpu, required_params[i]);
      if (!val) {
         if (VN_DEBUG(INIT)) {
            vn_log(gpu->instance, "required kernel param %d is missing",
                   (int)required_params[i]);
         }
         return VK_ERROR_INITIALIZATION_FAILED;
      }
   }

   val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_HOST_VISIBLE);
   if (val) {
      gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_HOST3D;
   } else {
      val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_GUEST_VRAM);
      if (val) {
         gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_GUEST_VRAM;
      }
   }

   if (!val) {
      vn_log(gpu->instance,
             "one of required kernel params (%d or %d) is missing",
             (int)VIRTGPU_PARAM_HOST_VISIBLE, (int)VIRTGPU_PARAM_GUEST_VRAM);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT);
   if (!val) {
      if (VN_DEBUG(INIT))
         vn_log(gpu->instance, "no sync queue support");
      return VK_ERROR_INITIALIZATION_FAILED;
   }
   gpu->max_sync_queue_count = val;

   return VK_SUCCESS;
}

static VkResult
virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev)
{
   bool supported_bus = false;

   switch (dev->bustype) {
   case DRM_BUS_PCI:
      if (dev->deviceinfo.pci->vendor_id == VIRTGPU_PCI_VENDOR_ID &&
          dev->deviceinfo.pci->device_id == VIRTGPU_PCI_DEVICE_ID)
         supported_bus = true;
      break;
   case DRM_BUS_PLATFORM:
      supported_bus = true;
      break;
   default:
      break;
   }

   if (!supported_bus || !(dev->available_nodes & (1 << DRM_NODE_RENDER))) {
      if (VN_DEBUG(INIT)) {
         const char *name = "unknown";
         for (uint32_t i = 0; i < DRM_NODE_MAX; i++) {
            if (dev->available_nodes & (1 << i)) {
               name = dev->nodes[i];
               break;
            }
         }
         vn_log(gpu->instance, "skipping DRM device %s", name);
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   const char *primary_path = dev->nodes[DRM_NODE_PRIMARY];
   const char *node_path = dev->nodes[DRM_NODE_RENDER];

   int fd = open(node_path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      if (VN_DEBUG(INIT))
         vn_log(gpu->instance, "failed to open %s", node_path);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   drmVersionPtr version = drmGetVersion(fd);
   if (!version || strcmp(version->name, "virtio_gpu") ||
       version->version_major != 0) {
      if (VN_DEBUG(INIT)) {
         if (version) {
            vn_log(gpu->instance, "unknown DRM driver %s version %d",
                   version->name, version->version_major);
         } else {
            vn_log(gpu->instance, "failed to get DRM driver version");
         }
      }
      if (version)
         drmFreeVersion(version);
      close(fd);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   gpu->fd = fd;

   struct stat st;
   if (stat(primary_path, &st) == 0) {
      gpu->has_primary = true;
      gpu->primary_major = major(st.st_rdev);
      gpu->primary_minor = minor(st.st_rdev);
   } else {
      gpu->has_primary = false;
      gpu->primary_major = 0;
      gpu->primary_minor = 0;
   }
   stat(node_path, &st);
   gpu->render_major = major(st.st_rdev);
   gpu->render_minor = minor(st.st_rdev);

   gpu->bustype = dev->bustype;
   if (dev->bustype == DRM_BUS_PCI)
      gpu->pci_bus_info = *dev->businfo.pci;

   drmFreeVersion(version);

   if (VN_DEBUG(INIT))
      vn_log(gpu->instance, "using DRM device %s", node_path);

   return VK_SUCCESS;
}

static VkResult
virtgpu_open(struct virtgpu *gpu)
{
   drmDevicePtr devs[8];
   int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
   if (count < 0) {
      if (VN_DEBUG(INIT))
         vn_log(gpu->instance, "failed to enumerate DRM devices");
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   VkResult result = VK_ERROR_INITIALIZATION_FAILED;
   for (int i = 0; i < count; i++) {
      result = virtgpu_open_device(gpu, devs[i]);
      if (result == VK_SUCCESS)
         break;
   }

   drmFreeDevices(devs, count);

   return result;
}

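/* Bring-up sequence: open a virtio-gpu render node, check the required
 * kernel params, fetch the venus capset, initialize the context against that
 * capset, then fill in the renderer info and the ops tables.
 */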
static VkResult
virtgpu_init(struct virtgpu *gpu)
{
   util_sparse_array_init(&gpu->shmem_array, sizeof(struct virtgpu_shmem),
                          1024);
   util_sparse_array_init(&gpu->bo_array, sizeof(struct virtgpu_bo), 1024);

   mtx_init(&gpu->dma_buf_import_mutex, mtx_plain);

   VkResult result = virtgpu_open(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_params(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_capset(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_context(gpu);
   if (result != VK_SUCCESS)
      return result;

   virtgpu_init_shmem_blob_mem(gpu);

   vn_renderer_shmem_cache_init(&gpu->shmem_cache, &gpu->base,
                                virtgpu_shmem_destroy_now);

   virtgpu_init_renderer_info(gpu);

   gpu->base.ops.destroy = virtgpu_destroy;
   gpu->base.ops.submit = virtgpu_submit;
   gpu->base.ops.wait = virtgpu_wait;

   gpu->base.shmem_ops.create = virtgpu_shmem_create;
   gpu->base.shmem_ops.destroy = virtgpu_shmem_destroy;

   gpu->base.bo_ops.create_from_device_memory =
      virtgpu_bo_create_from_device_memory;
   gpu->base.bo_ops.create_from_dma_buf = virtgpu_bo_create_from_dma_buf;
   gpu->base.bo_ops.destroy = virtgpu_bo_destroy;
   gpu->base.bo_ops.export_dma_buf = virtgpu_bo_export_dma_buf;
   gpu->base.bo_ops.map = virtgpu_bo_map;
   gpu->base.bo_ops.flush = virtgpu_bo_flush;
   gpu->base.bo_ops.invalidate = virtgpu_bo_invalidate;

   gpu->base.sync_ops.create = virtgpu_sync_create;
   gpu->base.sync_ops.create_from_syncobj = virtgpu_sync_create_from_syncobj;
   gpu->base.sync_ops.destroy = virtgpu_sync_destroy;
   gpu->base.sync_ops.export_syncobj = virtgpu_sync_export_syncobj;
   gpu->base.sync_ops.reset = virtgpu_sync_reset;
   gpu->base.sync_ops.read = virtgpu_sync_read;
   gpu->base.sync_ops.write = virtgpu_sync_write;

   return VK_SUCCESS;
}

VkResult
vn_renderer_create_virtgpu(struct vn_instance *instance,
                           const VkAllocationCallbacks *alloc,
                           struct vn_renderer **renderer)
{
   struct virtgpu *gpu = vk_zalloc(alloc, sizeof(*gpu), VN_DEFAULT_ALIGN,
                                   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!gpu)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   gpu->instance = instance;
   gpu->fd = -1;

   VkResult result = virtgpu_init(gpu);
   if (result != VK_SUCCESS) {
      virtgpu_destroy(&gpu->base, alloc);
      return result;
   }

   *renderer = &gpu->base;

   return VK_SUCCESS;
}