1 /*
2  * Copyright 2020 Google LLC
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <errno.h>
7 #include <fcntl.h>
8 #include <poll.h>
9 #include <sys/mman.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12 #include <unistd.h>
13 #include <xf86drm.h>
14 
15 #ifdef MAJOR_IN_MKDEV
16 #include <sys/mkdev.h>
17 #endif
18 #ifdef MAJOR_IN_SYSMACROS
19 #include <sys/sysmacros.h>
20 #endif
21 
22 #include "drm-uapi/virtgpu_drm.h"
23 #include "util/sparse_array.h"
24 #define VIRGL_RENDERER_UNSTABLE_APIS
25 #include "virtio-gpu/virglrenderer_hw.h"
26 
27 #include "vn_renderer_internal.h"
28 
29 #ifndef VIRTGPU_PARAM_GUEST_VRAM
30 /* All guest allocations happen via virtgpu dedicated heap. */
31 #define VIRTGPU_PARAM_GUEST_VRAM 9
32 #endif
33 
34 #ifndef VIRTGPU_BLOB_MEM_GUEST_VRAM
35 #define VIRTGPU_BLOB_MEM_GUEST_VRAM 0x0004
36 #endif
37 
38 /* XXX comment these out to really use kernel uapi */
39 #define SIMULATE_BO_SIZE_FIX 1
40 #define SIMULATE_SYNCOBJ     1
41 #define SIMULATE_SUBMIT      1
42 
43 #define VIRTGPU_PCI_VENDOR_ID 0x1af4
44 #define VIRTGPU_PCI_DEVICE_ID 0x1050
45 
46 struct virtgpu;
47 
48 struct virtgpu_shmem {
49    struct vn_renderer_shmem base;
50    uint32_t gem_handle;
51 };
52 
53 struct virtgpu_bo {
54    struct vn_renderer_bo base;
55    uint32_t gem_handle;
56    uint32_t blob_flags;
57 };
58 
59 struct virtgpu_sync {
60    struct vn_renderer_sync base;
61 
62    /*
63     * drm_syncobj is in one of these states
64     *
65     *  - value N:      drm_syncobj has a signaled fence chain with seqno N
66     *  - pending N->M: drm_syncobj has an unsignaled fence chain with seqno M
67     *                  (which may point to another unsignaled fence chain with
68     *                   seqno between N and M, and so on)
69     *
70     * TODO Do we want to use binary drm_syncobjs?  They would be
71     *
72     *  - value 0: drm_syncobj has no fence
73     *  - value 1: drm_syncobj has a signaled fence with seqno 0
74     *
75     * They are cheaper but require special care.
76     */
77    uint32_t syncobj_handle;
78 };
79 
80 struct virtgpu {
81    struct vn_renderer base;
82 
83    struct vn_instance *instance;
84 
85    int fd;
86 
87    bool has_primary;
88    int primary_major;
89    int primary_minor;
90    int render_major;
91    int render_minor;
92 
93    int bustype;
94    drmPciBusInfo pci_bus_info;
95 
96    uint32_t max_timeline_count;
97 
98    struct {
99       enum virgl_renderer_capset id;
100       uint32_t version;
101       struct virgl_renderer_capset_venus data;
102    } capset;
103 
104    uint32_t shmem_blob_mem;
105    uint32_t bo_blob_mem;
106 
107    /* indexed by gem_handle rather than res_id: res_id is monotonically
108     * increasing by default (see virtio_gpu_resource_id_get), which would
109     * make a sparse array keyed by it grow without bound
110     */
111    struct util_sparse_array shmem_array;
112    struct util_sparse_array bo_array;
113 
114    mtx_t dma_buf_import_mutex;
115 
116    struct vn_renderer_shmem_cache shmem_cache;
117 };
118 
119 #ifdef SIMULATE_SYNCOBJ
120 
121 #include "util/hash_table.h"
122 #include "util/u_idalloc.h"
123 
124 static struct {
125    mtx_t mutex;
126    struct hash_table *syncobjs;
127    struct util_idalloc ida;
128 
129    int signaled_fd;
130 } sim;
131 
132 struct sim_syncobj {
133    mtx_t mutex;
134    uint64_t point;
135 
136    int pending_fd;
137    uint64_t pending_point;
138    bool pending_cpu;
139 };
140 
141 static uint32_t
142 sim_syncobj_create(struct virtgpu *gpu, bool signaled)
143 {
144    struct sim_syncobj *syncobj = calloc(1, sizeof(*syncobj));
145    if (!syncobj)
146       return 0;
147 
148    mtx_init(&syncobj->mutex, mtx_plain);
149    syncobj->pending_fd = -1;
150 
151    mtx_lock(&sim.mutex);
152 
153    /* initialize lazily */
154    if (!sim.syncobjs) {
155       sim.syncobjs = _mesa_pointer_hash_table_create(NULL);
156       if (!sim.syncobjs) {
157          mtx_unlock(&sim.mutex);
158          return 0;
159       }
160 
161       util_idalloc_init(&sim.ida, 32);
162 
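      /* submit an empty execbuffer with FENCE_FD_OUT to obtain a fence fd
       * with no work behind it; sim_syncobj_export() dups it for syncobjs
       * that have no pending fence
       */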
163       struct drm_virtgpu_execbuffer args = {
164          .flags = VIRTGPU_EXECBUF_FENCE_FD_OUT |
165                   (gpu->base.info.supports_multiple_timelines
166                       ? VIRTGPU_EXECBUF_RING_IDX
167                       : 0),
168          .ring_idx = 0, /* CPU ring */
169       };
170       int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
171       if (ret || args.fence_fd < 0) {
172          _mesa_hash_table_destroy(sim.syncobjs, NULL);
173          sim.syncobjs = NULL;
174          mtx_unlock(&sim.mutex);
175          return 0;
176       }
177 
178       sim.signaled_fd = args.fence_fd;
179    }
180 
181    const unsigned syncobj_handle = util_idalloc_alloc(&sim.ida) + 1;
182    _mesa_hash_table_insert(sim.syncobjs,
183                            (const void *)(uintptr_t)syncobj_handle, syncobj);
184 
185    mtx_unlock(&sim.mutex);
186 
187    return syncobj_handle;
188 }
189 
190 static void
191 sim_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
192 {
193    struct sim_syncobj *syncobj = NULL;
194 
195    mtx_lock(&sim.mutex);
196 
197    struct hash_entry *entry = _mesa_hash_table_search(
198       sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
199    if (entry) {
200       syncobj = entry->data;
201       _mesa_hash_table_remove(sim.syncobjs, entry);
202       util_idalloc_free(&sim.ida, syncobj_handle - 1);
203    }
204 
205    mtx_unlock(&sim.mutex);
206 
207    if (syncobj) {
208       if (syncobj->pending_fd >= 0)
209          close(syncobj->pending_fd);
210       mtx_destroy(&syncobj->mutex);
211       free(syncobj);
212    }
213 }
214 
215 static VkResult
216 sim_syncobj_poll(int fd, int poll_timeout)
217 {
218    struct pollfd pollfd = {
219       .fd = fd,
220       .events = POLLIN,
221    };
222    int ret;
223    do {
224       ret = poll(&pollfd, 1, poll_timeout);
225    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
226 
227    if (ret < 0 || (ret > 0 && !(pollfd.revents & POLLIN))) {
228       return (ret < 0 && errno == ENOMEM) ? VK_ERROR_OUT_OF_HOST_MEMORY
229                                           : VK_ERROR_DEVICE_LOST;
230    }
231 
232    return ret ? VK_SUCCESS : VK_TIMEOUT;
233 }
234 
235 static void
236 sim_syncobj_set_point_locked(struct sim_syncobj *syncobj, uint64_t point)
237 {
238    syncobj->point = point;
239 
240    if (syncobj->pending_fd >= 0) {
241       close(syncobj->pending_fd);
242       syncobj->pending_fd = -1;
243       syncobj->pending_point = point;
244    }
245 }
246 
247 static void
248 sim_syncobj_update_point_locked(struct sim_syncobj *syncobj, int poll_timeout)
249 {
250    if (syncobj->pending_fd >= 0) {
251       VkResult result;
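      /* indefinite waits on CPU-ring fences are capped at max_cpu_timeout; a
       * timeout is logged and then treated as if the fence had signaled
       */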
252       if (syncobj->pending_cpu) {
253          if (poll_timeout == -1) {
254             const int max_cpu_timeout = 2000;
255             poll_timeout = max_cpu_timeout;
256             result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
257             if (result == VK_TIMEOUT) {
258                vn_log(NULL, "cpu sync timed out after %dms; ignoring",
259                       poll_timeout);
260                result = VK_SUCCESS;
261             }
262          } else {
263             result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
264          }
265       } else {
266          result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
267       }
268       if (result == VK_SUCCESS) {
269          close(syncobj->pending_fd);
270          syncobj->pending_fd = -1;
271          syncobj->point = syncobj->pending_point;
272       }
273    }
274 }
275 
276 static struct sim_syncobj *
277 sim_syncobj_lookup(struct virtgpu *gpu, uint32_t syncobj_handle)
278 {
279    struct sim_syncobj *syncobj = NULL;
280 
281    mtx_lock(&sim.mutex);
282    struct hash_entry *entry = _mesa_hash_table_search(
283       sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
284    if (entry)
285       syncobj = entry->data;
286    mtx_unlock(&sim.mutex);
287 
288    return syncobj;
289 }
290 
291 static int
292 sim_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
293 {
294    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
295    if (!syncobj)
296       return -1;
297 
298    mtx_lock(&syncobj->mutex);
299    sim_syncobj_set_point_locked(syncobj, 0);
300    mtx_unlock(&syncobj->mutex);
301 
302    return 0;
303 }
304 
305 static int
306 sim_syncobj_query(struct virtgpu *gpu,
307                   uint32_t syncobj_handle,
308                   uint64_t *point)
309 {
310    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
311    if (!syncobj)
312       return -1;
313 
314    mtx_lock(&syncobj->mutex);
315    sim_syncobj_update_point_locked(syncobj, 0);
316    *point = syncobj->point;
317    mtx_unlock(&syncobj->mutex);
318 
319    return 0;
320 }
321 
322 static int
323 sim_syncobj_signal(struct virtgpu *gpu,
324                    uint32_t syncobj_handle,
325                    uint64_t point)
326 {
327    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
328    if (!syncobj)
329       return -1;
330 
331    mtx_lock(&syncobj->mutex);
332    sim_syncobj_set_point_locked(syncobj, point);
333    mtx_unlock(&syncobj->mutex);
334 
335    return 0;
336 }
337 
338 static int
339 sim_syncobj_submit(struct virtgpu *gpu,
340                    uint32_t syncobj_handle,
341                    int sync_fd,
342                    uint64_t point,
343                    bool cpu)
344 {
345    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
346    if (!syncobj)
347       return -1;
348 
349    int pending_fd = dup(sync_fd);
350    if (pending_fd < 0) {
351       vn_log(gpu->instance, "failed to dup sync fd");
352       return -1;
353    }
354 
355    mtx_lock(&syncobj->mutex);
356 
357    if (syncobj->pending_fd >= 0) {
358       mtx_unlock(&syncobj->mutex);
359 
360       /* TODO */
361       vn_log(gpu->instance, "sorry, no simulated timeline semaphore");
362       close(pending_fd);
363       return -1;
364    }
365    if (syncobj->point >= point)
366       vn_log(gpu->instance, "non-monotonic signaling");
367 
368    syncobj->pending_fd = pending_fd;
369    syncobj->pending_point = point;
370    syncobj->pending_cpu = cpu;
371 
372    mtx_unlock(&syncobj->mutex);
373 
374    return 0;
375 }
376 
377 static int
378 timeout_to_poll_timeout(uint64_t timeout)
379 {
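   /* convert a timeout in nanoseconds to a millisecond timeout for poll(),
    * rounding up; -1 requests an indefinite wait
    */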
380    const uint64_t ns_per_ms = 1000000;
381    const uint64_t ms = (timeout + ns_per_ms - 1) / ns_per_ms;
382    if (!ms && timeout)
383       return -1;
384    return ms <= INT_MAX ? ms : -1;
385 }
386 
387 static int
388 sim_syncobj_wait(struct virtgpu *gpu,
389                  const struct vn_renderer_wait *wait,
390                  bool wait_avail)
391 {
392    if (wait_avail)
393       return -1;
394 
395    const int poll_timeout = timeout_to_poll_timeout(wait->timeout);
396 
397    /* TODO poll all fds at the same time */
398    for (uint32_t i = 0; i < wait->sync_count; i++) {
399       struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
400       const uint64_t point = wait->sync_values[i];
401 
402       struct sim_syncobj *syncobj =
403          sim_syncobj_lookup(gpu, sync->syncobj_handle);
404       if (!syncobj)
405          return -1;
406 
407       mtx_lock(&syncobj->mutex);
408 
409       if (syncobj->point < point)
410          sim_syncobj_update_point_locked(syncobj, poll_timeout);
411 
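      /* still below the waited point: with wait_any, try the next sync
       * unless this is the last one or a fence is still pending; otherwise
       * fail with ETIME
       */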
412       if (syncobj->point < point) {
413          if (wait->wait_any && i < wait->sync_count - 1 &&
414              syncobj->pending_fd < 0) {
415             mtx_unlock(&syncobj->mutex);
416             continue;
417          }
418          errno = ETIME;
419          mtx_unlock(&syncobj->mutex);
420          return -1;
421       }
422 
423       mtx_unlock(&syncobj->mutex);
424 
425       if (wait->wait_any)
426          break;
427 
428       /* TODO adjust poll_timeout */
429    }
430 
431    return 0;
432 }
433 
434 static int
435 sim_syncobj_export(struct virtgpu *gpu, uint32_t syncobj_handle)
436 {
437    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
438    if (!syncobj)
439       return -1;
440 
441    int fd = -1;
442    mtx_lock(&syncobj->mutex);
443    if (syncobj->pending_fd >= 0)
444       fd = dup(syncobj->pending_fd);
445    else
446       fd = dup(sim.signaled_fd);
447    mtx_unlock(&syncobj->mutex);
448 
449    return fd;
450 }
451 
452 static uint32_t
453 sim_syncobj_import(struct virtgpu *gpu, uint32_t syncobj_handle, int fd)
454 {
455    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
456    if (!syncobj)
457       return 0;
458 
459    if (sim_syncobj_submit(gpu, syncobj_handle, fd, 1, false))
460       return 0;
461 
462    return syncobj_handle;
463 }
464 
465 #endif /* SIMULATE_SYNCOBJ */
466 
467 #ifdef SIMULATE_SUBMIT
468 
469 static int
470 sim_submit_signal_syncs(struct virtgpu *gpu,
471                         int sync_fd,
472                         struct vn_renderer_sync *const *syncs,
473                         const uint64_t *sync_values,
474                         uint32_t sync_count,
475                         bool cpu)
476 {
477    for (uint32_t i = 0; i < sync_count; i++) {
478       struct virtgpu_sync *sync = (struct virtgpu_sync *)syncs[i];
479       const uint64_t pending_point = sync_values[i];
480 
481 #ifdef SIMULATE_SYNCOBJ
482       int ret = sim_syncobj_submit(gpu, sync->syncobj_handle, sync_fd,
483                                    pending_point, cpu);
484       if (ret)
485          return ret;
486 #else
487       /* we can in theory do a DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE followed by a
488        * DRM_IOCTL_SYNCOBJ_TRANSFER
489        */
490       return -1;
491 #endif
492    }
493 
494    return 0;
495 }
496 
497 static uint32_t *
498 sim_submit_alloc_gem_handles(struct vn_renderer_bo *const *bos,
499                              uint32_t bo_count)
500 {
501    uint32_t *gem_handles = malloc(sizeof(*gem_handles) * bo_count);
502    if (!gem_handles)
503       return NULL;
504 
505    for (uint32_t i = 0; i < bo_count; i++) {
506       struct virtgpu_bo *bo = (struct virtgpu_bo *)bos[i];
507       gem_handles[i] = bo->gem_handle;
508    }
509 
510    return gem_handles;
511 }
512 
513 static int
514 sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit)
515 {
516    const bool use_ring_idx = gpu->base.info.supports_multiple_timelines;
517 
518    /* TODO replace submit->bos by submit->gem_handles to avoid malloc/loop */
519    uint32_t *gem_handles = NULL;
520    if (submit->bo_count) {
521       gem_handles =
522          sim_submit_alloc_gem_handles(submit->bos, submit->bo_count);
523       if (!gem_handles)
524          return -1;
525    }
526 
527    assert(submit->batch_count);
528 
529    int ret = 0;
530    for (uint32_t i = 0; i < submit->batch_count; i++) {
531       const struct vn_renderer_submit_batch *batch = &submit->batches[i];
532 
533       struct drm_virtgpu_execbuffer args = {
534          .flags = (batch->sync_count ? VIRTGPU_EXECBUF_FENCE_FD_OUT : 0) |
535                   (use_ring_idx ? VIRTGPU_EXECBUF_RING_IDX : 0),
536          .size = batch->cs_size,
537          .command = (uintptr_t)batch->cs_data,
538          .bo_handles = (uintptr_t)gem_handles,
539          .num_bo_handles = submit->bo_count,
540          .ring_idx = (use_ring_idx ? batch->ring_idx : 0),
541       };
542 
543       ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
544       if (ret) {
545          vn_log(gpu->instance, "failed to execbuffer: %s", strerror(errno));
546          break;
547       }
548 
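      /* ring_idx 0 is the CPU ring; mark its fences as cpu so that waits on
       * them are capped in sim_syncobj_update_point_locked()
       */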
549       if (batch->sync_count) {
550          ret = sim_submit_signal_syncs(gpu, args.fence_fd, batch->syncs,
551                                        batch->sync_values, batch->sync_count,
552                                        batch->ring_idx == 0);
553          close(args.fence_fd);
554          if (ret)
555             break;
556       }
557    }
558 
559    free(gem_handles);
560    return ret;
561 }
562 
563 #endif /* SIMULATE_SUBMIT */
564 
565 static int
566 virtgpu_ioctl(struct virtgpu *gpu, unsigned long request, void *args)
567 {
568    return drmIoctl(gpu->fd, request, args);
569 }
570 
571 static uint64_t
572 virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param)
573 {
574    /* val must be zeroed because kernel only writes the lower 32 bits */
575    uint64_t val = 0;
576    struct drm_virtgpu_getparam args = {
577       .param = param,
578       .value = (uintptr_t)&val,
579    };
580 
581    const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args);
582    return ret ? 0 : val;
583 }
584 
585 static int
586 virtgpu_ioctl_get_caps(struct virtgpu *gpu,
587                        enum virgl_renderer_capset id,
588                        uint32_t version,
589                        void *capset,
590                        size_t capset_size)
591 {
592    struct drm_virtgpu_get_caps args = {
593       .cap_set_id = id,
594       .cap_set_ver = version,
595       .addr = (uintptr_t)capset,
596       .size = capset_size,
597    };
598 
599    return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
600 }
601 
602 static int
603 virtgpu_ioctl_context_init(struct virtgpu *gpu,
604                            enum virgl_renderer_capset capset_id)
605 {
606    struct drm_virtgpu_context_set_param ctx_set_params[3] = {
607       {
608          .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID,
609          .value = capset_id,
610       },
611       {
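         /* 64 timeline rings; see max_timeline_count in virtgpu_init_params() */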
612          .param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS,
613          .value = 64,
614       },
615       {
616          .param = VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK,
617          .value = 0, /* don't generate drm_events on fence signaling */
618       },
619    };
620 
621    struct drm_virtgpu_context_init args = {
622       .num_params = ARRAY_SIZE(ctx_set_params),
623       .ctx_set_params = (uintptr_t)&ctx_set_params,
624    };
625 
626    return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
627 }
628 
629 static uint32_t
630 virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu,
631                                    uint32_t blob_mem,
632                                    uint32_t blob_flags,
633                                    size_t blob_size,
634                                    uint64_t blob_id,
635                                    uint32_t *res_id)
636 {
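/* pad the size to 4K page granularity before passing it to the kernel */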
637 #ifdef SIMULATE_BO_SIZE_FIX
638    blob_size = align64(blob_size, 4096);
639 #endif
640 
641    struct drm_virtgpu_resource_create_blob args = {
642       .blob_mem = blob_mem,
643       .blob_flags = blob_flags,
644       .size = blob_size,
645       .blob_id = blob_id,
646    };
647 
648    if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args))
649       return 0;
650 
651    *res_id = args.res_handle;
652    return args.bo_handle;
653 }
654 
655 static int
656 virtgpu_ioctl_resource_info(struct virtgpu *gpu,
657                             uint32_t gem_handle,
658                             struct drm_virtgpu_resource_info *info)
659 {
660    *info = (struct drm_virtgpu_resource_info){
661       .bo_handle = gem_handle,
662    };
663 
664    return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_INFO, info);
665 }
666 
667 static void
668 virtgpu_ioctl_gem_close(struct virtgpu *gpu, uint32_t gem_handle)
669 {
670    struct drm_gem_close args = {
671       .handle = gem_handle,
672    };
673 
674    ASSERTED const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args);
675    assert(!ret);
676 }
677 
678 static int
679 virtgpu_ioctl_prime_handle_to_fd(struct virtgpu *gpu,
680                                  uint32_t gem_handle,
681                                  bool mappable)
682 {
683    struct drm_prime_handle args = {
684       .handle = gem_handle,
685       .flags = DRM_CLOEXEC | (mappable ? DRM_RDWR : 0),
686    };
687 
688    const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
689    return ret ? -1 : args.fd;
690 }
691 
692 static uint32_t
693 virtgpu_ioctl_prime_fd_to_handle(struct virtgpu *gpu, int fd)
694 {
695    struct drm_prime_handle args = {
696       .fd = fd,
697    };
698 
699    const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
700    return ret ? 0 : args.handle;
701 }
702 
703 static void *
704 virtgpu_ioctl_map(struct virtgpu *gpu, uint32_t gem_handle, size_t size)
705 {
706    struct drm_virtgpu_map args = {
707       .handle = gem_handle,
708    };
709 
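   /* DRM_IOCTL_VIRTGPU_MAP returns a fake mmap offset for the gem handle,
    * which is then passed to mmap() on the drm fd
    */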
710    if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args))
711       return NULL;
712 
713    void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd,
714                     args.offset);
715    if (ptr == MAP_FAILED)
716       return NULL;
717 
718    return ptr;
719 }
720 
721 static uint32_t
722 virtgpu_ioctl_syncobj_create(struct virtgpu *gpu, bool signaled)
723 {
724 #ifdef SIMULATE_SYNCOBJ
725    return sim_syncobj_create(gpu, signaled);
726 #endif
727 
728    struct drm_syncobj_create args = {
729       .flags = signaled ? DRM_SYNCOBJ_CREATE_SIGNALED : 0,
730    };
731 
732    const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_CREATE, &args);
733    return ret ? 0 : args.handle;
734 }
735 
736 static void
737 virtgpu_ioctl_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
738 {
739 #ifdef SIMULATE_SYNCOBJ
740    sim_syncobj_destroy(gpu, syncobj_handle);
741    return;
742 #endif
743 
744    struct drm_syncobj_destroy args = {
745       .handle = syncobj_handle,
746    };
747 
748    ASSERTED const int ret =
749       virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_DESTROY, &args);
750    assert(!ret);
751 }
752 
753 static int
754 virtgpu_ioctl_syncobj_handle_to_fd(struct virtgpu *gpu,
755                                    uint32_t syncobj_handle,
756                                    bool sync_file)
757 {
758 #ifdef SIMULATE_SYNCOBJ
759    return sync_file ? sim_syncobj_export(gpu, syncobj_handle) : -1;
760 #endif
761 
762    struct drm_syncobj_handle args = {
763       .handle = syncobj_handle,
764       .flags =
765          sync_file ? DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE : 0,
766    };
767 
768    int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
769    if (ret)
770       return -1;
771 
772    return args.fd;
773 }
774 
775 static uint32_t
776 virtgpu_ioctl_syncobj_fd_to_handle(struct virtgpu *gpu,
777                                    int fd,
778                                    uint32_t syncobj_handle)
779 {
780 #ifdef SIMULATE_SYNCOBJ
781    return syncobj_handle ? sim_syncobj_import(gpu, syncobj_handle, fd) : 0;
782 #endif
783 
784    struct drm_syncobj_handle args = {
785       .handle = syncobj_handle,
786       .flags =
787          syncobj_handle ? DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE : 0,
788       .fd = fd,
789    };
790 
791    int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args);
792    if (ret)
793       return 0;
794 
795    return args.handle;
796 }
797 
798 static int
799 virtgpu_ioctl_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
800 {
801 #ifdef SIMULATE_SYNCOBJ
802    return sim_syncobj_reset(gpu, syncobj_handle);
803 #endif
804 
805    struct drm_syncobj_array args = {
806       .handles = (uintptr_t)&syncobj_handle,
807       .count_handles = 1,
808    };
809 
810    return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_RESET, &args);
811 }
812 
813 static int
814 virtgpu_ioctl_syncobj_query(struct virtgpu *gpu,
815                             uint32_t syncobj_handle,
816                             uint64_t *point)
817 {
818 #ifdef SIMULATE_SYNCOBJ
819    return sim_syncobj_query(gpu, syncobj_handle, point);
820 #endif
821 
822    struct drm_syncobj_timeline_array args = {
823       .handles = (uintptr_t)&syncobj_handle,
824       .points = (uintptr_t)point,
825       .count_handles = 1,
826    };
827 
828    return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_QUERY, &args);
829 }
830 
831 static int
832 virtgpu_ioctl_syncobj_timeline_signal(struct virtgpu *gpu,
833                                       uint32_t syncobj_handle,
834                                       uint64_t point)
835 {
836 #ifdef SIMULATE_SYNCOBJ
837    return sim_syncobj_signal(gpu, syncobj_handle, point);
838 #endif
839 
840    struct drm_syncobj_timeline_array args = {
841       .handles = (uintptr_t)&syncobj_handle,
842       .points = (uintptr_t)&point,
843       .count_handles = 1,
844    };
845 
846    return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &args);
847 }
848 
849 static int
850 virtgpu_ioctl_syncobj_timeline_wait(struct virtgpu *gpu,
851                                     const struct vn_renderer_wait *wait,
852                                     bool wait_avail)
853 {
854 #ifdef SIMULATE_SYNCOBJ
855    return sim_syncobj_wait(gpu, wait, wait_avail);
856 #endif
857 
858    /* always enable wait-before-submit */
859    uint32_t flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
860    if (!wait->wait_any)
861       flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
862    /* wait for fences to appear instead of signaling */
863    if (wait_avail)
864       flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE;
865 
866    /* TODO replace wait->syncs by wait->sync_handles to avoid malloc/loop */
867    uint32_t *syncobj_handles =
868       malloc(sizeof(*syncobj_handles) * wait->sync_count);
869    if (!syncobj_handles)
870       return -1;
871    for (uint32_t i = 0; i < wait->sync_count; i++) {
872       struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
873       syncobj_handles[i] = sync->syncobj_handle;
874    }
875 
876    struct drm_syncobj_timeline_wait args = {
877       .handles = (uintptr_t)syncobj_handles,
878       .points = (uintptr_t)wait->sync_values,
879       .timeout_nsec = os_time_get_absolute_timeout(wait->timeout),
880       .count_handles = wait->sync_count,
881       .flags = flags,
882    };
883 
884    const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &args);
885 
886    free(syncobj_handles);
887 
888    return ret;
889 }
890 
891 static int
892 virtgpu_ioctl_submit(struct virtgpu *gpu,
893                      const struct vn_renderer_submit *submit)
894 {
895 #ifdef SIMULATE_SUBMIT
896    return sim_submit(gpu, submit);
897 #endif
898    return -1;
899 }
900 
901 static VkResult
902 virtgpu_sync_write(struct vn_renderer *renderer,
903                    struct vn_renderer_sync *_sync,
904                    uint64_t val)
905 {
906    struct virtgpu *gpu = (struct virtgpu *)renderer;
907    struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
908 
909    const int ret =
910       virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle, val);
911 
912    return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
913 }
914 
915 static VkResult
916 virtgpu_sync_read(struct vn_renderer *renderer,
917                   struct vn_renderer_sync *_sync,
918                   uint64_t *val)
919 {
920    struct virtgpu *gpu = (struct virtgpu *)renderer;
921    struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
922 
923    const int ret =
924       virtgpu_ioctl_syncobj_query(gpu, sync->syncobj_handle, val);
925 
926    return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
927 }
928 
929 static VkResult
930 virtgpu_sync_reset(struct vn_renderer *renderer,
931                    struct vn_renderer_sync *_sync,
932                    uint64_t initial_val)
933 {
934    struct virtgpu *gpu = (struct virtgpu *)renderer;
935    struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
936 
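   /* reset to no fence, then seed a signaled fence chain at initial_val */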
937    int ret = virtgpu_ioctl_syncobj_reset(gpu, sync->syncobj_handle);
938    if (!ret) {
939       ret = virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle,
940                                                   initial_val);
941    }
942 
943    return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
944 }
945 
946 static int
947 virtgpu_sync_export_syncobj(struct vn_renderer *renderer,
948                             struct vn_renderer_sync *_sync,
949                             bool sync_file)
950 {
951    struct virtgpu *gpu = (struct virtgpu *)renderer;
952    struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
953 
954    return virtgpu_ioctl_syncobj_handle_to_fd(gpu, sync->syncobj_handle,
955                                              sync_file);
956 }
957 
958 static void
959 virtgpu_sync_destroy(struct vn_renderer *renderer,
960                      struct vn_renderer_sync *_sync)
961 {
962    struct virtgpu *gpu = (struct virtgpu *)renderer;
963    struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
964 
965    virtgpu_ioctl_syncobj_destroy(gpu, sync->syncobj_handle);
966 
967    free(sync);
968 }
969 
970 static VkResult
971 virtgpu_sync_create_from_syncobj(struct vn_renderer *renderer,
972                                  int fd,
973                                  bool sync_file,
974                                  struct vn_renderer_sync **out_sync)
975 {
976    struct virtgpu *gpu = (struct virtgpu *)renderer;
977 
978    uint32_t syncobj_handle;
979    if (sync_file) {
980       syncobj_handle = virtgpu_ioctl_syncobj_create(gpu, false);
981       if (!syncobj_handle)
982          return VK_ERROR_OUT_OF_HOST_MEMORY;
983       if (!virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, syncobj_handle)) {
984          virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
985          return VK_ERROR_INVALID_EXTERNAL_HANDLE;
986       }
987    } else {
988       syncobj_handle = virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, 0);
989       if (!syncobj_handle)
990          return VK_ERROR_INVALID_EXTERNAL_HANDLE;
991    }
992 
993    struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
994    if (!sync) {
995       virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
996       return VK_ERROR_OUT_OF_HOST_MEMORY;
997    }
998 
999    sync->syncobj_handle = syncobj_handle;
1000    sync->base.sync_id = 0; /* TODO */
1001 
1002    *out_sync = &sync->base;
1003 
1004    return VK_SUCCESS;
1005 }
1006 
1007 static VkResult
1008 virtgpu_sync_create(struct vn_renderer *renderer,
1009                     uint64_t initial_val,
1010                     uint32_t flags,
1011                     struct vn_renderer_sync **out_sync)
1012 {
1013    struct virtgpu *gpu = (struct virtgpu *)renderer;
1014 
1015    /* TODO */
1016    if (flags & VN_RENDERER_SYNC_SHAREABLE)
1017       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1018 
1019    /* always false because we don't use binary drm_syncobjs */
1020    const bool signaled = false;
1021    const uint32_t syncobj_handle =
1022       virtgpu_ioctl_syncobj_create(gpu, signaled);
1023    if (!syncobj_handle)
1024       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1025 
1026    /* add a signaled fence chain with seqno initial_val */
1027    const int ret =
1028       virtgpu_ioctl_syncobj_timeline_signal(gpu, syncobj_handle, initial_val);
1029    if (ret) {
1030       virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
1031       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1032    }
1033 
1034    struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
1035    if (!sync) {
1036       virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
1037       return VK_ERROR_OUT_OF_HOST_MEMORY;
1038    }
1039 
1040    sync->syncobj_handle = syncobj_handle;
1041    /* we will have a sync_id when shareable is true and virtio-gpu associates
1042     * a host sync object with guest drm_syncobj
1043     */
1044    sync->base.sync_id = 0;
1045 
1046    *out_sync = &sync->base;
1047 
1048    return VK_SUCCESS;
1049 }
1050 
1051 static void
1052 virtgpu_bo_invalidate(struct vn_renderer *renderer,
1053                       struct vn_renderer_bo *bo,
1054                       VkDeviceSize offset,
1055                       VkDeviceSize size)
1056 {
1057    /* nop because kernel makes every mapping coherent */
1058 }
1059 
1060 static void
1061 virtgpu_bo_flush(struct vn_renderer *renderer,
1062                  struct vn_renderer_bo *bo,
1063                  VkDeviceSize offset,
1064                  VkDeviceSize size)
1065 {
1066    /* nop because kernel makes every mapping coherent */
1067 }
1068 
1069 static void *
1070 virtgpu_bo_map(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
1071 {
1072    struct virtgpu *gpu = (struct virtgpu *)renderer;
1073    struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
1074    const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
1075 
1076    /* not thread-safe but is fine */
1077    if (!bo->base.mmap_ptr && mappable) {
1078       bo->base.mmap_ptr =
1079          virtgpu_ioctl_map(gpu, bo->gem_handle, bo->base.mmap_size);
1080    }
1081 
1082    return bo->base.mmap_ptr;
1083 }
1084 
1085 static int
1086 virtgpu_bo_export_dma_buf(struct vn_renderer *renderer,
1087                           struct vn_renderer_bo *_bo)
1088 {
1089    struct virtgpu *gpu = (struct virtgpu *)renderer;
1090    struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
1091    const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
1092    const bool shareable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
1093 
1094    return shareable
1095              ? virtgpu_ioctl_prime_handle_to_fd(gpu, bo->gem_handle, mappable)
1096              : -1;
1097 }
1098 
1099 static bool
1100 virtgpu_bo_destroy(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
1101 {
1102    struct virtgpu *gpu = (struct virtgpu *)renderer;
1103    struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
1104 
1105    mtx_lock(&gpu->dma_buf_import_mutex);
1106 
1107    /* Check the refcount again after the import lock is grabbed.  Yes, we use
1108     * the double-checked locking anti-pattern.
1109     */
1110    if (vn_refcount_is_valid(&bo->base.refcount)) {
1111       mtx_unlock(&gpu->dma_buf_import_mutex);
1112       return false;
1113    }
1114 
1115    if (bo->base.mmap_ptr)
1116       munmap(bo->base.mmap_ptr, bo->base.mmap_size);
1117    virtgpu_ioctl_gem_close(gpu, bo->gem_handle);
1118 
1119    /* set gem_handle to 0 to indicate that the bo is invalid */
1120    bo->gem_handle = 0;
1121 
1122    mtx_unlock(&gpu->dma_buf_import_mutex);
1123 
1124    return true;
1125 }
1126 
1127 static uint32_t
1128 virtgpu_bo_blob_flags(VkMemoryPropertyFlags flags,
1129                       VkExternalMemoryHandleTypeFlags external_handles)
1130 {
1131    uint32_t blob_flags = 0;
1132    if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
1133       blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
1134    if (external_handles)
1135       blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
1136    if (external_handles & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)
1137       blob_flags |= VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE;
1138 
1139    return blob_flags;
1140 }
1141 
1142 static VkResult
1143 virtgpu_bo_create_from_dma_buf(struct vn_renderer *renderer,
1144                                VkDeviceSize size,
1145                                int fd,
1146                                VkMemoryPropertyFlags flags,
1147                                struct vn_renderer_bo **out_bo)
1148 {
1149    struct virtgpu *gpu = (struct virtgpu *)renderer;
1150    struct drm_virtgpu_resource_info info;
1151    uint32_t gem_handle = 0;
1152    struct virtgpu_bo *bo = NULL;
1153 
1154    mtx_lock(&gpu->dma_buf_import_mutex);
1155 
1156    gem_handle = virtgpu_ioctl_prime_fd_to_handle(gpu, fd);
1157    if (!gem_handle)
1158       goto fail;
1159    bo = util_sparse_array_get(&gpu->bo_array, gem_handle);
1160 
1161    if (virtgpu_ioctl_resource_info(gpu, gem_handle, &info))
1162       goto fail;
1163 
1164    /* Upon import, blob_flags is not passed to the kernel and is only for
1165     * internal use. Set it to what works best for us.
1166     * - blob mem: SHAREABLE + conditional MAPPABLE per VkMemoryPropertyFlags
1167     * - classic 3d: SHAREABLE only for export and to fail the map
1168     */
1169    uint32_t blob_flags = VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
1170    size_t mmap_size = 0;
1171    if (info.blob_mem) {
1172       /* must be VIRTGPU_BLOB_MEM_HOST3D or VIRTGPU_BLOB_MEM_GUEST_VRAM */
1173       if (info.blob_mem != gpu->bo_blob_mem)
1174          goto fail;
1175 
1176       blob_flags |= virtgpu_bo_blob_flags(flags, 0);
1177 
1178       /* mmap_size is only used when mappable */
1179       mmap_size = 0;
1180       if (blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE) {
1181          if (info.size < size)
1182             goto fail;
1183 
1184          mmap_size = size;
1185       }
1186    }
1187 
1188    /* we check bo->gem_handle instead of bo->refcount because bo->refcount
1189     * might only be memset to 0 and is not considered initialized in theory
1190     */
1191    if (bo->gem_handle == gem_handle) {
1192       if (bo->base.mmap_size < mmap_size)
1193          goto fail;
1194       if (blob_flags & ~bo->blob_flags)
1195          goto fail;
1196 
1197       /* we can't use vn_renderer_bo_ref as the refcount may drop to 0
1198        * temporarily before virtgpu_bo_destroy grabs the lock
1199        */
1200       vn_refcount_fetch_add_relaxed(&bo->base.refcount, 1);
1201    } else {
1202       *bo = (struct virtgpu_bo){
1203          .base = {
1204             .refcount = VN_REFCOUNT_INIT(1),
1205             .res_id = info.res_handle,
1206             .mmap_size = mmap_size,
1207          },
1208          .gem_handle = gem_handle,
1209          .blob_flags = blob_flags,
1210       };
1211    }
1212 
1213    mtx_unlock(&gpu->dma_buf_import_mutex);
1214 
1215    *out_bo = &bo->base;
1216 
1217    return VK_SUCCESS;
1218 
1219 fail:
1220    if (gem_handle && bo->gem_handle != gem_handle)
1221       virtgpu_ioctl_gem_close(gpu, gem_handle);
1222    mtx_unlock(&gpu->dma_buf_import_mutex);
1223    return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1224 }
1225 
1226 static VkResult
1227 virtgpu_bo_create_from_device_memory(
1228    struct vn_renderer *renderer,
1229    VkDeviceSize size,
1230    vn_object_id mem_id,
1231    VkMemoryPropertyFlags flags,
1232    VkExternalMemoryHandleTypeFlags external_handles,
1233    struct vn_renderer_bo **out_bo)
1234 {
1235    struct virtgpu *gpu = (struct virtgpu *)renderer;
1236    const uint32_t blob_flags = virtgpu_bo_blob_flags(flags, external_handles);
1237 
1238    uint32_t res_id;
1239    uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
1240       gpu, gpu->bo_blob_mem, blob_flags, size, mem_id, &res_id);
1241    if (!gem_handle)
1242       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1243 
1244    struct virtgpu_bo *bo = util_sparse_array_get(&gpu->bo_array, gem_handle);
1245    *bo = (struct virtgpu_bo){
1246       .base = {
1247          .refcount = VN_REFCOUNT_INIT(1),
1248          .res_id = res_id,
1249          .mmap_size = size,
1250       },
1251       .gem_handle = gem_handle,
1252       .blob_flags = blob_flags,
1253    };
1254 
1255    *out_bo = &bo->base;
1256 
1257    return VK_SUCCESS;
1258 }
1259 
1260 static void
1261 virtgpu_shmem_destroy_now(struct vn_renderer *renderer,
1262                           struct vn_renderer_shmem *_shmem)
1263 {
1264    struct virtgpu *gpu = (struct virtgpu *)renderer;
1265    struct virtgpu_shmem *shmem = (struct virtgpu_shmem *)_shmem;
1266 
1267    munmap(shmem->base.mmap_ptr, shmem->base.mmap_size);
1268    virtgpu_ioctl_gem_close(gpu, shmem->gem_handle);
1269 }
1270 
1271 static void
1272 virtgpu_shmem_destroy(struct vn_renderer *renderer,
1273                       struct vn_renderer_shmem *shmem)
1274 {
1275    struct virtgpu *gpu = (struct virtgpu *)renderer;
1276 
1277    if (vn_renderer_shmem_cache_add(&gpu->shmem_cache, shmem))
1278       return;
1279 
1280    virtgpu_shmem_destroy_now(&gpu->base, shmem);
1281 }
1282 
1283 static struct vn_renderer_shmem *
1284 virtgpu_shmem_create(struct vn_renderer *renderer, size_t size)
1285 {
1286    struct virtgpu *gpu = (struct virtgpu *)renderer;
1287 
1288    struct vn_renderer_shmem *cached_shmem =
1289       vn_renderer_shmem_cache_get(&gpu->shmem_cache, size);
1290    if (cached_shmem) {
1291       cached_shmem->refcount = VN_REFCOUNT_INIT(1);
1292       return cached_shmem;
1293    }
1294 
1295    uint32_t res_id;
1296    uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
1297       gpu, gpu->shmem_blob_mem, VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0,
1298       &res_id);
1299    if (!gem_handle)
1300       return NULL;
1301 
1302    void *ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
1303    if (!ptr) {
1304       virtgpu_ioctl_gem_close(gpu, gem_handle);
1305       return NULL;
1306    }
1307 
1308    struct virtgpu_shmem *shmem =
1309       util_sparse_array_get(&gpu->shmem_array, gem_handle);
1310    *shmem = (struct virtgpu_shmem){
1311       .base = {
1312          .refcount = VN_REFCOUNT_INIT(1),
1313          .res_id = res_id,
1314          .mmap_size = size,
1315          .mmap_ptr = ptr,
1316       },
1317       .gem_handle = gem_handle,
1318    };
1319 
1320    return &shmem->base;
1321 }
1322 
1323 static VkResult
1324 virtgpu_wait(struct vn_renderer *renderer,
1325              const struct vn_renderer_wait *wait)
1326 {
1327    struct virtgpu *gpu = (struct virtgpu *)renderer;
1328 
1329    const int ret = virtgpu_ioctl_syncobj_timeline_wait(gpu, wait, false);
1330    if (ret && errno != ETIME)
1331       return VK_ERROR_DEVICE_LOST;
1332 
1333    return ret ? VK_TIMEOUT : VK_SUCCESS;
1334 }
1335 
1336 static VkResult
1337 virtgpu_submit(struct vn_renderer *renderer,
1338                const struct vn_renderer_submit *submit)
1339 {
1340    struct virtgpu *gpu = (struct virtgpu *)renderer;
1341 
1342    const int ret = virtgpu_ioctl_submit(gpu, submit);
1343    return ret ? VK_ERROR_DEVICE_LOST : VK_SUCCESS;
1344 }
1345 
1346 static void
1347 virtgpu_init_renderer_info(struct virtgpu *gpu)
1348 {
1349    struct vn_renderer_info *info = &gpu->base.info;
1350 
1351    info->drm.has_primary = gpu->has_primary;
1352    info->drm.primary_major = gpu->primary_major;
1353    info->drm.primary_minor = gpu->primary_minor;
1354    info->drm.has_render = true;
1355    info->drm.render_major = gpu->render_major;
1356    info->drm.render_minor = gpu->render_minor;
1357 
1358    info->pci.vendor_id = VIRTGPU_PCI_VENDOR_ID;
1359    info->pci.device_id = VIRTGPU_PCI_DEVICE_ID;
1360 
1361    if (gpu->bustype == DRM_BUS_PCI) {
1362       info->pci.has_bus_info = true;
1363       info->pci.domain = gpu->pci_bus_info.domain;
1364       info->pci.bus = gpu->pci_bus_info.bus;
1365       info->pci.device = gpu->pci_bus_info.dev;
1366       info->pci.function = gpu->pci_bus_info.func;
1367    } else {
1368       info->pci.has_bus_info = false;
1369    }
1370 
1371    info->has_dma_buf_import = true;
1372    /* TODO switch from emulation to drm_syncobj */
1373    info->has_external_sync = true;
1374 
1375    info->has_implicit_fencing = false;
1376 
1377    const struct virgl_renderer_capset_venus *capset = &gpu->capset.data;
1378    info->wire_format_version = capset->wire_format_version;
1379    info->vk_xml_version = capset->vk_xml_version;
1380    info->vk_ext_command_serialization_spec_version =
1381       capset->vk_ext_command_serialization_spec_version;
1382    info->vk_mesa_venus_protocol_spec_version =
1383       capset->vk_mesa_venus_protocol_spec_version;
1384    info->supports_blob_id_0 = capset->supports_blob_id_0;
1385 
1386    /* ensure vk_extension_mask is large enough to hold all capset masks */
1387    STATIC_ASSERT(sizeof(info->vk_extension_mask) >=
1388                  sizeof(capset->vk_extension_mask1));
1389    memcpy(info->vk_extension_mask, capset->vk_extension_mask1,
1390           sizeof(capset->vk_extension_mask1));
1391 
1392    info->allow_vk_wait_syncs = capset->allow_vk_wait_syncs;
1393 
1394    info->supports_multiple_timelines = capset->supports_multiple_timelines;
1395    info->max_timeline_count = gpu->max_timeline_count;
1396 
1397    if (gpu->bo_blob_mem == VIRTGPU_BLOB_MEM_GUEST_VRAM)
1398       info->has_guest_vram = true;
1399 
1400    /* use guest blob allocations from the dedicated heap (host-visible memory) */
1401    if (gpu->bo_blob_mem == VIRTGPU_BLOB_MEM_HOST3D && capset->use_guest_vram)
1402       info->has_guest_vram = true;
1403 }
1404 
1405 static void
1406 virtgpu_destroy(struct vn_renderer *renderer,
1407                 const VkAllocationCallbacks *alloc)
1408 {
1409    struct virtgpu *gpu = (struct virtgpu *)renderer;
1410 
1411    vn_renderer_shmem_cache_fini(&gpu->shmem_cache);
1412 
1413    if (gpu->fd >= 0)
1414       close(gpu->fd);
1415 
1416    mtx_destroy(&gpu->dma_buf_import_mutex);
1417 
1418    util_sparse_array_finish(&gpu->shmem_array);
1419    util_sparse_array_finish(&gpu->bo_array);
1420 
1421    vk_free(alloc, gpu);
1422 }
1423 
1424 static inline void
1425 virtgpu_init_shmem_blob_mem(ASSERTED struct virtgpu *gpu)
1426 {
1427    /* VIRTGPU_BLOB_MEM_GUEST allocates from the guest system memory.  They are
1428     * logically contiguous in the guest but are sglists (iovecs) in the host.
1429     * That makes them slower to process in the host.  With host process
1430     * isolation, it also becomes impossible for the host to access sglists
1431     * directly.
1432     *
1433     * While there are ideas (and shipped code in some cases) such as creating
1434     * udmabufs from sglists, or having a dedicated guest heap, it seems the
1435     * easiest way is to reuse VIRTGPU_BLOB_MEM_HOST3D.  That is, when the
1436     * renderer sees a request to export a blob where
1437     *
1438     *  - blob_mem is VIRTGPU_BLOB_MEM_HOST3D
1439     *  - blob_flags is VIRTGPU_BLOB_FLAG_USE_MAPPABLE
1440     *  - blob_id is 0
1441     *
1442     * it allocates a host shmem.
1443     *
1444     * supports_blob_id_0 has been enforced by mandated render server config.
1445     */
1446    assert(gpu->capset.data.supports_blob_id_0);
1447    gpu->shmem_blob_mem = VIRTGPU_BLOB_MEM_HOST3D;
1448 }
1449 
1450 static VkResult
1451 virtgpu_init_context(struct virtgpu *gpu)
1452 {
1453    assert(!gpu->capset.version);
1454    const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
1455    if (ret) {
1456       if (VN_DEBUG(INIT)) {
1457          vn_log(gpu->instance, "failed to initialize context: %s",
1458                 strerror(errno));
1459       }
1460       return VK_ERROR_INITIALIZATION_FAILED;
1461    }
1462 
1463    return VK_SUCCESS;
1464 }
1465 
1466 static VkResult
1467 virtgpu_init_capset(struct virtgpu *gpu)
1468 {
1469    gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS;
1470    gpu->capset.version = 0;
1471 
1472    const int ret =
1473       virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version,
1474                              &gpu->capset.data, sizeof(gpu->capset.data));
1475    if (ret) {
1476       if (VN_DEBUG(INIT)) {
1477          vn_log(gpu->instance, "failed to get venus v%d capset: %s",
1478                 gpu->capset.version, strerror(errno));
1479       }
1480       return VK_ERROR_INITIALIZATION_FAILED;
1481    }
1482 
1483    return VK_SUCCESS;
1484 }
1485 
1486 static VkResult
1487 virtgpu_init_params(struct virtgpu *gpu)
1488 {
1489    const uint64_t required_params[] = {
1490       VIRTGPU_PARAM_3D_FEATURES,   VIRTGPU_PARAM_CAPSET_QUERY_FIX,
1491       VIRTGPU_PARAM_RESOURCE_BLOB, VIRTGPU_PARAM_CROSS_DEVICE,
1492       VIRTGPU_PARAM_CONTEXT_INIT,
1493    };
1494    uint64_t val;
1495    for (uint32_t i = 0; i < ARRAY_SIZE(required_params); i++) {
1496       val = virtgpu_ioctl_getparam(gpu, required_params[i]);
1497       if (!val) {
1498          if (VN_DEBUG(INIT)) {
1499             vn_log(gpu->instance, "required kernel param %d is missing",
1500                    (int)required_params[i]);
1501          }
1502          return VK_ERROR_INITIALIZATION_FAILED;
1503       }
1504    }
1505 
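   /* prefer HOST3D blobs when the host-visible heap is exposed; otherwise
    * fall back to the dedicated guest-vram heap
    */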
1506    val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_HOST_VISIBLE);
1507    if (val) {
1508       gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_HOST3D;
1509    } else {
1510       val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_GUEST_VRAM);
1511       if (val) {
1512          gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_GUEST_VRAM;
1513       }
1514    }
1515 
1516    if (!val) {
1517       vn_log(gpu->instance,
1518              "one of required kernel params (%d or %d) is missing",
1519              (int)VIRTGPU_PARAM_HOST_VISIBLE, (int)VIRTGPU_PARAM_GUEST_VRAM);
1520       return VK_ERROR_INITIALIZATION_FAILED;
1521    }
1522 
1523    /* implied by CONTEXT_INIT uapi */
1524    gpu->max_timeline_count = 64;
1525 
1526    return VK_SUCCESS;
1527 }
1528 
1529 static VkResult
1530 virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev)
1531 {
1532    bool supported_bus = false;
1533 
1534    switch (dev->bustype) {
1535    case DRM_BUS_PCI:
1536       if (dev->deviceinfo.pci->vendor_id == VIRTGPU_PCI_VENDOR_ID &&
1537           dev->deviceinfo.pci->device_id == VIRTGPU_PCI_DEVICE_ID)
1538          supported_bus = true;
1539       break;
1540    case DRM_BUS_PLATFORM:
1541       supported_bus = true;
1542       break;
1543    default:
1544       break;
1545    }
1546 
1547    if (!supported_bus || !(dev->available_nodes & (1 << DRM_NODE_RENDER))) {
1548       if (VN_DEBUG(INIT)) {
1549          const char *name = "unknown";
1550          for (uint32_t i = 0; i < DRM_NODE_MAX; i++) {
1551             if (dev->available_nodes & (1 << i)) {
1552                name = dev->nodes[i];
1553                break;
1554             }
1555          }
1556          vn_log(gpu->instance, "skipping DRM device %s", name);
1557       }
1558       return VK_ERROR_INITIALIZATION_FAILED;
1559    }
1560 
1561    const char *primary_path = dev->nodes[DRM_NODE_PRIMARY];
1562    const char *node_path = dev->nodes[DRM_NODE_RENDER];
1563 
1564    int fd = open(node_path, O_RDWR | O_CLOEXEC);
1565    if (fd < 0) {
1566       if (VN_DEBUG(INIT))
1567          vn_log(gpu->instance, "failed to open %s", node_path);
1568       return VK_ERROR_INITIALIZATION_FAILED;
1569    }
1570 
1571    drmVersionPtr version = drmGetVersion(fd);
1572    if (!version || strcmp(version->name, "virtio_gpu") ||
1573        version->version_major != 0) {
1574       if (VN_DEBUG(INIT)) {
1575          if (version) {
1576             vn_log(gpu->instance, "unknown DRM driver %s version %d",
1577                    version->name, version->version_major);
1578          } else {
1579             vn_log(gpu->instance, "failed to get DRM driver version");
1580          }
1581       }
1582       if (version)
1583          drmFreeVersion(version);
1584       close(fd);
1585       return VK_ERROR_INITIALIZATION_FAILED;
1586    }
1587 
1588    gpu->fd = fd;
1589 
1590    struct stat st;
1591    if (stat(primary_path, &st) == 0) {
1592       gpu->has_primary = true;
1593       gpu->primary_major = major(st.st_rdev);
1594       gpu->primary_minor = minor(st.st_rdev);
1595    } else {
1596       gpu->has_primary = false;
1597       gpu->primary_major = 0;
1598       gpu->primary_minor = 0;
1599    }
1600    stat(node_path, &st);
1601    gpu->render_major = major(st.st_rdev);
1602    gpu->render_minor = minor(st.st_rdev);
1603 
1604    gpu->bustype = dev->bustype;
1605    if (dev->bustype == DRM_BUS_PCI)
1606       gpu->pci_bus_info = *dev->businfo.pci;
1607 
1608    drmFreeVersion(version);
1609 
1610    if (VN_DEBUG(INIT))
1611       vn_log(gpu->instance, "using DRM device %s", node_path);
1612 
1613    return VK_SUCCESS;
1614 }
1615 
1616 static VkResult
1617 virtgpu_open(struct virtgpu *gpu)
1618 {
1619    drmDevicePtr devs[8];
1620    int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
1621    if (count < 0) {
1622       if (VN_DEBUG(INIT))
1623          vn_log(gpu->instance, "failed to enumerate DRM devices");
1624       return VK_ERROR_INITIALIZATION_FAILED;
1625    }
1626 
1627    VkResult result = VK_ERROR_INITIALIZATION_FAILED;
1628    for (int i = 0; i < count; i++) {
1629       result = virtgpu_open_device(gpu, devs[i]);
1630       if (result == VK_SUCCESS)
1631          break;
1632    }
1633 
1634    drmFreeDevices(devs, count);
1635 
1636    return result;
1637 }
1638 
1639 static VkResult
1640 virtgpu_init(struct virtgpu *gpu)
1641 {
1642    util_sparse_array_init(&gpu->shmem_array, sizeof(struct virtgpu_shmem),
1643                           1024);
1644    util_sparse_array_init(&gpu->bo_array, sizeof(struct virtgpu_bo), 1024);
1645 
1646    mtx_init(&gpu->dma_buf_import_mutex, mtx_plain);
1647 
1648    VkResult result = virtgpu_open(gpu);
1649    if (result == VK_SUCCESS)
1650       result = virtgpu_init_params(gpu);
1651    if (result == VK_SUCCESS)
1652       result = virtgpu_init_capset(gpu);
1653    if (result == VK_SUCCESS)
1654       result = virtgpu_init_context(gpu);
1655    if (result != VK_SUCCESS)
1656       return result;
1657 
1658    virtgpu_init_shmem_blob_mem(gpu);
1659 
1660    vn_renderer_shmem_cache_init(&gpu->shmem_cache, &gpu->base,
1661                                 virtgpu_shmem_destroy_now);
1662 
1663    virtgpu_init_renderer_info(gpu);
1664 
1665    gpu->base.ops.destroy = virtgpu_destroy;
1666    gpu->base.ops.submit = virtgpu_submit;
1667    gpu->base.ops.wait = virtgpu_wait;
1668 
1669    gpu->base.shmem_ops.create = virtgpu_shmem_create;
1670    gpu->base.shmem_ops.destroy = virtgpu_shmem_destroy;
1671 
1672    gpu->base.bo_ops.create_from_device_memory =
1673       virtgpu_bo_create_from_device_memory;
1674    gpu->base.bo_ops.create_from_dma_buf = virtgpu_bo_create_from_dma_buf;
1675    gpu->base.bo_ops.destroy = virtgpu_bo_destroy;
1676    gpu->base.bo_ops.export_dma_buf = virtgpu_bo_export_dma_buf;
1677    gpu->base.bo_ops.map = virtgpu_bo_map;
1678    gpu->base.bo_ops.flush = virtgpu_bo_flush;
1679    gpu->base.bo_ops.invalidate = virtgpu_bo_invalidate;
1680 
1681    gpu->base.sync_ops.create = virtgpu_sync_create;
1682    gpu->base.sync_ops.create_from_syncobj = virtgpu_sync_create_from_syncobj;
1683    gpu->base.sync_ops.destroy = virtgpu_sync_destroy;
1684    gpu->base.sync_ops.export_syncobj = virtgpu_sync_export_syncobj;
1685    gpu->base.sync_ops.reset = virtgpu_sync_reset;
1686    gpu->base.sync_ops.read = virtgpu_sync_read;
1687    gpu->base.sync_ops.write = virtgpu_sync_write;
1688 
1689    return VK_SUCCESS;
1690 }
1691 
1692 VkResult
1693 vn_renderer_create_virtgpu(struct vn_instance *instance,
1694                            const VkAllocationCallbacks *alloc,
1695                            struct vn_renderer **renderer)
1696 {
1697    struct virtgpu *gpu = vk_zalloc(alloc, sizeof(*gpu), VN_DEFAULT_ALIGN,
1698                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1699    if (!gpu)
1700       return VK_ERROR_OUT_OF_HOST_MEMORY;
1701 
1702    gpu->instance = instance;
1703    gpu->fd = -1;
1704 
1705    VkResult result = virtgpu_init(gpu);
1706    if (result != VK_SUCCESS) {
1707       virtgpu_destroy(&gpu->base, alloc);
1708       return result;
1709    }
1710 
1711    *renderer = &gpu->base;
1712 
1713    return VK_SUCCESS;
1714 }
1715