/*
 * Copyright © 2018 Google, Inc.
 * Copyright © 2015 Intel Corporation
 * SPDX-License-Identifier: MIT
 *
 * Kernel interface layer for turnip running on virtio_gpu (aka virtgpu)
 */

#include "tu_knl.h"

#include <errno.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xf86drm.h>

#include "vk_util.h"

#include "drm-uapi/msm_drm.h"
#include "drm-uapi/virtgpu_drm.h"
#include "util/u_debug.h"
#include "util/hash_table.h"
#include "util/libsync.h"
#include "util/u_process.h"

#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_device.h"
#include "tu_dynamic_rendering.h"
#include "tu_knl_drm.h"
#include "tu_queue.h"

#include "virglrenderer_hw.h"
#include "msm_proto.h"

#include "vdrm.h"

struct tu_userspace_fence_cmd {
   uint32_t pkt[4];    /* first 4 dwords of packet */
   uint32_t fence;     /* fifth dword is fence value which is plugged in at runtime */
   uint32_t _pad[11];
};

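/*
 * Ring of prebuilt userspace-fence packets.  virtio_queue_submit() picks the
 * slot (fence % ARRAY_SIZE(cmds)) and appends it as an extra IB at the end of
 * each submission; see setup_fence_cmds() for how the packets are built.
 */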
struct tu_userspace_fence_cmds {
   struct tu_userspace_fence_cmd cmds[64];
};

struct tu_virtio_device {
   struct vdrm_device *vdrm;
   struct msm_shmem *shmem;
   uint32_t next_blob_id;

   struct tu_userspace_fence_cmds *fence_cmds;
   struct tu_bo *fence_cmds_mem;

   /**
    * Processing zombie VMAs is a two step process, first we clear the iova
    * and then we close the handles.  But to minimize waste of virtqueue
    * space (and associated stalling and ping-ponging between guest and host)
    * we want to batch up all the GEM_SET_IOVA ccmds before we flush them to
    * the host and start closing handles.
    *
    * This gives us a place to stash the VMAs between the two steps.
    */
   struct u_vector zombie_vmas_stage_2;
};

static int tu_drm_get_param(struct vdrm_device *vdrm, uint32_t param, uint64_t *value);

/**
 * Helper for simple pass-thru ioctls
 */
static int
virtio_simple_ioctl(struct vdrm_device *vdrm, unsigned cmd, void *_req)
{
   MESA_TRACE_FUNC();
   unsigned req_len = sizeof(struct msm_ccmd_ioctl_simple_req);
   unsigned rsp_len = sizeof(struct msm_ccmd_ioctl_simple_rsp);

   req_len += _IOC_SIZE(cmd);
   if (cmd & IOC_OUT)
      rsp_len += _IOC_SIZE(cmd);

   uint8_t buf[req_len];
   struct msm_ccmd_ioctl_simple_req *req = (struct msm_ccmd_ioctl_simple_req *)buf;
   struct msm_ccmd_ioctl_simple_rsp *rsp;

   req->hdr = MSM_CCMD(IOCTL_SIMPLE, req_len);
   req->cmd = cmd;
   memcpy(req->payload, _req, _IOC_SIZE(cmd));

   rsp = (struct msm_ccmd_ioctl_simple_rsp *)
         vdrm_alloc_rsp(vdrm, &req->hdr, rsp_len);

   int ret = vdrm_send_req(vdrm, &req->hdr, true);

   if (cmd & IOC_OUT)
      memcpy(_req, rsp->payload, _IOC_SIZE(cmd));

   ret = rsp->ret;

   return ret;
}

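/*
 * Set (or, with iova=0, clear) the GPU virtual address of a host GEM object,
 * identified by its virtio-gpu resource id.  The request is queued without
 * waiting for a host response.
 */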
static int
set_iova(struct tu_device *device, uint32_t res_id, uint64_t iova)
{
   struct msm_ccmd_gem_set_iova_req req = {
         .hdr = MSM_CCMD(GEM_SET_IOVA, sizeof(req)),
         .iova = iova,
         .res_id = res_id,
   };

   return vdrm_send_req(device->vdev->vdrm, &req.hdr, false);
}

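/*
 * Sum up the global fault count and any asynchronously reported error.  The
 * async error count is only available via the shared shmem page; global
 * faults likewise come from shmem when the host advertises the field, with a
 * fallback to a MSM_PARAM_FAULTS query via tu_drm_get_param().
 */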
static int
query_faults(struct tu_device *dev, uint64_t *value)
{
   struct tu_virtio_device *vdev = dev->vdev;
   uint32_t async_error = 0;
   uint64_t global_faults;

   if (vdrm_shmem_has_field(vdev->shmem, async_error))
      async_error = vdev->shmem->async_error;

   if (vdrm_shmem_has_field(vdev->shmem, global_faults)) {
      global_faults = vdev->shmem->global_faults;
   } else {
      int ret = tu_drm_get_param(vdev->vdrm, MSM_PARAM_FAULTS, &global_faults);
      if (ret)
         return ret;
   }

   *value = global_faults + async_error;

   return 0;
}

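/*
 * Send the guest process name and command line to the host, so the host can
 * associate this context with the guest process in its debug output.
 */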
static void
set_debuginfo(struct tu_device *dev)
{
   const char *comm = util_get_process_name();
   static char cmdline[0x1000+1];
   int fd = open("/proc/self/cmdline", O_RDONLY);
   if (fd < 0)
      return;

   int n = read(fd, cmdline, sizeof(cmdline) - 1);
   if (n < 0)
      return;

   /* arguments are separated by NULL, convert to spaces: */
   for (int i = 0; i < n; i++) {
      if (cmdline[i] == '\0') {
         cmdline[i] = ' ';
      }
   }

   cmdline[n] = '\0';

   unsigned comm_len = strlen(comm) + 1;
   unsigned cmdline_len = strlen(cmdline) + 1;

   struct msm_ccmd_set_debuginfo_req *req;

   unsigned req_len = align(sizeof(*req) + comm_len + cmdline_len, 4);

   req = (struct msm_ccmd_set_debuginfo_req *)malloc(req_len);

   req->hdr         = MSM_CCMD(SET_DEBUGINFO, req_len);
   req->comm_len    = comm_len;
   req->cmdline_len = cmdline_len;

   memcpy(&req->payload[0], comm, comm_len);
   memcpy(&req->payload[comm_len], cmdline, cmdline_len);

   vdrm_send_req(dev->vdev->vdrm, &req->hdr, false);

   free(req);
}

static VkResult
virtio_device_init(struct tu_device *dev)
{
   struct tu_instance *instance = dev->physical_device->instance;
   int fd;

   fd = open(dev->physical_device->fd_path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      return vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                               "failed to open device %s", dev->physical_device->fd_path);
   }

   struct tu_virtio_device *vdev = (struct tu_virtio_device *)
            vk_zalloc(&instance->vk.alloc, sizeof(*vdev), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!vdev) {
      close(fd);
      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   u_vector_init(&vdev->zombie_vmas_stage_2, 64, sizeof(struct tu_zombie_vma));

   dev->vdev = vdev;
   dev->fd = fd;

   vdev->vdrm = vdrm_device_connect(fd, VIRTGPU_DRM_CONTEXT_MSM);

   p_atomic_set(&vdev->next_blob_id, 1);
   vdev->shmem = to_msm_shmem(vdev->vdrm->shmem);

   query_faults(dev, &dev->fault_count);

   set_debuginfo(dev);

   return VK_SUCCESS;
}

static void
virtio_device_finish(struct tu_device *dev)
{
   struct tu_instance *instance = dev->physical_device->instance;
   struct tu_virtio_device *vdev = dev->vdev;

   u_vector_finish(&vdev->zombie_vmas_stage_2);

   vdrm_device_close(vdev->vdrm);

   vk_free(&instance->vk.alloc, vdev);
   dev->vdev = NULL;

   close(dev->fd);
}

static int
tu_drm_get_param(struct vdrm_device *vdrm, uint32_t param, uint64_t *value)
{
   /* Technically this requires a pipe, but the kernel only supports one pipe
    * anyway at the time of writing and most of these are clearly pipe
    * independent. */
   struct drm_msm_param req = {
      .pipe = MSM_PIPE_3D0,
      .param = param,
   };

   int ret = virtio_simple_ioctl(vdrm, DRM_IOCTL_MSM_GET_PARAM, &req);
   if (ret)
      return ret;

   *value = req.value;

   return 0;
}

static uint32_t
tu_drm_get_highest_bank_bit(struct vdrm_device *vdrm)
{
   uint64_t value;
   int ret = tu_drm_get_param(vdrm, MSM_PARAM_HIGHEST_BANK_BIT, &value);
   if (ret)
      return 0;

   return value;
}

static enum fdl_macrotile_mode
tu_drm_get_macrotile_mode(struct vdrm_device *vdrm)
{
   uint64_t value;
   int ret = tu_drm_get_param(vdrm, MSM_PARAM_MACROTILE_MODE, &value);
   if (ret)
      return FDL_MACROTILE_INVALID;

   return (enum fdl_macrotile_mode) value;
}

static uint32_t
tu_drm_get_ubwc_swizzle(struct vdrm_device *vdrm)
{
   uint64_t value;
   int ret = tu_drm_get_param(vdrm, MSM_PARAM_UBWC_SWIZZLE, &value);
   if (ret)
      return ~0;

   return value;
}

static int
virtio_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
{
   return tu_drm_get_param(dev->vdev->vdrm, MSM_PARAM_TIMESTAMP, ts);
}

static int
virtio_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
{
   int ret = tu_drm_get_param(dev->vdev->vdrm, MSM_PARAM_SUSPENDS, suspend_count);
   return ret;
}

static bool
tu_drm_get_raytracing(struct vdrm_device *vdrm)
{
   uint64_t value;
   int ret = tu_drm_get_param(vdrm, MSM_PARAM_RAYTRACING, &value);
   if (ret)
      return false;

   return value;
}

static VkResult
virtio_device_check_status(struct tu_device *device)
{
   uint64_t last_fault_count = device->fault_count;

   query_faults(device, &device->fault_count);

   if (last_fault_count != device->fault_count)
      return vk_device_set_lost(&device->vk, "GPU faulted or hung");

   return VK_SUCCESS;
}

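/*
 * Create a kernel submitqueue for the given priority, allowing preemption on
 * a7xx+ when the device supports it.  The ioctl is tunneled to the host via
 * virtio_simple_ioctl().
 */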
static int
virtio_submitqueue_new(struct tu_device *dev,
                       int priority,
                       uint32_t *queue_id)
{
   assert(priority >= 0 &&
          priority < dev->physical_device->submitqueue_priority_count);

   struct drm_msm_submitqueue req = {
      .flags = dev->physical_device->info->chip >= 7 &&
         dev->physical_device->has_preemption ?
         MSM_SUBMITQUEUE_ALLOW_PREEMPT : 0,
      .prio = priority,
   };

   int ret = virtio_simple_ioctl(dev->vdev->vdrm, DRM_IOCTL_MSM_SUBMITQUEUE_NEW, &req);
   if (ret)
      return ret;

   *queue_id = req.id;
   return 0;
}

static void
virtio_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
{
   virtio_simple_ioctl(dev->vdev->vdrm, DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE, &queue_id);
}

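/*
 * Probe whether the host kernel accepts MSM_SUBMITQUEUE_ALLOW_PREEMPT by
 * creating (and immediately closing) a throwaway submitqueue at a middle
 * priority.
 */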
static bool
virtio_has_preemption(struct vdrm_device *vdrm)
{
   struct drm_msm_submitqueue req = {
      .flags = MSM_SUBMITQUEUE_ALLOW_PREEMPT,
      .prio = vdrm->caps.u.msm.priorities / 2,
   };

   int ret = virtio_simple_ioctl(vdrm, DRM_IOCTL_MSM_SUBMITQUEUE_NEW, &req);
   if (ret)
      return false;

   virtio_simple_ioctl(vdrm, DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE, &req.id);
   return true;
}

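/*
 * Wait for a userspace fence to signal.  The fast path just checks the fence
 * value in the shared global BO; otherwise we repeatedly issue WAIT_FENCE
 * ccmds to the host until the fence signals, an error occurs, or timeout_ns
 * expires.
 */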
static VkResult
tu_wait_fence(struct tu_device *dev,
              uint32_t queue_id,
              int fence,
              uint64_t timeout_ns)
{
   struct vdrm_device *vdrm = dev->vdev->vdrm;

   if (!fence_before(dev->global_bo_map->userspace_fence, fence))
      return VK_SUCCESS;

   if (!timeout_ns)
      return VK_TIMEOUT;

   MESA_TRACE_FUNC();

   struct msm_ccmd_wait_fence_req req = {
         .hdr = MSM_CCMD(WAIT_FENCE, sizeof(req)),
         .queue_id = queue_id,
         .fence = fence,
   };
   struct msm_ccmd_submitqueue_query_rsp *rsp;
   int64_t end_time = os_time_get_nano() + timeout_ns;
   int ret;

   do {
      rsp = (struct msm_ccmd_submitqueue_query_rsp *)
            vdrm_alloc_rsp(vdrm, &req.hdr, sizeof(*rsp));

      ret = vdrm_send_req(vdrm, &req.hdr, true);
      if (ret)
         goto out;

      if (os_time_get_nano() >= end_time)
         break;

      ret = rsp->ret;
   } while (ret == -ETIMEDOUT);

out:
   if (!ret) return VK_SUCCESS;
   if (ret == -ETIMEDOUT) return VK_TIMEOUT;
   return VK_ERROR_UNKNOWN;
}

VkResult
virtio_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
                        uint64_t timeout_ns)
{
   return tu_wait_fence(queue->device, queue->msm_queue_id, fence,
                        timeout_ns);
}

static VkResult
tu_free_zombie_vma_locked(struct tu_device *dev, bool wait)
{
   struct tu_virtio_device *vdev = dev->vdev;

   if (!u_vector_length(&dev->zombie_vmas))
      return VK_SUCCESS;

   if (wait) {
      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
            u_vector_head(&dev->zombie_vmas);
      /* Wait for 3s (arbitrary timeout) */
      VkResult ret = tu_wait_fence(dev, dev->queues[0]->msm_queue_id,
                                   vma->fence, 3000000000);

      if (ret != VK_SUCCESS)
         return ret;
   }

   /* Clear the iova of all finished objects in first pass so the SET_IOVA
    * ccmd's can be buffered and sent together to the host.  *Then* delete
    * the handles.  This avoids filling up the virtqueue with tiny messages,
    * since each execbuf ends up needing to be page aligned.
    */
   int last_signaled_fence = -1;
   while (u_vector_length(&dev->zombie_vmas) > 0) {
      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
            u_vector_tail(&dev->zombie_vmas);
      if (vma->fence > last_signaled_fence) {
         VkResult ret =
            tu_wait_fence(dev, dev->queues[0]->msm_queue_id, vma->fence, 0);
         if (ret != VK_SUCCESS)
            break;

         last_signaled_fence = vma->fence;
      }

      u_vector_remove(&dev->zombie_vmas);

      if (vma->gem_handle) {
         set_iova(dev, vma->res_id, 0);

         struct tu_zombie_vma *vma2 =
            (struct tu_zombie_vma *) u_vector_add(&vdev->zombie_vmas_stage_2);

         *vma2 = *vma;
      }
   }

   /* And _then_ close the GEM handles: */
   while (u_vector_length(&vdev->zombie_vmas_stage_2) > 0) {
      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
            u_vector_remove(&vdev->zombie_vmas_stage_2);

      util_vma_heap_free(&dev->vma, vma->iova, vma->size);
      vdrm_bo_close(dev->vdev->vdrm, vma->gem_handle);
   }

   return VK_SUCCESS;
}

static bool
tu_restore_from_zombie_vma_locked(struct tu_device *dev,
                                  uint32_t gem_handle,
                                  uint64_t *iova)
{
   struct tu_zombie_vma *vma;
   u_vector_foreach (vma, &dev->zombie_vmas) {
      if (vma->gem_handle == gem_handle) {
         *iova = vma->iova;

         /* mark to skip later vdrm bo and iova cleanup */
         vma->gem_handle = 0;
         return true;
      }
   }

   return false;
}

static VkResult
virtio_allocate_userspace_iova_locked(struct tu_device *dev,
                                      uint32_t gem_handle,
                                      uint64_t size,
                                      uint64_t client_iova,
                                      enum tu_bo_alloc_flags flags,
                                      uint64_t *iova)
{
   VkResult result;

   *iova = 0;

   if (flags & TU_BO_ALLOC_DMABUF) {
      assert(gem_handle);

      if (tu_restore_from_zombie_vma_locked(dev, gem_handle, iova))
         return VK_SUCCESS;
   }

   tu_free_zombie_vma_locked(dev, false);

   result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
   if (result == VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS) {
      /* Address may be already freed by us, but not considered as
       * freed by the kernel. We have to wait until all work that
       * may hold the address is done. Since addresses are meant to
       * be replayed only by debug tooling, it should be ok to wait.
       */
      tu_free_zombie_vma_locked(dev, true);
      result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
   }

   return result;
}

static VkResult
tu_bo_init(struct tu_device *dev,
           struct vk_object_base *base,
           struct tu_bo *bo,
           uint32_t gem_handle,
           uint64_t size,
           uint64_t iova,
           enum tu_bo_alloc_flags flags,
           const char *name)
{
   assert(dev->physical_device->has_set_iova);

   set_iova(dev, bo->res_id, iova);

   name = tu_debug_bos_add(dev, size, name);

   mtx_lock(&dev->bo_mutex);
   uint32_t idx = dev->submit_bo_count++;

   /* grow the bo list if needed */
   if (idx >= dev->submit_bo_list_size) {
      uint32_t new_len = idx + 64;
      struct drm_msm_gem_submit_bo *new_ptr = (struct drm_msm_gem_submit_bo *)
         vk_realloc(&dev->vk.alloc, dev->submit_bo_list, new_len * sizeof(*dev->submit_bo_list),
                    8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!new_ptr) {
         dev->submit_bo_count--;
         mtx_unlock(&dev->bo_mutex);
         vdrm_bo_close(dev->vdev->vdrm, bo->gem_handle);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      dev->submit_bo_list = new_ptr;
      dev->submit_bo_list_size = new_len;
   }

   bool dump = flags & TU_BO_ALLOC_ALLOW_DUMP;
   dev->submit_bo_list[idx] = (struct drm_msm_gem_submit_bo) {
      .flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
               COND(dump, MSM_SUBMIT_BO_DUMP),
      .handle = bo->res_id,
      .presumed = iova,
   };

   *bo = (struct tu_bo) {
      .gem_handle = gem_handle,
      .res_id = bo->res_id,
      .size = size,
      .iova = iova,
      .name = name,
      .refcnt = 1,
      .submit_bo_list_idx = idx,
      .base = base,
   };

   mtx_unlock(&dev->bo_mutex);

   tu_dump_bo_init(dev, bo);

   return VK_SUCCESS;
}

/**
 * Sets the name in the kernel so that the contents of /debug/dri/0/gem are more
 * useful.
 *
 * We skip this on release builds (when we're also not doing BO debugging) to
 * reduce overhead.
 */
static void
tu_bo_set_kernel_name(struct tu_device *dev, struct tu_bo *bo, const char *name)
{
   bool kernel_bo_names = dev->bo_sizes != NULL;
#if MESA_DEBUG
   kernel_bo_names = true;
#endif
   if (!kernel_bo_names)
      return;

   size_t sz = strlen(name);

   unsigned req_len = sizeof(struct msm_ccmd_gem_set_name_req) + align(sz, 4);

   uint8_t buf[req_len];
   struct msm_ccmd_gem_set_name_req *req = (struct msm_ccmd_gem_set_name_req *)buf;

   req->hdr = MSM_CCMD(GEM_SET_NAME, req_len);
   req->res_id = bo->res_id;
   req->len = sz;

   memcpy(req->payload, name, sz);

   vdrm_send_req(dev->vdev->vdrm, &req->hdr, false);
}

static VkResult
virtio_bo_init(struct tu_device *dev,
               struct vk_object_base *base,
               struct tu_bo **out_bo,
               uint64_t size,
               uint64_t client_iova,
               VkMemoryPropertyFlags mem_property,
               enum tu_bo_alloc_flags flags,
               const char *name)
{
   struct tu_virtio_device *vdev = dev->vdev;
   struct msm_ccmd_gem_new_req req = {
         .hdr = MSM_CCMD(GEM_NEW, sizeof(req)),
         .size = size,
   };
   VkResult result;
   uint32_t res_id;
   struct tu_bo *bo;

   if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
      if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
         req.flags |= MSM_BO_CACHED_COHERENT;
      } else {
         req.flags |= MSM_BO_CACHED;
      }
   } else {
      req.flags |= MSM_BO_WC;
   }

   uint32_t blob_flags = 0;
   if (mem_property & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
   }

   if (!(mem_property & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)) {
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
      if (vdev->vdrm->supports_cross_device)
         blob_flags |= VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE;
   }

   if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
      req.flags |= MSM_BO_GPU_READONLY;

   assert(!(flags & TU_BO_ALLOC_DMABUF));

   mtx_lock(&dev->vma_mutex);
   result = virtio_allocate_userspace_iova_locked(dev, 0, size, client_iova,
                                                  flags, &req.iova);
   mtx_unlock(&dev->vma_mutex);

   if (result != VK_SUCCESS)
      return result;

   /* tunneled cmds are processed separately on host side,
    * before the renderer->get_blob() callback.. the blob_id
    * is used to link the created bo to the get_blob() call
    */
   req.blob_id = p_atomic_inc_return(&vdev->next_blob_id);

   uint32_t handle =
      vdrm_bo_create(vdev->vdrm, size, blob_flags, req.blob_id, &req.hdr);

   if (!handle) {
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto fail;
   }

   res_id = vdrm_handle_to_res_id(vdev->vdrm, handle);
   bo = tu_device_lookup_bo(dev, res_id);
   assert(bo && bo->gem_handle == 0);

   bo->res_id = res_id;

   result = tu_bo_init(dev, base, bo, handle, size, req.iova, flags, name);
   if (result != VK_SUCCESS) {
      memset(bo, 0, sizeof(*bo));
      goto fail;
   }

   *out_bo = bo;

   /* We don't use bo->name here because for the !TU_DEBUG=bo case bo->name is NULL. */
   tu_bo_set_kernel_name(dev, bo, name);

   if ((mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) &&
       !(mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
      tu_bo_map(dev, bo, NULL);

      /* Cached non-coherent memory may already have dirty cache lines,
       * so we should clean the cache lines before the GPU gets a chance
       * to write into this memory.
       *
       * MSM already does this automatically for uncached (MSM_BO_WC) memory.
       */
      tu_bo_sync_cache(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
   }

   return VK_SUCCESS;

fail:
   mtx_lock(&dev->vma_mutex);
   util_vma_heap_free(&dev->vma, req.iova, size);
   mtx_unlock(&dev->vma_mutex);
   return result;
}

static VkResult
virtio_bo_init_dmabuf(struct tu_device *dev,
                      struct tu_bo **out_bo,
                      uint64_t size,
                      int prime_fd)
{
   struct vdrm_device *vdrm = dev->vdev->vdrm;
   VkResult result;
   struct tu_bo *bo = NULL;

   /* lseek() to get the real size */
   off_t real_size = lseek(prime_fd, 0, SEEK_END);
   lseek(prime_fd, 0, SEEK_SET);
   if (real_size < 0 || (uint64_t) real_size < size)
      return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);

   /* iova allocation needs to consider the object's *real* size: */
   size = real_size;

   /* Importing the same dmabuf several times would yield the same
    * gem_handle. Thus there could be a race when destroying
    * BO and importing the same dmabuf from different threads.
    * We must not permit the creation of dmabuf BO and its release
    * to happen in parallel.
    */
   u_rwlock_wrlock(&dev->dma_bo_lock);
   mtx_lock(&dev->vma_mutex);

   uint32_t handle, res_id;
   uint64_t iova;

   handle = vdrm_dmabuf_to_handle(vdrm, prime_fd);
   if (!handle) {
      result = vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
      goto out_unlock;
   }

   res_id = vdrm_handle_to_res_id(vdrm, handle);
   if (!res_id) {
      /* XXX gem_handle potentially leaked here since no refcnt */
      result = vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
      goto out_unlock;
   }

   bo = tu_device_lookup_bo(dev, res_id);

   if (bo->refcnt != 0) {
      p_atomic_inc(&bo->refcnt);
      assert(bo->res_id == res_id);
      *out_bo = bo;
      result = VK_SUCCESS;
      goto out_unlock;
   }

   bo->res_id = res_id;

   result = virtio_allocate_userspace_iova_locked(dev, handle, size, 0,
                                                  TU_BO_ALLOC_DMABUF, &iova);
   if (result != VK_SUCCESS) {
      vdrm_bo_close(dev->vdev->vdrm, handle);
      goto out_unlock;
   }

   result =
      tu_bo_init(dev, NULL, bo, handle, size, iova, TU_BO_ALLOC_NO_FLAGS, "dmabuf");
   if (result != VK_SUCCESS) {
      util_vma_heap_free(&dev->vma, iova, size);
      memset(bo, 0, sizeof(*bo));
   } else {
      *out_bo = bo;
   }

out_unlock:
   mtx_unlock(&dev->vma_mutex);
   u_rwlock_wrunlock(&dev->dma_bo_lock);
   return result;
}

static VkResult
virtio_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr)
{
   bo->map = vdrm_bo_map(dev->vdev->vdrm, bo->gem_handle, bo->size, placed_addr);
   if (bo->map == MAP_FAILED)
      return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);

   return VK_SUCCESS;
}

static void
virtio_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
{
   mtx_lock(&dev->bo_mutex);
   dev->submit_bo_list[bo->submit_bo_list_idx].flags |= MSM_SUBMIT_BO_DUMP;
   mtx_unlock(&dev->bo_mutex);
}

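/*
 * Build the table of userspace fence packets.  A single GPU-readable,
 * dump-allowed BO holds one CACHE_FLUSH_TS event-write packet per slot
 * (CP_EVENT_WRITE7 on a7xx+, CP_EVENT_WRITE otherwise); each packet makes the
 * CP write a fence value to the userspace_fence field of the global BO when
 * it executes.
 */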
static VkResult
setup_fence_cmds(struct tu_device *dev)
{
   struct tu_virtio_device *vdev = dev->vdev;
   VkResult result;

   result = tu_bo_init_new(dev, NULL, &vdev->fence_cmds_mem,
                           sizeof(*vdev->fence_cmds), (enum tu_bo_alloc_flags)
                              (TU_BO_ALLOC_ALLOW_DUMP | TU_BO_ALLOC_GPU_READ_ONLY),
                           "fence_cmds");
   if (result != VK_SUCCESS)
      return result;

   result = tu_bo_map(dev, vdev->fence_cmds_mem, NULL);
   if (result != VK_SUCCESS)
      return result;

   vdev->fence_cmds = (struct tu_userspace_fence_cmds *)vdev->fence_cmds_mem->map;

   uint64_t fence_iova = dev->global_bo->iova + gb_offset(userspace_fence);
   for (int i = 0; i < ARRAY_SIZE(vdev->fence_cmds->cmds); i++) {
      struct tu_userspace_fence_cmd *c = &vdev->fence_cmds->cmds[i];

      memset(c, 0, sizeof(*c));

      if (fd_dev_gen(&dev->physical_device->dev_id) >= A7XX) {
         c->pkt[0] = pm4_pkt7_hdr((uint8_t)CP_EVENT_WRITE7, 4);
         c->pkt[1] = CP_EVENT_WRITE7_0(.event = CACHE_FLUSH_TS,
                           .write_src = EV_WRITE_USER_32B,
                           .write_dst = EV_DST_RAM,
                           .write_enabled = true).value;
      } else {
         c->pkt[0] = pm4_pkt7_hdr((uint8_t)CP_EVENT_WRITE, 4);
         c->pkt[1] = CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS);
      }
      c->pkt[2] = fence_iova;
      c->pkt[3] = fence_iova >> 32;
   }

   return result;
}

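/*
 * Submit a batch to the host: append the userspace fence IB, translate the
 * wait/signal vk_syncs into virtgpu syncobj descriptors, then pack the BO
 * list and command entries into a single GEM_SUBMIT ccmd and hand it to
 * vdrm_execbuf() on the ring selected from the queue priority.
 */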
static VkResult
virtio_queue_submit(struct tu_queue *queue, void *_submit,
                    struct vk_sync_wait *waits, uint32_t wait_count,
                    struct vk_sync_signal *signals, uint32_t signal_count,
                    struct tu_u_trace_submission_data *u_trace_submission_data)
{
   VkResult result = VK_SUCCESS;
   int ret;
   struct tu_msm_queue_submit *submit =
      (struct tu_msm_queue_submit *)_submit;
   struct tu_virtio_device *vdev = queue->device->vdev;
   struct drm_virtgpu_execbuffer_syncobj *in_syncobjs, *out_syncobjs;
   uint64_t gpu_offset = 0;
   int ring_idx = queue->priority + 1;
   struct vdrm_execbuf_params params;
#if HAVE_PERFETTO
   struct tu_perfetto_clocks clocks;
   uint64_t start_ts = tu_perfetto_begin_submit();
#endif

   /* It would be nice to not need to defer this, but virtio_device_init()
    * happens before the device is initialized enough to allocate normal
    * GEM buffers.
    */
   if (!vdev->fence_cmds) {
      VkResult result = setup_fence_cmds(queue->device);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Add the userspace fence cmd: */
   struct tu_userspace_fence_cmds *fcmds = vdev->fence_cmds;
   if (queue->fence <= 0)
      queue->fence = 0;
   uint32_t fence = ++queue->fence;
   int idx = fence % ARRAY_SIZE(fcmds->cmds);
   struct tu_cs_entry fence_cs = {
      .bo = vdev->fence_cmds_mem,
      .size = 5 * 4,
      .offset = ((intptr_t)&fcmds->cmds[idx]) - (intptr_t)fcmds,
   };
   msm_submit_add_entries(queue->device, _submit, &fence_cs, 1);

   uint32_t entry_count =
      util_dynarray_num_elements(&submit->commands, struct drm_msm_gem_submit_cmd);
   unsigned nr_bos = entry_count ? queue->device->submit_bo_count : 0;
   unsigned bos_len = nr_bos * sizeof(struct drm_msm_gem_submit_bo);
   unsigned cmd_len = entry_count * sizeof(struct drm_msm_gem_submit_cmd);
   unsigned req_len = sizeof(struct msm_ccmd_gem_submit_req) + bos_len + cmd_len;
   struct msm_ccmd_gem_submit_req *req;
   uint32_t flags = MSM_PIPE_3D0;

   /* Allocate without wait timeline semaphores */
   in_syncobjs = (struct drm_virtgpu_execbuffer_syncobj *) vk_zalloc(
      &queue->device->vk.alloc,
      wait_count * sizeof(*in_syncobjs), 8,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (in_syncobjs == NULL) {
      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_in_syncobjs;
   }

   /* Allocate with signal timeline semaphores considered */
   out_syncobjs = (struct drm_virtgpu_execbuffer_syncobj *) vk_zalloc(
      &queue->device->vk.alloc,
      signal_count * sizeof(*out_syncobjs), 8,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (out_syncobjs == NULL) {
      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_out_syncobjs;
   }

   for (uint32_t i = 0; i < wait_count; i++) {
      struct vk_sync *sync = waits[i].sync;

      in_syncobjs[i] = (struct drm_virtgpu_execbuffer_syncobj) {
         .handle = tu_syncobj_from_vk_sync(sync),
         .flags = 0,
         .point = waits[i].wait_value,
      };
   }

   for (uint32_t i = 0; i < signal_count; i++) {
      struct vk_sync *sync = signals[i].sync;

      out_syncobjs[i] = (struct drm_virtgpu_execbuffer_syncobj) {
         .handle = tu_syncobj_from_vk_sync(sync),
         .flags = 0,
         .point = signals[i].signal_value,
      };
   }

   if (wait_count)
      flags |= MSM_SUBMIT_SYNCOBJ_IN;

   if (signal_count)
      flags |= MSM_SUBMIT_SYNCOBJ_OUT;

   mtx_lock(&queue->device->bo_mutex);

   if (queue->device->implicit_sync_bo_count == 0)
      flags |= MSM_SUBMIT_NO_IMPLICIT;

   /* drm_msm_gem_submit_cmd requires the index of the bo, which could change
    * at any time while bo_mutex is not held. So we update the index here
    * under the lock.
    */
   util_dynarray_foreach (&submit->commands, struct drm_msm_gem_submit_cmd,
                          cmd) {
      unsigned i = cmd -
         util_dynarray_element(&submit->commands,
                               struct drm_msm_gem_submit_cmd, 0);
      struct tu_bo **bo = util_dynarray_element(&submit->command_bos,
                                                struct tu_bo *, i);
      cmd->submit_idx = (*bo)->submit_bo_list_idx;
   }

   req = (struct msm_ccmd_gem_submit_req *)vk_alloc(
         &queue->device->vk.alloc, req_len, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!req) {
      mtx_unlock(&queue->device->bo_mutex);
      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_alloc_req;
   }

   req->hdr      = MSM_CCMD(GEM_SUBMIT, req_len);
   req->flags    = flags;
   req->queue_id = queue->msm_queue_id;
   req->nr_bos   = nr_bos;
   req->nr_cmds  = entry_count;

   /* Use same kernel fence and userspace fence seqno to avoid having
    * to track both:
    */
   req->fence    = queue->fence;

   memcpy(req->payload, queue->device->submit_bo_list, bos_len);
   memcpy(req->payload + bos_len, submit->commands.data, cmd_len);

   params = (struct vdrm_execbuf_params) {
      .ring_idx = ring_idx,
      .req = &req->hdr,
      .in_syncobjs = in_syncobjs,
      .out_syncobjs = out_syncobjs,
      .num_in_syncobjs = wait_count,
      .num_out_syncobjs = signal_count,
   };

   ret = vdrm_execbuf(vdev->vdrm, &params);

   mtx_unlock(&queue->device->bo_mutex);

   if (ret) {
      result = vk_device_set_lost(&queue->device->vk, "submit failed: %m");
      goto fail_submit;
   }

#if HAVE_PERFETTO
   clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
                                   start_ts, NULL);
   gpu_offset = clocks.gpu_ts_offset;
#endif

   if (u_trace_submission_data) {
      u_trace_submission_data->gpu_ts_offset = gpu_offset;
   }

   for (uint32_t i = 0; i < wait_count; i++) {
      if (!vk_sync_is_tu_timeline_sync(waits[i].sync))
         continue;

      struct tu_timeline_sync *sync =
         container_of(waits[i].sync, struct tu_timeline_sync, base);

      assert(sync->state != TU_TIMELINE_SYNC_STATE_RESET);

      /* Set the wait timeline sync to SIGNALED: the syncobj is done and
       * ready again, so it can be garbage-collected later.
       */
      sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
   }

   for (uint32_t i = 0; i < signal_count; i++) {
      if (!vk_sync_is_tu_timeline_sync(signals[i].sync))
         continue;

      struct tu_timeline_sync *sync =
         container_of(signals[i].sync, struct tu_timeline_sync, base);

      assert(sync->state == TU_TIMELINE_SYNC_STATE_RESET);
      /* Set the signal timeline sync to SUBMITTED so that we can wait on
       * this timeline sync until completed if necessary.
       */
      sync->state = TU_TIMELINE_SYNC_STATE_SUBMITTED;
   }

fail_submit:
   vk_free(&queue->device->vk.alloc, req);
fail_alloc_req:
   vk_free(&queue->device->vk.alloc, out_syncobjs);
fail_out_syncobjs:
   vk_free(&queue->device->vk.alloc, in_syncobjs);
fail_in_syncobjs:
   return result;
}

static const struct tu_knl virtio_knl_funcs = {
      .name = "virtgpu",

      .device_init = virtio_device_init,
      .device_finish = virtio_device_finish,
      .device_get_gpu_timestamp = virtio_device_get_gpu_timestamp,
      .device_get_suspend_count = virtio_device_get_suspend_count,
      .device_check_status = virtio_device_check_status,
      .submitqueue_new = virtio_submitqueue_new,
      .submitqueue_close = virtio_submitqueue_close,
      .bo_init = virtio_bo_init,
      .bo_init_dmabuf = virtio_bo_init_dmabuf,
      .bo_export_dmabuf = tu_drm_export_dmabuf,
      .bo_map = virtio_bo_map,
      .bo_allow_dump = virtio_bo_allow_dump,
      .bo_finish = tu_drm_bo_finish,
      .submit_create = msm_submit_create,
      .submit_finish = msm_submit_finish,
      .submit_add_entries = msm_submit_add_entries,
      .queue_submit = virtio_queue_submit,
      .queue_wait_fence = virtio_queue_wait_fence,
};

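/*
 * Probe a virtgpu DRM device: connect to the host using the MSM context type,
 * validate the wire protocol and driver version, snapshot the device caps,
 * and fill in the tu_physical_device if everything checks out.
 */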
VkResult
tu_knl_drm_virtio_load(struct tu_instance *instance,
                       int fd, struct _drmVersion *version,
                       struct tu_physical_device **out)
{
   struct virgl_renderer_capset_drm caps;
   struct vdrm_device *vdrm;
   VkResult result = VK_SUCCESS;
   uint64_t val;

   /* Debug option to force fallback to venus: */
   if (debug_get_bool_option("TU_NO_VIRTIO", false))
      return VK_ERROR_INCOMPATIBLE_DRIVER;

   if (drmGetCap(fd, DRM_CAP_SYNCOBJ, &val) || !val) {
      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                               "kernel driver for device %s does not support DRM_CAP_SYNCOBJ",
                               version->name);
   }

   /* Try to connect. If this doesn't work, it's probably because we're running
    * in a non-Adreno VM. Unless startup debug info is specifically requested,
    * we should silently exit and let another Vulkan driver try probing instead.
    */
   vdrm = vdrm_device_connect(fd, VIRTGPU_DRM_CONTEXT_MSM);
   if (!vdrm) {
      if (TU_DEBUG(STARTUP)) {
         return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                                  "could not connect vdrm: %s", strerror(errno));
      } else {
         return VK_ERROR_INCOMPATIBLE_DRIVER;
      }
   }

   caps = vdrm->caps;

   /* TODO add something to virgl_renderer_capset_drm to avoid round-trip to
    * host if virglrenderer is new enough.
    */
   bool has_preemption = virtio_has_preemption(vdrm);

   /* If virglrenderer is too old, we may need another round-trip to get this.
    */
   if (caps.u.msm.highest_bank_bit == 0)
      caps.u.msm.highest_bank_bit = tu_drm_get_highest_bank_bit(vdrm);

   /* TODO add these to the caps struct */
   uint32_t bank_swizzle_levels = tu_drm_get_ubwc_swizzle(vdrm);
   enum fdl_macrotile_mode macrotile_mode = tu_drm_get_macrotile_mode(vdrm);

   /* TODO add a cap for this */
   bool has_raytracing = tu_drm_get_raytracing(vdrm);

   vdrm_device_close(vdrm);

   mesa_logd("wire_format_version: %u", caps.wire_format_version);
   mesa_logd("version_major:       %u", caps.version_major);
   mesa_logd("version_minor:       %u", caps.version_minor);
   mesa_logd("version_patchlevel:  %u", caps.version_patchlevel);
   mesa_logd("has_cached_coherent: %u", caps.u.msm.has_cached_coherent);
   mesa_logd("va_start:            0x%0" PRIx64, caps.u.msm.va_start);
   mesa_logd("va_size:             0x%0" PRIx64, caps.u.msm.va_size);
   mesa_logd("gpu_id:              %u", caps.u.msm.gpu_id);
   mesa_logd("gmem_size:           %u", caps.u.msm.gmem_size);
   mesa_logd("gmem_base:           0x%0" PRIx64, caps.u.msm.gmem_base);
   mesa_logd("chip_id:             0x%0" PRIx64, caps.u.msm.chip_id);
   mesa_logd("max_freq:            %u", caps.u.msm.max_freq);
   mesa_logd("highest_bank_bit:    %u", caps.u.msm.highest_bank_bit);

   if (caps.wire_format_version != 2) {
      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                               "Unsupported protocol version: %u",
                               caps.wire_format_version);
   }

   if ((caps.version_major != 1) || (caps.version_minor < 9)) {
      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                               "unsupported version: %u.%u.%u",
                               caps.version_major,
                               caps.version_minor,
                               caps.version_patchlevel);
   }

   if (!caps.u.msm.va_size) {
      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                               "No address space");
   }

   struct tu_physical_device *device = (struct tu_physical_device *)
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail;
   }

   device->msm_major_version = caps.version_major;
   device->msm_minor_version = caps.version_minor;

   device->instance = instance;
   device->local_fd = fd;

   device->dev_id.gpu_id  = caps.u.msm.gpu_id;
   device->dev_id.chip_id = caps.u.msm.chip_id;
   device->gmem_size      = caps.u.msm.gmem_size;
   device->gmem_base      = caps.u.msm.gmem_base;
   device->va_start       = caps.u.msm.va_start;
   device->va_size        = caps.u.msm.va_size;
   device->ubwc_config.highest_bank_bit = caps.u.msm.highest_bank_bit;
   device->has_set_iova   = true;
   device->has_preemption = has_preemption;

   device->ubwc_config.bank_swizzle_levels = bank_swizzle_levels;
   device->ubwc_config.macrotile_mode = macrotile_mode;

   device->gmem_size = debug_get_num_option("TU_GMEM", device->gmem_size);

   device->has_cached_coherent_memory = caps.u.msm.has_cached_coherent;

   device->submitqueue_priority_count = caps.u.msm.priorities;

   device->has_raytracing = has_raytracing;

   device->syncobj_type = vk_drm_syncobj_get_type(fd);
   /* we don't support DRM_CAP_SYNCOBJ_TIMELINE, but drm-shim does */
   if (!(device->syncobj_type.features & VK_SYNC_FEATURE_TIMELINE))
      device->timeline_type = vk_sync_timeline_get_type(&tu_timeline_sync_type);

   device->sync_types[0] = &device->syncobj_type;
   device->sync_types[1] = &device->timeline_type.sync;
   device->sync_types[2] = NULL;

   device->heap.size = tu_get_system_heap_size(device);
   device->heap.used = 0u;
   device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   instance->knl = &virtio_knl_funcs;

   *out = device;

   return VK_SUCCESS;

fail:
   vk_free(&instance->vk.alloc, device);
   return result;
}