1 /*
2 * Copyright © 2018 Google, Inc.
3 * Copyright © 2015 Intel Corporation
4 * SPDX-License-Identifier: MIT
5 *
6 * Kernel interface layer for turnip running on virtio_gpu (aka virtgpu)
7 */
8
9 #include "tu_knl.h"
10
11 #include <errno.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #include <sys/mman.h>
15 #include <xf86drm.h>
16
17 #include "vk_util.h"
18
19 #include "drm-uapi/msm_drm.h"
20 #include "drm-uapi/virtgpu_drm.h"
21 #include "util/u_debug.h"
22 #include "util/hash_table.h"
23 #include "util/libsync.h"
24 #include "util/u_process.h"
25
26 #include "tu_cmd_buffer.h"
27 #include "tu_cs.h"
28 #include "tu_device.h"
29 #include "tu_dynamic_rendering.h"
30 #include "tu_knl_drm.h"
31 #include "tu_queue.h"
32
33 #include "virglrenderer_hw.h"
34 #include "msm_proto.h"
35
36 #include "vdrm.h"
37
38 struct tu_userspace_fence_cmd {
39 uint32_t pkt[4]; /* first 4 dwords of packet */
40 uint32_t fence; /* fifth dword is fence value which is plugged in at runtime */
41 uint32_t _pad[11];
42 };
43
44 struct tu_userspace_fence_cmds {
45 struct tu_userspace_fence_cmd cmds[64];
46 };
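/*
 * Illustrative sketch (not part of the driver): how a fence slot is used per
 * submit, following the comments above and virtio_queue_submit() below. The
 * submit seqno picks one of the 64 slots round-robin and is patched into the
 * fifth dword before the 5-dword packet is appended to the cmdstream.
 */
#if 0
static void
example_patch_fence_slot(struct tu_userspace_fence_cmds *fcmds, uint32_t seqno)
{
   /* 64 slots, reused round-robin: */
   int idx = seqno % ARRAY_SIZE(fcmds->cmds);

   /* pkt[0..3] were pre-built at init time (see setup_fence_cmds()); only the
    * value the CP will write, i.e. the fifth dword, changes per submit:
    */
   fcmds->cmds[idx].fence = seqno;
}
#endif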
47
48 struct tu_virtio_device {
49 struct vdrm_device *vdrm;
50 struct msm_shmem *shmem;
51 uint32_t next_blob_id;
52
53 struct tu_userspace_fence_cmds *fence_cmds;
54 struct tu_bo *fence_cmds_mem;
55
56 /**
57 * Processing zombie VMAs is a two-step process: first we clear the iova,
58 * and then we close the handles. But to minimize waste of virtqueue
59 * space (and the associated stalling and ping-ponging between guest and
60 * host) we want to batch up all the GEM_SET_IOVA ccmds before we flush
61 * them to the host and start closing handles.
62 *
63 * This gives us a place to stash the VMAs between the two steps.
64 */
65 struct u_vector zombie_vmas_stage_2;
66 };
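/*
 * Minimal sketch (not part of the driver) of the two-step flow described in
 * the comment above; the real implementation, including the fence checks and
 * iova heap bookkeeping, is tu_free_zombie_vma_locked() further down.
 */
#if 0
static void
example_two_step_vma_cleanup(struct tu_device *dev)
{
   struct tu_virtio_device *vdev = dev->vdev;
   struct tu_zombie_vma *vma;

   /* Step 1: buffer up GEM_SET_IOVA(0) ccmds and stash the VMAs: */
   while (u_vector_length(&dev->zombie_vmas) > 0) {
      vma = (struct tu_zombie_vma *)u_vector_remove(&dev->zombie_vmas);
      set_iova(dev, vma->res_id, 0);
      *(struct tu_zombie_vma *)u_vector_add(&vdev->zombie_vmas_stage_2) = *vma;
   }

   /* Step 2: only now close the GEM handles: */
   while (u_vector_length(&vdev->zombie_vmas_stage_2) > 0) {
      vma = (struct tu_zombie_vma *)u_vector_remove(&vdev->zombie_vmas_stage_2);
      vdrm_bo_close(vdev->vdrm, vma->gem_handle);
   }
}
#endif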
67
68 static int tu_drm_get_param(struct vdrm_device *vdrm, uint32_t param, uint64_t *value);
69
70 /**
71 * Helper for simple pass-thru ioctls
72 */
73 static int
74 virtio_simple_ioctl(struct vdrm_device *vdrm, unsigned cmd, void *_req)
75 {
76 MESA_TRACE_FUNC();
77 unsigned req_len = sizeof(struct msm_ccmd_ioctl_simple_req);
78 unsigned rsp_len = sizeof(struct msm_ccmd_ioctl_simple_rsp);
79
80 req_len += _IOC_SIZE(cmd);
81 if (cmd & IOC_OUT)
82 rsp_len += _IOC_SIZE(cmd);
83
84 uint8_t buf[req_len];
85 struct msm_ccmd_ioctl_simple_req *req = (struct msm_ccmd_ioctl_simple_req *)buf;
86 struct msm_ccmd_ioctl_simple_rsp *rsp;
87
88 req->hdr = MSM_CCMD(IOCTL_SIMPLE, req_len);
89 req->cmd = cmd;
90 memcpy(req->payload, _req, _IOC_SIZE(cmd));
91
92 rsp = (struct msm_ccmd_ioctl_simple_rsp *)
93 vdrm_alloc_rsp(vdrm, &req->hdr, rsp_len);
94
95 int ret = vdrm_send_req(vdrm, &req->hdr, true);
96 if (ret)
97 return ret;
98
99 if (cmd & IOC_OUT)
100 memcpy(_req, rsp->payload, _IOC_SIZE(cmd));
101
102 return rsp->ret;
103 }
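/*
 * Illustrative usage sketch (not part of the driver): any fixed-size MSM
 * ioctl can be tunneled to the host this way, e.g. querying a GPU param
 * looks just like a native DRM_IOCTL_MSM_GET_PARAM call.  See
 * tu_drm_get_param() below for the real wrapper.
 */
#if 0
static int
example_get_gmem_size(struct vdrm_device *vdrm, uint64_t *value)
{
   struct drm_msm_param req = {
      .pipe = MSM_PIPE_3D0,
      .param = MSM_PARAM_GMEM_SIZE,
   };

   /* DRM_IOCTL_MSM_GET_PARAM is an IOWR ioctl, so the helper copies the
    * response payload back into req and req.value holds the host's answer.
    */
   int ret = virtio_simple_ioctl(vdrm, DRM_IOCTL_MSM_GET_PARAM, &req);
   if (ret)
      return ret;

   *value = req.value;
   return 0;
}
#endif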
104
105 static int
106 set_iova(struct tu_device *device, uint32_t res_id, uint64_t iova)
107 {
108 struct msm_ccmd_gem_set_iova_req req = {
109 .hdr = MSM_CCMD(GEM_SET_IOVA, sizeof(req)),
110 .iova = iova,
111 .res_id = res_id,
112 };
113
114 return vdrm_send_req(device->vdev->vdrm, &req.hdr, false);
115 }
116
117 static int
118 query_faults(struct tu_device *dev, uint64_t *value)
119 {
120 struct tu_virtio_device *vdev = dev->vdev;
121 uint32_t async_error = 0;
122 uint64_t global_faults;
123
124 if (vdrm_shmem_has_field(vdev->shmem, async_error))
125 async_error = vdev->shmem->async_error;
126
127 if (vdrm_shmem_has_field(vdev->shmem, global_faults)) {
128 global_faults = vdev->shmem->global_faults;
129 } else {
130 int ret = tu_drm_get_param(vdev->vdrm, MSM_PARAM_FAULTS, &global_faults);
131 if (ret)
132 return ret;
133 }
134
135 *value = global_faults + async_error;
136
137 return 0;
138 }
139
140 static void
141 set_debuginfo(struct tu_device *dev)
142 {
143 const char *comm = util_get_process_name();
144 static char cmdline[0x1000+1];
145 int fd = open("/proc/self/cmdline", O_RDONLY);
146 if (fd < 0)
147 return;
148
149 int n = read(fd, cmdline, sizeof(cmdline) - 1);
150 close(fd);
151 if (n < 0)
152 return;
153 /* arguments are separated by NULL, convert to spaces: */
154 for (int i = 0; i < n; i++) {
155 if (cmdline[i] == '\0') {
156 cmdline[i] = ' ';
157 }
158 }
159
160 cmdline[n] = '\0';
161
162 unsigned comm_len = strlen(comm) + 1;
163 unsigned cmdline_len = strlen(cmdline) + 1;
164
165 struct msm_ccmd_set_debuginfo_req *req;
166
167 unsigned req_len = align(sizeof(*req) + comm_len + cmdline_len, 4);
168
169 req = (struct msm_ccmd_set_debuginfo_req *)malloc(req_len);
170
171 req->hdr = MSM_CCMD(SET_DEBUGINFO, req_len);
172 req->comm_len = comm_len;
173 req->cmdline_len = cmdline_len;
174
175 memcpy(&req->payload[0], comm, comm_len);
176 memcpy(&req->payload[comm_len], cmdline, cmdline_len);
177
178 vdrm_send_req(dev->vdev->vdrm, &req->hdr, false);
179
180 free(req);
181 }
182
183 static VkResult
184 virtio_device_init(struct tu_device *dev)
185 {
186 struct tu_instance *instance = dev->physical_device->instance;
187 int fd;
188
189 fd = open(dev->physical_device->fd_path, O_RDWR | O_CLOEXEC);
190 if (fd < 0) {
191 return vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
192 "failed to open device %s", dev->physical_device->fd_path);
193 }
194
195 struct tu_virtio_device *vdev = (struct tu_virtio_device *)
196 vk_zalloc(&instance->vk.alloc, sizeof(*vdev), 8,
197 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
198 if (!vdev) {
199 close(fd);
200 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
201 }
202
203 u_vector_init(&vdev->zombie_vmas_stage_2, 64, sizeof(struct tu_zombie_vma));
204
205 dev->vdev = vdev;
206 dev->fd = fd;
207
208 vdev->vdrm = vdrm_device_connect(fd, VIRTGPU_DRM_CONTEXT_MSM);
209
210 p_atomic_set(&vdev->next_blob_id, 1);
211 vdev->shmem = to_msm_shmem(vdev->vdrm->shmem);
212
213 query_faults(dev, &dev->fault_count);
214
215 set_debuginfo(dev);
216
217 return VK_SUCCESS;
218 }
219
220 static void
221 virtio_device_finish(struct tu_device *dev)
222 {
223 struct tu_instance *instance = dev->physical_device->instance;
224 struct tu_virtio_device *vdev = dev->vdev;
225
226 u_vector_finish(&vdev->zombie_vmas_stage_2);
227
228 vdrm_device_close(vdev->vdrm);
229
230 vk_free(&instance->vk.alloc, vdev);
231 dev->vdev = NULL;
232
233 close(dev->fd);
234 }
235
236 static int
237 tu_drm_get_param(struct vdrm_device *vdrm, uint32_t param, uint64_t *value)
238 {
239 /* Technically this requires a pipe, but the kernel only supports one pipe
240 * anyway at the time of writing, and most of these params are clearly
241 * pipe-independent. */
242 struct drm_msm_param req = {
243 .pipe = MSM_PIPE_3D0,
244 .param = param,
245 };
246
247 int ret = virtio_simple_ioctl(vdrm, DRM_IOCTL_MSM_GET_PARAM, &req);
248 if (ret)
249 return ret;
250
251 *value = req.value;
252
253 return 0;
254 }
255
256 static uint32_t
257 tu_drm_get_highest_bank_bit(struct vdrm_device *vdrm)
258 {
259 uint64_t value;
260 int ret = tu_drm_get_param(vdrm, MSM_PARAM_HIGHEST_BANK_BIT, &value);
261 if (ret)
262 return 0;
263
264 return value;
265 }
266
267 static enum fdl_macrotile_mode
268 tu_drm_get_macrotile_mode(struct vdrm_device *vdrm)
269 {
270 uint64_t value;
271 int ret = tu_drm_get_param(vdrm, MSM_PARAM_MACROTILE_MODE, &value);
272 if (ret)
273 return FDL_MACROTILE_INVALID;
274
275 return (enum fdl_macrotile_mode) value;
276 }
277
278 static uint32_t
279 tu_drm_get_ubwc_swizzle(struct vdrm_device *vdrm)
280 {
281 uint64_t value;
282 int ret = tu_drm_get_param(vdrm, MSM_PARAM_UBWC_SWIZZLE, &value);
283 if (ret)
284 return ~0;
285
286 return value;
287 }
288
289 static int
290 virtio_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
291 {
292 return tu_drm_get_param(dev->vdev->vdrm, MSM_PARAM_TIMESTAMP, ts);
293 }
294
295 static int
296 virtio_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
297 {
298 int ret = tu_drm_get_param(dev->vdev->vdrm, MSM_PARAM_SUSPENDS, suspend_count);
299 return ret;
300 }
301
302 static VkResult
303 virtio_device_check_status(struct tu_device *device)
304 {
305 uint64_t last_fault_count = device->fault_count;
306
307 query_faults(device, &device->fault_count);
308
309 if (last_fault_count != device->fault_count)
310 return vk_device_set_lost(&device->vk, "GPU faulted or hung");
311
312 return VK_SUCCESS;
313 }
314
315 static int
316 virtio_submitqueue_new(struct tu_device *dev,
317 int priority,
318 uint32_t *queue_id)
319 {
320 assert(priority >= 0 &&
321 priority < dev->physical_device->submitqueue_priority_count);
322
323 struct drm_msm_submitqueue req = {
324 .flags = dev->physical_device->info->chip >= 7 &&
325 dev->physical_device->has_preemption ?
326 MSM_SUBMITQUEUE_ALLOW_PREEMPT : 0,
327 .prio = priority,
328 };
329
330 int ret = virtio_simple_ioctl(dev->vdev->vdrm, DRM_IOCTL_MSM_SUBMITQUEUE_NEW, &req);
331 if (ret)
332 return ret;
333
334 *queue_id = req.id;
335 return 0;
336 }
337
338 static void
339 virtio_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
340 {
341 virtio_simple_ioctl(dev->vdev->vdrm, DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE, &queue_id);
342 }
343
344 static bool
345 virtio_has_preemption(struct vdrm_device *vdrm)
346 {
347 struct drm_msm_submitqueue req = {
348 .flags = MSM_SUBMITQUEUE_ALLOW_PREEMPT,
349 .prio = vdrm->caps.u.msm.priorities / 2,
350 };
351
352 int ret = virtio_simple_ioctl(vdrm, DRM_IOCTL_MSM_SUBMITQUEUE_NEW, &req);
353 if (ret)
354 return false;
355
356 virtio_simple_ioctl(vdrm, DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE, &req.id);
357 return true;
358 }
359
360 static VkResult
361 tu_wait_fence(struct tu_device *dev,
362 uint32_t queue_id,
363 int fence,
364 uint64_t timeout_ns)
365 {
366 struct vdrm_device *vdrm = dev->vdev->vdrm;
367
368 if (!fence_before(dev->global_bo_map->userspace_fence, fence))
369 return VK_SUCCESS;
370
371 if (!timeout_ns)
372 return VK_TIMEOUT;
373
374 MESA_TRACE_FUNC();
375
376 struct msm_ccmd_wait_fence_req req = {
377 .hdr = MSM_CCMD(WAIT_FENCE, sizeof(req)),
378 .queue_id = queue_id,
379 .fence = fence,
380 };
381 struct msm_ccmd_submitqueue_query_rsp *rsp;
382 int64_t end_time = os_time_get_nano() + timeout_ns;
383 int ret;
384
385 do {
386 rsp = (struct msm_ccmd_submitqueue_query_rsp *)
387 vdrm_alloc_rsp(vdrm, &req.hdr, sizeof(*rsp));
388
389 ret = vdrm_send_req(vdrm, &req.hdr, true);
390 if (ret)
391 goto out;
392
393 if (os_time_get_nano() >= end_time)
394 break;
395
396 ret = rsp->ret;
397 } while (ret == -ETIMEDOUT);
398
399 out:
400 if (!ret) return VK_SUCCESS;
401 if (ret == -ETIMEDOUT) return VK_TIMEOUT;
402 return VK_ERROR_UNKNOWN;
403 }
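/*
 * Note: fence_before() (from tu_knl_drm.h) is assumed here to be the usual
 * wrap-safe 32-bit seqno comparison, conceptually something like:
 */
#if 0
static inline bool
example_fence_before(uint32_t a, uint32_t b)
{
   /* true if a is older than b, even across seqno wrap-around */
   return (int32_t)(a - b) < 0;
}
#endif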
404
405 VkResult
406 virtio_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
407 uint64_t timeout_ns)
408 {
409 return tu_wait_fence(queue->device, queue->msm_queue_id, fence,
410 timeout_ns);
411 }
412
413 static VkResult
414 tu_free_zombie_vma_locked(struct tu_device *dev, bool wait)
415 {
416 struct tu_virtio_device *vdev = dev->vdev;
417
418 if (!u_vector_length(&dev->zombie_vmas))
419 return VK_SUCCESS;
420
421 if (wait) {
422 struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
423 u_vector_head(&dev->zombie_vmas);
424 /* Wait for 3s (arbitrary timeout) */
425 VkResult ret = tu_wait_fence(dev, dev->queues[0]->msm_queue_id,
426 vma->fence, 3000000000);
427
428 if (ret != VK_SUCCESS)
429 return ret;
430 }
431
432 /* Clear the iova of all finished objects in the first pass so the SET_IOVA
433 * ccmds can be buffered and sent together to the host. *Then* delete
434 * the handles. This avoids filling up the virtqueue with tiny messages,
435 * since each execbuf ends up needing to be page-aligned.
436 */
437 int last_signaled_fence = -1;
438 while (u_vector_length(&dev->zombie_vmas) > 0) {
439 struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
440 u_vector_tail(&dev->zombie_vmas);
441 if (vma->fence > last_signaled_fence) {
442 VkResult ret =
443 tu_wait_fence(dev, dev->queues[0]->msm_queue_id, vma->fence, 0);
444 if (ret != VK_SUCCESS)
445 break;
446
447 last_signaled_fence = vma->fence;
448 }
449
450 u_vector_remove(&dev->zombie_vmas);
451
452 if (vma->gem_handle) {
453 set_iova(dev, vma->res_id, 0);
454
455 struct tu_zombie_vma *vma2 =
456 (struct tu_zombie_vma *) u_vector_add(&vdev->zombie_vmas_stage_2);
457
458 *vma2 = *vma;
459 }
460 }
461
462 /* And _then_ close the GEM handles: */
463 while (u_vector_length(&vdev->zombie_vmas_stage_2) > 0) {
464 struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
465 u_vector_remove(&vdev->zombie_vmas_stage_2);
466
467 util_vma_heap_free(&dev->vma, vma->iova, vma->size);
468 vdrm_bo_close(dev->vdev->vdrm, vma->gem_handle);
469 }
470
471 return VK_SUCCESS;
472 }
473
474 static bool
475 tu_restore_from_zombie_vma_locked(struct tu_device *dev,
476 uint32_t gem_handle,
477 uint64_t *iova)
478 {
479 struct tu_zombie_vma *vma;
480 u_vector_foreach (vma, &dev->zombie_vmas) {
481 if (vma->gem_handle == gem_handle) {
482 *iova = vma->iova;
483
484 /* mark to skip later vdrm bo and iova cleanup */
485 vma->gem_handle = 0;
486 return true;
487 }
488 }
489
490 return false;
491 }
492
493 static VkResult
494 virtio_allocate_userspace_iova_locked(struct tu_device *dev,
495 uint32_t gem_handle,
496 uint64_t size,
497 uint64_t client_iova,
498 enum tu_bo_alloc_flags flags,
499 uint64_t *iova)
500 {
501 VkResult result;
502
503 *iova = 0;
504
505 if (flags & TU_BO_ALLOC_DMABUF) {
506 assert(gem_handle);
507
508 if (tu_restore_from_zombie_vma_locked(dev, gem_handle, iova))
509 return VK_SUCCESS;
510 }
511
512 tu_free_zombie_vma_locked(dev, false);
513
514 result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
515 if (result == VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS) {
516 /* The address may already have been freed by us, but not yet be
517 * considered freed by the kernel. We have to wait until all work
518 * that may hold the address is done. Since addresses are meant to
519 * be replayed only by debug tooling, it should be ok to wait.
520 */
521 tu_free_zombie_vma_locked(dev, true);
522 result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
523 }
524
525 return result;
526 }
527
528 static VkResult
529 tu_bo_init(struct tu_device *dev,
530 struct vk_object_base *base,
531 struct tu_bo *bo,
532 uint32_t gem_handle,
533 uint64_t size,
534 uint64_t iova,
535 enum tu_bo_alloc_flags flags,
536 const char *name)
537 {
538 assert(dev->physical_device->has_set_iova);
539
540 set_iova(dev, bo->res_id, iova);
541
542 name = tu_debug_bos_add(dev, size, name);
543
544 mtx_lock(&dev->bo_mutex);
545 uint32_t idx = dev->submit_bo_count++;
546
547 /* grow the bo list if needed */
548 if (idx >= dev->submit_bo_list_size) {
549 uint32_t new_len = idx + 64;
550 struct drm_msm_gem_submit_bo *new_ptr = (struct drm_msm_gem_submit_bo *)
551 vk_realloc(&dev->vk.alloc, dev->submit_bo_list, new_len * sizeof(*dev->submit_bo_list),
552 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
553 if (!new_ptr) {
554 dev->submit_bo_count--;
555 mtx_unlock(&dev->bo_mutex);
556 vdrm_bo_close(dev->vdev->vdrm, bo->gem_handle);
557 return VK_ERROR_OUT_OF_HOST_MEMORY;
558 }
559
560 dev->submit_bo_list = new_ptr;
561 dev->submit_bo_list_size = new_len;
562 }
563
564 bool dump = flags & TU_BO_ALLOC_ALLOW_DUMP;
565 dev->submit_bo_list[idx] = (struct drm_msm_gem_submit_bo) {
566 .flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
567 COND(dump, MSM_SUBMIT_BO_DUMP),
568 .handle = bo->res_id,
569 .presumed = iova,
570 };
571
572 *bo = (struct tu_bo) {
573 .gem_handle = gem_handle,
574 .res_id = bo->res_id,
575 .size = size,
576 .iova = iova,
577 .name = name,
578 .refcnt = 1,
579 .submit_bo_list_idx = idx,
580 .base = base,
581 };
582
583 mtx_unlock(&dev->bo_mutex);
584
585 tu_dump_bo_init(dev, bo);
586
587 return VK_SUCCESS;
588 }
589
590 /**
591 * Sets the name in the kernel so that the contents of /debug/dri/0/gem are more
592 * useful.
593 *
594 * We skip this on release builds (when we're also not doing BO debugging) to
595 * reduce overhead.
596 */
597 static void
598 tu_bo_set_kernel_name(struct tu_device *dev, struct tu_bo *bo, const char *name)
599 {
600 bool kernel_bo_names = dev->bo_sizes != NULL;
601 #if MESA_DEBUG
602 kernel_bo_names = true;
603 #endif
604 if (!kernel_bo_names)
605 return;
606
607 size_t sz = strlen(name);
608
609 unsigned req_len = sizeof(struct msm_ccmd_gem_set_name_req) + align(sz, 4);
610
611 uint8_t buf[req_len];
612 struct msm_ccmd_gem_set_name_req *req = (struct msm_ccmd_gem_set_name_req *)buf;
613
614 req->hdr = MSM_CCMD(GEM_SET_NAME, req_len);
615 req->res_id = bo->res_id;
616 req->len = sz;
617
618 memcpy(req->payload, name, sz);
619
620 vdrm_send_req(dev->vdev->vdrm, &req->hdr, false);
621 }
622
623 static VkResult
624 virtio_bo_init(struct tu_device *dev,
625 struct vk_object_base *base,
626 struct tu_bo **out_bo,
627 uint64_t size,
628 uint64_t client_iova,
629 VkMemoryPropertyFlags mem_property,
630 enum tu_bo_alloc_flags flags,
631 const char *name)
632 {
633 struct tu_virtio_device *vdev = dev->vdev;
634 struct msm_ccmd_gem_new_req req = {
635 .hdr = MSM_CCMD(GEM_NEW, sizeof(req)),
636 .size = size,
637 };
638 VkResult result;
639 uint32_t res_id;
640 struct tu_bo *bo;
641
642 if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
643 if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
644 req.flags |= MSM_BO_CACHED_COHERENT;
645 } else {
646 req.flags |= MSM_BO_CACHED;
647 }
648 } else {
649 req.flags |= MSM_BO_WC;
650 }
651
652 uint32_t blob_flags = 0;
653 if (mem_property & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
654 blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
655 }
656
657 if (!(mem_property & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT)) {
658 blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
659 if (vdev->vdrm->supports_cross_device)
660 blob_flags |= VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE;
661 }
662
663 if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
664 req.flags |= MSM_BO_GPU_READONLY;
665
666 assert(!(flags & TU_BO_ALLOC_DMABUF));
667
668 mtx_lock(&dev->vma_mutex);
669 result = virtio_allocate_userspace_iova_locked(dev, 0, size, client_iova,
670 flags, &req.iova);
671 mtx_unlock(&dev->vma_mutex);
672
673 if (result != VK_SUCCESS)
674 return result;
675
676 /* Tunneled cmds are processed separately on the host side, before the
677 * renderer->get_blob() callback. The blob_id is used to link the
678 * created bo to the get_blob() call.
679 */
680 req.blob_id = p_atomic_inc_return(&vdev->next_blob_id);
681
682 uint32_t handle =
683 vdrm_bo_create(vdev->vdrm, size, blob_flags, req.blob_id, &req.hdr);
684
685 if (!handle) {
686 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
687 goto fail;
688 }
689
690 res_id = vdrm_handle_to_res_id(vdev->vdrm, handle);
691 bo = tu_device_lookup_bo(dev, res_id);
692 assert(bo && bo->gem_handle == 0);
693
694 bo->res_id = res_id;
695
696 result = tu_bo_init(dev, base, bo, handle, size, req.iova, flags, name);
697 if (result != VK_SUCCESS) {
698 memset(bo, 0, sizeof(*bo));
699 goto fail;
700 }
701
702 *out_bo = bo;
703
704 /* We don't use bo->name here because for the !TU_DEBUG=bo case bo->name is NULL. */
705 tu_bo_set_kernel_name(dev, bo, name);
706
707 if ((mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) &&
708 !(mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
709 tu_bo_map(dev, bo, NULL);
710
711 /* Cached non-coherent memory may already have dirty cache lines;
712 * we should clean them before the GPU gets a chance to write
713 * into this memory.
714 *
715 * MSM already does this automatically for uncached (MSM_BO_WC) memory.
716 */
717 tu_bo_sync_cache(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
718 }
719
720 return VK_SUCCESS;
721
722 fail:
723 mtx_lock(&dev->vma_mutex);
724 util_vma_heap_free(&dev->vma, req.iova, size);
725 mtx_unlock(&dev->vma_mutex);
726 return result;
727 }
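/*
 * For reference, a summary of the flag mapping done above (derived from the
 * code, not an exhaustive spec):
 *
 *   HOST_CACHED + HOST_COHERENT   -> MSM_BO_CACHED_COHERENT
 *   HOST_CACHED (non-coherent)    -> MSM_BO_CACHED (plus a manual cache clean)
 *   otherwise                     -> MSM_BO_WC
 *   HOST_VISIBLE                  -> VIRTGPU_BLOB_FLAG_USE_MAPPABLE
 *   !LAZILY_ALLOCATED             -> VIRTGPU_BLOB_FLAG_USE_SHAREABLE
 *                                    (+ USE_CROSS_DEVICE when supported)
 *   TU_BO_ALLOC_GPU_READ_ONLY     -> MSM_BO_GPU_READONLY
 */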
728
729 static VkResult
730 virtio_bo_init_dmabuf(struct tu_device *dev,
731 struct tu_bo **out_bo,
732 uint64_t size,
733 int prime_fd)
734 {
735 struct vdrm_device *vdrm = dev->vdev->vdrm;
736 VkResult result;
737 struct tu_bo* bo = NULL;
738
739 /* lseek() to get the real size */
740 off_t real_size = lseek(prime_fd, 0, SEEK_END);
741 lseek(prime_fd, 0, SEEK_SET);
742 if (real_size < 0 || (uint64_t) real_size < size)
743 return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
744
745 /* iova allocation needs to consider the object's *real* size: */
746 size = real_size;
747
748 /* Importing the same dmabuf several times would yield the same
749 * gem_handle. Thus there could be a race when destroying a
750 * BO and importing the same dmabuf from different threads.
751 * We must not permit the creation of a dmabuf BO and its release
752 * to happen in parallel.
753 */
754 u_rwlock_wrlock(&dev->dma_bo_lock);
755 mtx_lock(&dev->vma_mutex);
756
757 uint32_t handle, res_id;
758 uint64_t iova;
759
760 handle = vdrm_dmabuf_to_handle(vdrm, prime_fd);
761 if (!handle) {
762 result = vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
763 goto out_unlock;
764 }
765
766 res_id = vdrm_handle_to_res_id(vdrm, handle);
767 if (!res_id) {
768 /* XXX gem_handle potentially leaked here since no refcnt */
769 result = vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
770 goto out_unlock;
771 }
772
773 bo = tu_device_lookup_bo(dev, res_id);
774
775 if (bo->refcnt != 0) {
776 p_atomic_inc(&bo->refcnt);
777 assert(bo->res_id == res_id);
778 *out_bo = bo;
779 result = VK_SUCCESS;
780 goto out_unlock;
781 }
782
783 bo->res_id = res_id;
784
785 result = virtio_allocate_userspace_iova_locked(dev, handle, size, 0,
786 TU_BO_ALLOC_DMABUF, &iova);
787 if (result != VK_SUCCESS) {
788 vdrm_bo_close(dev->vdev->vdrm, handle);
789 goto out_unlock;
790 }
791
792 result =
793 tu_bo_init(dev, NULL, bo, handle, size, iova, TU_BO_ALLOC_NO_FLAGS, "dmabuf");
794 if (result != VK_SUCCESS) {
795 util_vma_heap_free(&dev->vma, iova, size);
796 memset(bo, 0, sizeof(*bo));
797 } else {
798 *out_bo = bo;
799 }
800
801 out_unlock:
802 mtx_unlock(&dev->vma_mutex);
803 u_rwlock_wrunlock(&dev->dma_bo_lock);
804 return result;
805 }
806
807 static VkResult
808 virtio_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr)
809 {
810 bo->map = vdrm_bo_map(dev->vdev->vdrm, bo->gem_handle, bo->size, placed_addr);
811 if (bo->map == MAP_FAILED)
812 return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
813
814 return VK_SUCCESS;
815 }
816
817 static void
818 virtio_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
819 {
820 mtx_lock(&dev->bo_mutex);
821 dev->submit_bo_list[bo->submit_bo_list_idx].flags |= MSM_SUBMIT_BO_DUMP;
822 mtx_unlock(&dev->bo_mutex);
823 }
824
825 static VkResult
826 setup_fence_cmds(struct tu_device *dev)
827 {
828 struct tu_virtio_device *vdev = dev->vdev;
829 VkResult result;
830
831 result = tu_bo_init_new(dev, NULL, &vdev->fence_cmds_mem,
832 sizeof(*vdev->fence_cmds), (enum tu_bo_alloc_flags)
833 (TU_BO_ALLOC_ALLOW_DUMP | TU_BO_ALLOC_GPU_READ_ONLY),
834 "fence_cmds");
835 if (result != VK_SUCCESS)
836 return result;
837
838 result = tu_bo_map(dev, vdev->fence_cmds_mem, NULL);
839 if (result != VK_SUCCESS)
840 return result;
841
842 vdev->fence_cmds = (struct tu_userspace_fence_cmds *)vdev->fence_cmds_mem->map;
843
844 uint64_t fence_iova = dev->global_bo->iova + gb_offset(userspace_fence);
845 for (int i = 0; i < ARRAY_SIZE(vdev->fence_cmds->cmds); i++) {
846 struct tu_userspace_fence_cmd *c = &vdev->fence_cmds->cmds[i];
847
848 memset(c, 0, sizeof(*c));
849
850 if (fd_dev_gen(&dev->physical_device->dev_id) >= A7XX) {
851 c->pkt[0] = pm4_pkt7_hdr((uint8_t)CP_EVENT_WRITE7, 4);
852 c->pkt[1] = CP_EVENT_WRITE7_0(.event = CACHE_FLUSH_TS,
853 .write_src = EV_WRITE_USER_32B,
854 .write_dst = EV_DST_RAM,
855 .write_enabled = true).value;
856 } else {
857 c->pkt[0] = pm4_pkt7_hdr((uint8_t)CP_EVENT_WRITE, 4);
858 c->pkt[1] = CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS);
859 }
860 c->pkt[2] = fence_iova;
861 c->pkt[3] = fence_iova >> 32;
862 }
863
864 return result;
865 }
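/*
 * For reference, each pre-built slot above ends up as a 5-dword packet
 * (hence ".size = 5 * 4" in virtio_queue_submit()):
 *
 *   pkt[0]  CP_EVENT_WRITE/CP_EVENT_WRITE7 pkt7 header, 4 payload dwords
 *   pkt[1]  CACHE_FLUSH_TS event, set up to write a 32-bit user value to RAM
 *   pkt[2]  low 32 bits of the userspace-fence iova in the global BO
 *   pkt[3]  high 32 bits of that iova
 *   fence   the seqno value patched in at submit time
 */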
866
867 static VkResult
868 virtio_queue_submit(struct tu_queue *queue, void *_submit,
869 struct vk_sync_wait *waits, uint32_t wait_count,
870 struct vk_sync_signal *signals, uint32_t signal_count,
871 struct tu_u_trace_submission_data *u_trace_submission_data)
872 {
873 VkResult result = VK_SUCCESS;
874 int ret;
875 struct tu_msm_queue_submit *submit =
876 (struct tu_msm_queue_submit *)_submit;
877 struct tu_virtio_device *vdev = queue->device->vdev;
878 struct drm_virtgpu_execbuffer_syncobj *in_syncobjs, *out_syncobjs;
879 uint64_t gpu_offset = 0;
880 int ring_idx = queue->priority + 1;
881 struct vdrm_execbuf_params params;
882 #if HAVE_PERFETTO
883 struct tu_perfetto_clocks clocks;
884 uint64_t start_ts = tu_perfetto_begin_submit();
885 #endif
886
887 /* It would be nice to not need to defer this, but virtio_device_init()
888 * happens before the device is initialized enough to allocate normal
889 * GEM buffers
890 */
891 if (!vdev->fence_cmds) {
892 VkResult result = setup_fence_cmds(queue->device);
893 if (result != VK_SUCCESS)
894 return result;
895 }
896
897 /* Add the userspace fence cmd: */
898 struct tu_userspace_fence_cmds *fcmds = vdev->fence_cmds;
899 if (queue->fence <= 0) queue->fence = 0;
900 uint32_t fence = ++queue->fence;
901 int idx = fence % ARRAY_SIZE(fcmds->cmds);
902 fcmds->cmds[idx].fence = fence;
903 struct tu_cs_entry fence_cs = {
904 .bo = vdev->fence_cmds_mem,
905 .size = 5 * 4,
906 .offset = ((intptr_t)&fcmds->cmds[idx]) - (intptr_t)fcmds,
907 };
908 msm_submit_add_entries(queue->device, _submit, &fence_cs, 1);
909
910 uint32_t entry_count =
911 util_dynarray_num_elements(&submit->commands, struct drm_msm_gem_submit_cmd);
912 unsigned nr_bos = entry_count ? queue->device->submit_bo_count : 0;
913 unsigned bos_len = nr_bos * sizeof(struct drm_msm_gem_submit_bo);
914 unsigned cmd_len = entry_count * sizeof(struct drm_msm_gem_submit_cmd);
915 unsigned req_len = sizeof(struct msm_ccmd_gem_submit_req) + bos_len + cmd_len;
916 struct msm_ccmd_gem_submit_req *req;
917 uint32_t flags = MSM_PIPE_3D0;
918
919 /* Allocate without wait timeline semaphores */
920 in_syncobjs = (struct drm_virtgpu_execbuffer_syncobj *) vk_zalloc(
921 &queue->device->vk.alloc,
922 wait_count * sizeof(*in_syncobjs), 8,
923 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
924
925 if (in_syncobjs == NULL) {
926 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
927 goto fail_in_syncobjs;
928 }
929
930 /* Allocate with signal timeline semaphores considered */
931 out_syncobjs = (struct drm_virtgpu_execbuffer_syncobj *) vk_zalloc(
932 &queue->device->vk.alloc,
933 signal_count * sizeof(*out_syncobjs), 8,
934 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
935
936 if (out_syncobjs == NULL) {
937 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
938 goto fail_out_syncobjs;
939 }
940
941 for (uint32_t i = 0; i < wait_count; i++) {
942 struct vk_sync *sync = waits[i].sync;
943
944 in_syncobjs[i] = (struct drm_virtgpu_execbuffer_syncobj) {
945 .handle = tu_syncobj_from_vk_sync(sync),
946 .flags = 0,
947 .point = waits[i].wait_value,
948 };
949 }
950
951 for (uint32_t i = 0; i < signal_count; i++) {
952 struct vk_sync *sync = signals[i].sync;
953
954 out_syncobjs[i] = (struct drm_virtgpu_execbuffer_syncobj) {
955 .handle = tu_syncobj_from_vk_sync(sync),
956 .flags = 0,
957 .point = signals[i].signal_value,
958 };
959 }
960
961 if (wait_count)
962 flags |= MSM_SUBMIT_SYNCOBJ_IN;
963
964 if (signal_count)
965 flags |= MSM_SUBMIT_SYNCOBJ_OUT;
966
967 mtx_lock(&queue->device->bo_mutex);
968
969 if (queue->device->implicit_sync_bo_count == 0)
970 flags |= MSM_SUBMIT_NO_IMPLICIT;
971
972 /* drm_msm_gem_submit_cmd requires the index of the bo, which could change
973 * at any time while bo_mutex is not locked. So we update the index here,
974 * under the lock.
975 */
976 util_dynarray_foreach (&submit->commands, struct drm_msm_gem_submit_cmd,
977 cmd) {
978 unsigned i = cmd -
979 util_dynarray_element(&submit->commands,
980 struct drm_msm_gem_submit_cmd, 0);
981 struct tu_bo **bo = util_dynarray_element(&submit->command_bos,
982 struct tu_bo *, i);
983 cmd->submit_idx = (*bo)->submit_bo_list_idx;
984 }
985
986 req = (struct msm_ccmd_gem_submit_req *)vk_alloc(
987 &queue->device->vk.alloc, req_len, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
988
989 if (!req) {
990 mtx_unlock(&queue->device->bo_mutex);
991 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
992 goto fail_alloc_req;
993 }
994
995 req->hdr = MSM_CCMD(GEM_SUBMIT, req_len);
996 req->flags = flags;
997 req->queue_id = queue->msm_queue_id;
998 req->nr_bos = nr_bos;
999 req->nr_cmds = entry_count;
1000
1001 /* Use same kernel fence and userspace fence seqno to avoid having
1002 * to track both:
1003 */
1004 req->fence = queue->fence;
1005
1006 memcpy(req->payload, queue->device->submit_bo_list, bos_len);
1007 memcpy(req->payload + bos_len, submit->commands.data, cmd_len);
1008
1009 params = (struct vdrm_execbuf_params) {
1010 .ring_idx = ring_idx,
1011 .req = &req->hdr,
1012 .in_syncobjs = in_syncobjs,
1013 .out_syncobjs = out_syncobjs,
1014 .num_in_syncobjs = wait_count,
1015 .num_out_syncobjs = signal_count,
1016 };
1017
1018 ret = vdrm_execbuf(vdev->vdrm, &params);
1019
1020 mtx_unlock(&queue->device->bo_mutex);
1021
1022 if (ret) {
1023 result = vk_device_set_lost(&queue->device->vk, "submit failed: %m");
1024 goto fail_submit;
1025 }
1026
1027 #if HAVE_PERFETTO
1028 clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
1029 start_ts, NULL);
1030 gpu_offset = clocks.gpu_ts_offset;
1031 #endif
1032
1033 if (u_trace_submission_data) {
1034 u_trace_submission_data->gpu_ts_offset = gpu_offset;
1035 }
1036
1037 for (uint32_t i = 0; i < wait_count; i++) {
1038 if (!vk_sync_is_tu_timeline_sync(waits[i].sync))
1039 continue;
1040
1041 struct tu_timeline_sync *sync =
1042 container_of(waits[i].sync, struct tu_timeline_sync, base);
1043
1044 assert(sync->state != TU_TIMELINE_SYNC_STATE_RESET);
1045
1046 /* Set the state of the wait timeline sync to SIGNALED: the syncobj is
1047 * done and ready again, so it can be garbage-collected later.
1048 */
1049 sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
1050 }
1051
1052 for (uint32_t i = 0; i < signal_count; i++) {
1053 if (!vk_sync_is_tu_timeline_sync(signals[i].sync))
1054 continue;
1055
1056 struct tu_timeline_sync *sync =
1057 container_of(signals[i].sync, struct tu_timeline_sync, base);
1058
1059 assert(sync->state == TU_TIMELINE_SYNC_STATE_RESET);
1060 /* Set the state of the signal timeline sync to SUBMITTED so that it
1061 * can be waited on until completion if necessary.
1062 */
1063 sync->state = TU_TIMELINE_SYNC_STATE_SUBMITTED;
1064 }
1065
1066 fail_submit:
1067 vk_free(&queue->device->vk.alloc, req);
1068 fail_alloc_req:
1069 vk_free(&queue->device->vk.alloc, out_syncobjs);
1070 fail_out_syncobjs:
1071 vk_free(&queue->device->vk.alloc, in_syncobjs);
1072 fail_in_syncobjs:
1073 return result;
1074 }
1075
1076 static const struct tu_knl virtio_knl_funcs = {
1077 .name = "virtgpu",
1078
1079 .device_init = virtio_device_init,
1080 .device_finish = virtio_device_finish,
1081 .device_get_gpu_timestamp = virtio_device_get_gpu_timestamp,
1082 .device_get_suspend_count = virtio_device_get_suspend_count,
1083 .device_check_status = virtio_device_check_status,
1084 .submitqueue_new = virtio_submitqueue_new,
1085 .submitqueue_close = virtio_submitqueue_close,
1086 .bo_init = virtio_bo_init,
1087 .bo_init_dmabuf = virtio_bo_init_dmabuf,
1088 .bo_export_dmabuf = tu_drm_export_dmabuf,
1089 .bo_map = virtio_bo_map,
1090 .bo_allow_dump = virtio_bo_allow_dump,
1091 .bo_finish = tu_drm_bo_finish,
1092 .submit_create = msm_submit_create,
1093 .submit_finish = msm_submit_finish,
1094 .submit_add_entries = msm_submit_add_entries,
1095 .queue_submit = virtio_queue_submit,
1096 .queue_wait_fence = virtio_queue_wait_fence,
1097 };
1098
1099 VkResult
1100 tu_knl_drm_virtio_load(struct tu_instance *instance,
1101 int fd, struct _drmVersion *version,
1102 struct tu_physical_device **out)
1103 {
1104 struct virgl_renderer_capset_drm caps;
1105 struct vdrm_device *vdrm;
1106 VkResult result = VK_SUCCESS;
1107 uint64_t val;
1108
1109 /* Debug option to force fallback to venus: */
1110 if (debug_get_bool_option("TU_NO_VIRTIO", false))
1111 return VK_ERROR_INCOMPATIBLE_DRIVER;
1112
1113 if (drmGetCap(fd, DRM_CAP_SYNCOBJ, &val) || !val) {
1114 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1115 "kernel driver for device %s does not support DRM_CAP_SYNC_OBJ",
1116 version->name);
1117 }
1118
1119 /* Try to connect. If this doesn't work, it's probably because we're running
1120 * in a non-Adreno VM. Unless startup debug info is specifically requested,
1121 * we should silently exit and let another Vulkan driver try probing instead.
1122 */
1123 vdrm = vdrm_device_connect(fd, VIRTGPU_DRM_CONTEXT_MSM);
1124 if (!vdrm) {
1125 if (TU_DEBUG(STARTUP)) {
1126 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1127 "could not get connect vdrm: %s", strerror(errno));
1128 } else {
1129 return VK_ERROR_INCOMPATIBLE_DRIVER;
1130 }
1131 }
1132
1133 caps = vdrm->caps;
1134
1135 /* TODO add something to virgl_renderer_capset_drm to avoid round-trip to
1136 * host if virglrenderer is new enough.
1137 */
1138 bool has_preemption = virtio_has_preemption(vdrm);
1139
1140 /* If virglrenderer is too old, we may need another round-trip to get this.
1141 */
1142 if (caps.u.msm.highest_bank_bit == 0)
1143 caps.u.msm.highest_bank_bit = tu_drm_get_highest_bank_bit(vdrm);
1144
1145 /* TODO add these to the caps struct */
1146 uint32_t bank_swizzle_levels = tu_drm_get_ubwc_swizzle(vdrm);
1147 enum fdl_macrotile_mode macrotile_mode = tu_drm_get_macrotile_mode(vdrm);
1148
1149 vdrm_device_close(vdrm);
1150
1151 mesa_logd("wire_format_version: %u", caps.wire_format_version);
1152 mesa_logd("version_major: %u", caps.version_major);
1153 mesa_logd("version_minor: %u", caps.version_minor);
1154 mesa_logd("version_patchlevel: %u", caps.version_patchlevel);
1155 mesa_logd("has_cached_coherent: %u", caps.u.msm.has_cached_coherent);
1156 mesa_logd("va_start: 0x%0" PRIx64, caps.u.msm.va_start);
1157 mesa_logd("va_size: 0x%0" PRIx64, caps.u.msm.va_size);
1158 mesa_logd("gpu_id: %u", caps.u.msm.gpu_id);
1159 mesa_logd("gmem_size: %u", caps.u.msm.gmem_size);
1160 mesa_logd("gmem_base: 0x%0" PRIx64, caps.u.msm.gmem_base);
1161 mesa_logd("chip_id: 0x%0" PRIx64, caps.u.msm.chip_id);
1162 mesa_logd("max_freq: %u", caps.u.msm.max_freq);
1163 mesa_logd("highest_bank_bit: %u", caps.u.msm.highest_bank_bit);
1164
1165 if (caps.wire_format_version != 2) {
1166 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1167 "Unsupported protocol version: %u",
1168 caps.wire_format_version);
1169 }
1170
1171 if ((caps.version_major != 1) || (caps.version_minor < 9)) {
1172 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1173 "unsupported version: %u.%u.%u",
1174 caps.version_major,
1175 caps.version_minor,
1176 caps.version_patchlevel);
1177 }
1178
1179 if (!caps.u.msm.va_size) {
1180 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1181 "No address space");
1182 }
1183
1184 struct tu_physical_device *device = (struct tu_physical_device *)
1185 vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1186 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1187 if (!device) {
1188 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1189 goto fail;
1190 }
1191
1192 device->msm_major_version = caps.version_major;
1193 device->msm_minor_version = caps.version_minor;
1194
1195 device->instance = instance;
1196 device->local_fd = fd;
1197
1198 device->dev_id.gpu_id = caps.u.msm.gpu_id;
1199 device->dev_id.chip_id = caps.u.msm.chip_id;
1200 device->gmem_size = caps.u.msm.gmem_size;
1201 device->gmem_base = caps.u.msm.gmem_base;
1202 device->va_start = caps.u.msm.va_start;
1203 device->va_size = caps.u.msm.va_size;
1204 device->ubwc_config.highest_bank_bit = caps.u.msm.highest_bank_bit;
1205 device->has_set_iova = true;
1206 device->has_preemption = has_preemption;
1207
1208 device->ubwc_config.bank_swizzle_levels = bank_swizzle_levels;
1209 device->ubwc_config.macrotile_mode = macrotile_mode;
1210
1211 device->gmem_size = debug_get_num_option("TU_GMEM", device->gmem_size);
1212
1213 device->has_cached_coherent_memory = caps.u.msm.has_cached_coherent;
1214
1215 device->submitqueue_priority_count = caps.u.msm.priorities;
1216
1217 device->syncobj_type = vk_drm_syncobj_get_type(fd);
1218 /* we don't support DRM_CAP_SYNCOBJ_TIMELINE, but drm-shim does */
1219 if (!(device->syncobj_type.features & VK_SYNC_FEATURE_TIMELINE))
1220 device->timeline_type = vk_sync_timeline_get_type(&tu_timeline_sync_type);
1221
1222 device->sync_types[0] = &device->syncobj_type;
1223 device->sync_types[1] = &device->timeline_type.sync;
1224 device->sync_types[2] = NULL;
1225
1226 device->heap.size = tu_get_system_heap_size(device);
1227 device->heap.used = 0u;
1228 device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
1229
1230 instance->knl = &virtio_knl_funcs;
1231
1232 *out = device;
1233
1234 return VK_SUCCESS;
1235
1236 fail:
1237 vk_free(&instance->vk.alloc, device);
1238 return result;
1239 }
1240