1 /*
2  * Copyright © 2018 Google, Inc.
3  * Copyright © 2015 Intel Corporation
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "tu_knl.h"
8 
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 #include <xf86drm.h>
14 
15 #include "vk_util.h"
16 
17 #include "drm-uapi/msm_drm.h"
18 #include "util/u_debug.h"
19 #include "util/hash_table.h"
20 
21 #include "tu_cmd_buffer.h"
22 #include "tu_cs.h"
23 #include "tu_device.h"
24 #include "tu_dynamic_rendering.h"
25 #include "tu_knl_drm.h"
26 #include "redump.h"
27 
28 struct tu_queue_submit
29 {
30    struct vk_queue_submit *vk_submit;
31    struct tu_u_trace_submission_data *u_trace_submission_data;
32 
33    struct tu_cmd_buffer **cmd_buffers;
34    struct drm_msm_gem_submit_cmd *cmds;
35    struct drm_msm_gem_submit_syncobj *in_syncobjs;
36    struct drm_msm_gem_submit_syncobj *out_syncobjs;
37 
38    uint32_t nr_cmd_buffers;
39    uint32_t nr_in_syncobjs;
40    uint32_t nr_out_syncobjs;
41    uint32_t entry_count;
42    uint32_t perf_pass_index;
43 
44    bool     autotune_fence;
45 };
46 
47 struct tu_u_trace_syncobj
48 {
49    uint32_t msm_queue_id;
50    uint32_t fence;
51 };
52 
53 static int
54 tu_drm_get_param(int fd, uint32_t param, uint64_t *value)
55 {
56    /* Technically this requires a pipe, but the kernel only supports one
57     * pipe anyway at the time of writing, and most of these params are
58     * clearly pipe-independent. */
59    struct drm_msm_param req = {
60       .pipe = MSM_PIPE_3D0,
61       .param = param,
62    };
63 
64    int ret = drmCommandWriteRead(fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
65    if (ret)
66       return ret;
67 
68    *value = req.value;
69 
70    return 0;
71 }
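
/* Usage sketch (illustrative): every MSM_PARAM_* query in this file funnels
 * through tu_drm_get_param(), e.g. reading the GPU timestamp, mirroring
 * msm_device_get_gpu_timestamp() further below:
 *
 *    uint64_t ts;
 *    if (tu_drm_get_param(dev->fd, MSM_PARAM_TIMESTAMP, &ts) == 0)
 *       record_timestamp(ts);   /* record_timestamp() is hypothetical */
 */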
72 
73 static int
74 tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id)
75 {
76    uint64_t value;
77    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_GPU_ID, &value);
78    if (ret)
79       return ret;
80 
81    *id = value;
82    return 0;
83 }
84 
85 static int
86 tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size)
87 {
88    uint64_t value;
89    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_GMEM_SIZE, &value);
90    if (ret)
91       return ret;
92 
93    *size = value;
94    return 0;
95 }
96 
97 static int
98 tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base)
99 {
100    return tu_drm_get_param(dev->local_fd, MSM_PARAM_GMEM_BASE, base);
101 }
102 
103 static int
104 tu_drm_get_va_prop(const struct tu_physical_device *dev,
105                    uint64_t *va_start, uint64_t *va_size)
106 {
107    uint64_t value;
108    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_START, &value);
109    if (ret)
110       return ret;
111 
112    *va_start = value;
113 
114    ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_SIZE, &value);
115    if (ret)
116       return ret;
117 
118    *va_size = value;
119 
120    return 0;
121 }
122 
123 static uint32_t
124 tu_drm_get_priorities(const struct tu_physical_device *dev)
125 {
126    uint64_t val = 1;
127    tu_drm_get_param(dev->local_fd, MSM_PARAM_PRIORITIES, &val);
128    assert(val >= 1);
129 
130    return val;
131 }
132 
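/* Probe whether the given MSM_BO_* allocation flags are supported by creating
 * (and immediately closing) a one-page dummy GEM object; used below to detect
 * MSM_BO_CACHED_COHERENT support.
 */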
133 static bool
134 tu_drm_is_memory_type_supported(int fd, uint32_t flags)
135 {
136    struct drm_msm_gem_new req_alloc = { .size = 0x1000, .flags = flags };
137 
138    int ret =
139       drmCommandWriteRead(fd, DRM_MSM_GEM_NEW, &req_alloc, sizeof(req_alloc));
140    if (ret) {
141       return false;
142    }
143 
144    struct drm_gem_close req_close = {
145       .handle = req_alloc.handle,
146    };
147    drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &req_close);
148 
149    return true;
150 }
151 
152 static VkResult
153 msm_device_init(struct tu_device *dev)
154 {
155    int fd = open(dev->physical_device->fd_path, O_RDWR | O_CLOEXEC);
156    if (fd < 0) {
157       return vk_startup_errorf(
158             dev->physical_device->instance, VK_ERROR_INITIALIZATION_FAILED,
159             "failed to open device %s", dev->physical_device->fd_path);
160    }
161 
162    int ret = tu_drm_get_param(fd, MSM_PARAM_FAULTS, &dev->fault_count);
163    if (ret != 0) {
164       close(fd);
165       return vk_startup_errorf(dev->physical_device->instance,
166                                VK_ERROR_INITIALIZATION_FAILED,
167                                "Failed to get initial fault count: %d", ret);
168    }
169 
170    dev->fd = fd;
171 
172    return VK_SUCCESS;
173 }
174 
175 static void
176 msm_device_finish(struct tu_device *dev)
177 {
178    close(dev->fd);
179 }
180 
181 static int
182 msm_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
183 {
184    return tu_drm_get_param(dev->fd, MSM_PARAM_TIMESTAMP, ts);
185 }
186 
187 static int
188 msm_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
189 {
190    int ret = tu_drm_get_param(dev->fd, MSM_PARAM_SUSPENDS, suspend_count);
191    return ret;
192 }
193 
194 static VkResult
195 msm_device_check_status(struct tu_device *device)
196 {
197    uint64_t last_fault_count = device->fault_count;
198    int ret = tu_drm_get_param(device->fd, MSM_PARAM_FAULTS, &device->fault_count);
199    if (ret != 0)
200       return vk_device_set_lost(&device->vk, "error getting GPU fault count: %d", ret);
201 
202    if (last_fault_count != device->fault_count)
203       return vk_device_set_lost(&device->vk, "GPU faulted or hung");
204 
205    return VK_SUCCESS;
206 }
207 
208 static int
209 msm_submitqueue_new(struct tu_device *dev,
210                     int priority,
211                     uint32_t *queue_id)
212 {
213    assert(priority >= 0 &&
214           priority < dev->physical_device->submitqueue_priority_count);
215    struct drm_msm_submitqueue req = {
216       .flags = 0,
217       .prio = priority,
218    };
219 
220    int ret = drmCommandWriteRead(dev->fd,
221                                  DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req));
222    if (ret)
223       return ret;
224 
225    *queue_id = req.id;
226    return 0;
227 }
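
/* Usage sketch (illustrative): a submitqueue is typically created once per
 * VkQueue and torn down with msm_submitqueue_close():
 *
 *    uint32_t queue_id;
 *    if (msm_submitqueue_new(dev, priority, &queue_id) == 0)
 *       ... stash queue_id, e.g. in tu_queue::msm_queue_id ...
 */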
228 
229 static void
230 msm_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
231 {
232    drmCommandWrite(dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE,
233                    &queue_id, sizeof(uint32_t));
234 }
235 
236 static void
237 tu_gem_close(const struct tu_device *dev, uint32_t gem_handle)
238 {
239    struct drm_gem_close req = {
240       .handle = gem_handle,
241    };
242 
243    drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
244 }
245 
246 /** Helper for DRM_MSM_GEM_INFO, returns 0 on error. */
247 static uint64_t
248 tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info)
249 {
250    struct drm_msm_gem_info req = {
251       .handle = gem_handle,
252       .info = info,
253    };
254 
255    int ret = drmCommandWriteRead(dev->fd,
256                                  DRM_MSM_GEM_INFO, &req, sizeof(req));
257    if (ret < 0)
258       return 0;
259 
260    return req.value;
261 }
262 
263 static VkResult
264 tu_wait_fence(struct tu_device *dev,
265               uint32_t queue_id,
266               int fence,
267               uint64_t timeout_ns)
268 {
269    /* fence was created when no work was yet submitted */
270    if (fence < 0)
271       return VK_SUCCESS;
272 
273    struct drm_msm_wait_fence req = {
274       .fence = fence,
275       .queueid = queue_id,
276    };
277    int ret;
278 
279    get_abs_timeout(&req.timeout, timeout_ns);
280 
281    ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
282    if (ret) {
283       if (ret == -ETIMEDOUT) {
284          return VK_TIMEOUT;
285       } else {
286          mesa_loge("tu_wait_fence failed! %d (%s)", ret, strerror(errno));
287          return VK_ERROR_UNKNOWN;
288       }
289    }
290 
291    return VK_SUCCESS;
292 }
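
/* Usage sketch (illustrative): waiting up to one second on a queue's kernel
 * fence, as msm_device_wait_u_trace() does further below:
 *
 *    VkResult res = tu_wait_fence(dev, queue->msm_queue_id, queue->fence,
 *                                 1000000000);
 *    if (res == VK_TIMEOUT)
 *       ... the GPU is still busy ...
 */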
293 
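/* Zombie VMAs are address ranges of BOs that were freed by the driver while
 * their iova might still be referenced by in-flight work. Once the protecting
 * fence has signaled, this detaches the iova in the kernel (MSM_INFO_SET_IOVA
 * with value 0), closes the GEM handle and returns the range to the userspace
 * VMA heap.
 */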
294 static VkResult
295 tu_free_zombie_vma_locked(struct tu_device *dev, bool wait)
296 {
297    if (!u_vector_length(&dev->zombie_vmas))
298       return VK_SUCCESS;
299 
300    if (wait) {
301       struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
302             u_vector_head(&dev->zombie_vmas);
303       /* Wait for 3s (arbitrary timeout) */
304       VkResult ret = tu_wait_fence(dev, dev->queues[0]->msm_queue_id,
305                                    vma->fence, 3000000000);
306 
307       if (ret != VK_SUCCESS)
308          return ret;
309    }
310 
311    int last_signaled_fence = -1;
312    while (u_vector_length(&dev->zombie_vmas) > 0) {
313       struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
314             u_vector_tail(&dev->zombie_vmas);
315       if (vma->fence > last_signaled_fence) {
316          VkResult ret =
317             tu_wait_fence(dev, dev->queues[0]->msm_queue_id, vma->fence, 0);
318          if (ret != VK_SUCCESS)
319             return ret;
320 
321          last_signaled_fence = vma->fence;
322       }
323 
324       /* Ensure that the kernel's internal VMA is freed. */
325       struct drm_msm_gem_info req = {
326          .handle = vma->gem_handle,
327          .info = MSM_INFO_SET_IOVA,
328          .value = 0,
329       };
330 
331       int ret =
332          drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
333       if (ret < 0) {
334          mesa_loge("MSM_INFO_SET_IOVA(0) failed! %d (%s)", ret,
335                    strerror(errno));
336          return VK_ERROR_UNKNOWN;
337       }
338 
339       tu_gem_close(dev, vma->gem_handle);
340 
341       util_vma_heap_free(&dev->vma, vma->iova, vma->size);
342       u_vector_remove(&dev->zombie_vmas);
343    }
344 
345    return VK_SUCCESS;
346 }
347 
348 static VkResult
349 msm_allocate_userspace_iova(struct tu_device *dev,
350                             uint32_t gem_handle,
351                             uint64_t size,
352                             uint64_t client_iova,
353                             enum tu_bo_alloc_flags flags,
354                             uint64_t *iova)
355 {
356    VkResult result;
357 
358    mtx_lock(&dev->vma_mutex);
359 
360    *iova = 0;
361 
362    tu_free_zombie_vma_locked(dev, false);
363 
364    result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
365    if (result == VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS) {
366       /* The address may already have been freed by us, but not yet be
367        * considered freed by the kernel. We have to wait until all work
368        * that may hold the address is done. Since addresses are meant to
369        * be replayed only by debug tooling, it should be OK to wait.
370        */
371       tu_free_zombie_vma_locked(dev, true);
372       result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
373    }
374 
375    mtx_unlock(&dev->vma_mutex);
376 
377    if (result != VK_SUCCESS)
378       return result;
379 
380    struct drm_msm_gem_info req = {
381       .handle = gem_handle,
382       .info = MSM_INFO_SET_IOVA,
383       .value = *iova,
384    };
385 
386    int ret =
387       drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
388    if (ret < 0) {
389       mesa_loge("MSM_INFO_SET_IOVA failed! %d (%s)", ret, strerror(errno));
390       return VK_ERROR_OUT_OF_HOST_MEMORY;
391    }
392 
393    return VK_SUCCESS;
394 }
395 
396 static VkResult
397 tu_allocate_kernel_iova(struct tu_device *dev,
398                         uint32_t gem_handle,
399                         uint64_t *iova)
400 {
401    *iova = tu_gem_info(dev, gem_handle, MSM_INFO_GET_IOVA);
402    if (!*iova)
403       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
404 
405    return VK_SUCCESS;
406 }
407 
408 static VkResult
409 tu_bo_init(struct tu_device *dev,
410            struct tu_bo *bo,
411            uint32_t gem_handle,
412            uint64_t size,
413            uint64_t client_iova,
414            enum tu_bo_alloc_flags flags,
415            const char *name)
416 {
417    VkResult result = VK_SUCCESS;
418    uint64_t iova = 0;
419 
420    assert(!client_iova || dev->physical_device->has_set_iova);
421 
422    if (dev->physical_device->has_set_iova) {
423       result = msm_allocate_userspace_iova(dev, gem_handle, size, client_iova,
424                                            flags, &iova);
425    } else {
426       result = tu_allocate_kernel_iova(dev, gem_handle, &iova);
427    }
428 
429    if (result != VK_SUCCESS) {
430       tu_gem_close(dev, gem_handle);
431       return result;
432    }
433 
434    name = tu_debug_bos_add(dev, size, name);
435 
436    mtx_lock(&dev->bo_mutex);
437    uint32_t idx = dev->bo_count++;
438 
439    /* grow the bo list if needed */
440    if (idx >= dev->bo_list_size) {
441       uint32_t new_len = idx + 64;
442       struct drm_msm_gem_submit_bo *new_ptr = (struct drm_msm_gem_submit_bo *)
443          vk_realloc(&dev->vk.alloc, dev->bo_list, new_len * sizeof(*dev->bo_list),
444                     8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
445       if (!new_ptr) {
446          dev->bo_count--;
447          mtx_unlock(&dev->bo_mutex);
448          tu_gem_close(dev, gem_handle);
449          return VK_ERROR_OUT_OF_HOST_MEMORY;
450       }
451 
452       dev->bo_list = new_ptr;
453       dev->bo_list_size = new_len;
454    }
455 
456    bool dump = flags & TU_BO_ALLOC_ALLOW_DUMP;
457    dev->bo_list[idx] = (struct drm_msm_gem_submit_bo) {
458       .flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
459                COND(dump, MSM_SUBMIT_BO_DUMP),
460       .handle = gem_handle,
461       .presumed = iova,
462    };
463 
464    *bo = (struct tu_bo) {
465       .gem_handle = gem_handle,
466       .size = size,
467       .iova = iova,
468       .name = name,
469       .refcnt = 1,
470       .bo_list_idx = idx,
471    };
472 
473    mtx_unlock(&dev->bo_mutex);
474 
475    return VK_SUCCESS;
476 }
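
/* Note on the flow above: every live BO gets an entry in the device-wide
 * dev->bo_list so that DRM_MSM_GEM_SUBMIT can pass the full residency list to
 * the kernel; bo->bo_list_idx records where the BO sits in that array so
 * tu_fill_msm_gem_submit() can reference it by index.
 */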
477 
478 /**
479  * Sets the name in the kernel so that the contents of /debug/dri/0/gem are more
480  * useful.
481  *
482  * We skip this on release builds (when we're also not doing BO debugging) to
483  * reduce overhead.
484  */
485 static void
486 tu_bo_set_kernel_name(struct tu_device *dev, struct tu_bo *bo, const char *name)
487 {
488    bool kernel_bo_names = dev->bo_sizes != NULL;
489 #ifdef DEBUG
490    kernel_bo_names = true;
491 #endif
492    if (!kernel_bo_names)
493       return;
494 
495    struct drm_msm_gem_info req = {
496       .handle = bo->gem_handle,
497       .info = MSM_INFO_SET_NAME,
498       .value = (uintptr_t)(void *)name,
499       .len = strlen(name),
500    };
501 
502    int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
503    if (ret) {
504       mesa_logw_once("Failed to set BO name with DRM_MSM_GEM_INFO: %d",
505                      ret);
506    }
507 }
508 
509 static VkResult
510 msm_bo_init(struct tu_device *dev,
511             struct tu_bo **out_bo,
512             uint64_t size,
513             uint64_t client_iova,
514             VkMemoryPropertyFlags mem_property,
515             enum tu_bo_alloc_flags flags,
516             const char *name)
517 {
518    struct drm_msm_gem_new req = {
519       .size = size,
520       .flags = 0
521    };
522 
523    if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
524       if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
525          req.flags |= MSM_BO_CACHED_COHERENT;
526       } else {
527          req.flags |= MSM_BO_CACHED;
528       }
529    } else {
530       req.flags |= MSM_BO_WC;
531    }
532 
533    if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
534       req.flags |= MSM_BO_GPU_READONLY;
535 
536    int ret = drmCommandWriteRead(dev->fd,
537                                  DRM_MSM_GEM_NEW, &req, sizeof(req));
538    if (ret)
539       return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
540 
541    struct tu_bo* bo = tu_device_lookup_bo(dev, req.handle);
542    assert(bo && bo->gem_handle == 0);
543 
544    VkResult result =
545       tu_bo_init(dev, bo, req.handle, size, client_iova, flags, name);
546 
547    if (result != VK_SUCCESS)
548       memset(bo, 0, sizeof(*bo));
549    else
550       *out_bo = bo;
551 
552    /* We don't use bo->name here because for the !TU_DEBUG=bo case bo->name is NULL. */
553    tu_bo_set_kernel_name(dev, bo, name);
554 
555    if (result == VK_SUCCESS &&
556        (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) &&
557        !(mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
558       tu_bo_map(dev, bo);
559 
560       /* Cached non-coherent memory may already have dirty cache lines,
561        * so we should clean those lines before the GPU gets a chance to
562        * write into this memory.
563        *
564        * MSM already does this automatically for uncached (MSM_BO_WC) memory.
565        */
566       tu_sync_cache_bo(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
567    }
568 
569    return result;
570 }
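
/* Summary of the flag selection above (illustrative):
 *
 *    HOST_CACHED | HOST_COHERENT  -> MSM_BO_CACHED_COHERENT
 *    HOST_CACHED only             -> MSM_BO_CACHED (plus a manual cache clean)
 *    otherwise                    -> MSM_BO_WC (write-combined)
 *
 * with MSM_BO_GPU_READONLY added when TU_BO_ALLOC_GPU_READ_ONLY is requested.
 */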
571 
572 static VkResult
573 msm_bo_init_dmabuf(struct tu_device *dev,
574                    struct tu_bo **out_bo,
575                    uint64_t size,
576                    int prime_fd)
577 {
578    /* lseek() to get the real size */
579    off_t real_size = lseek(prime_fd, 0, SEEK_END);
580    lseek(prime_fd, 0, SEEK_SET);
581    if (real_size < 0 || (uint64_t) real_size < size)
582       return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
583 
584    /* iova allocation needs to consider the object's *real* size: */
585    size = real_size;
586 
587    /* Importing the same dmabuf several times yields the same
588     * gem_handle, so there could be a race when one thread destroys
589     * a BO while another imports the same dmabuf.
590     * We must not permit the creation of a dmabuf BO and its release
591     * to happen in parallel.
592     */
593    u_rwlock_wrlock(&dev->dma_bo_lock);
594 
595    uint32_t gem_handle;
596    int ret = drmPrimeFDToHandle(dev->fd, prime_fd,
597                                 &gem_handle);
598    if (ret) {
599       u_rwlock_wrunlock(&dev->dma_bo_lock);
600       return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
601    }
602 
603    struct tu_bo* bo = tu_device_lookup_bo(dev, gem_handle);
604 
605    if (bo->refcnt != 0) {
606       p_atomic_inc(&bo->refcnt);
607       u_rwlock_wrunlock(&dev->dma_bo_lock);
608 
609       *out_bo = bo;
610       return VK_SUCCESS;
611    }
612 
613    VkResult result =
614       tu_bo_init(dev, bo, gem_handle, size, 0, TU_BO_ALLOC_NO_FLAGS, "dmabuf");
615 
616    if (result != VK_SUCCESS)
617       memset(bo, 0, sizeof(*bo));
618    else
619       *out_bo = bo;
620 
621    u_rwlock_wrunlock(&dev->dma_bo_lock);
622 
623    return result;
624 }
625 
626 static VkResult
627 msm_bo_map(struct tu_device *dev, struct tu_bo *bo)
628 {
629    if (bo->map)
630       return VK_SUCCESS;
631 
632    uint64_t offset = tu_gem_info(dev, bo->gem_handle, MSM_INFO_GET_OFFSET);
633    if (!offset)
634       return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
635 
636    /* TODO: Should we use the wrapper os_mmap() like Freedreno does? */
637    void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
638                     dev->fd, offset);
639    if (map == MAP_FAILED)
640       return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
641 
642    bo->map = map;
643    return VK_SUCCESS;
644 }
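
/* The mapping above follows the usual MSM GEM pattern: ask the kernel for the
 * BO's fake mmap offset via MSM_INFO_GET_OFFSET, then mmap() the DRM fd at
 * that offset to get a CPU pointer.
 */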
645 
646 static void
647 msm_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
648 {
649    mtx_lock(&dev->bo_mutex);
650    dev->bo_list[bo->bo_list_idx].flags |= MSM_SUBMIT_BO_DUMP;
651    mtx_unlock(&dev->bo_mutex);
652 }
653 
654 
655 static void
656 msm_bo_set_metadata(struct tu_device *dev, struct tu_bo *bo,
657                     void *metadata, uint32_t metadata_size)
658 {
659    struct drm_msm_gem_info req = {
660       .handle = bo->gem_handle,
661       .info = MSM_INFO_SET_METADATA,
662       .value = (uintptr_t)(void *)metadata,
663       .len = metadata_size,
664    };
665 
666    int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
667    if (ret) {
668       mesa_logw_once("Failed to set BO metadata with DRM_MSM_GEM_INFO: %d",
669                      ret);
670    }
671 }
672 
673 static int
674 msm_bo_get_metadata(struct tu_device *dev, struct tu_bo *bo,
675                     void *metadata, uint32_t metadata_size)
676 {
677    struct drm_msm_gem_info req = {
678       .handle = bo->gem_handle,
679       .info = MSM_INFO_GET_METADATA,
680       .value = (uintptr_t)(void *)metadata,
681       .len = metadata_size,
682    };
683 
684    int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
685    if (ret) {
686       mesa_logw_once("Failed to get BO metadata with DRM_MSM_GEM_INFO: %d",
687                      ret);
688    }
689 
690    return ret;
691 }
692 
693 static VkResult
694 tu_queue_submit_create_locked(struct tu_queue *queue,
695                               struct vk_queue_submit *vk_submit,
696                               const uint32_t nr_in_syncobjs,
697                               const uint32_t nr_out_syncobjs,
698                               uint32_t perf_pass_index,
699                               struct tu_queue_submit *new_submit)
700 {
701    VkResult result;
702 
703    bool u_trace_enabled = u_trace_should_process(&queue->device->trace_context);
704    bool has_trace_points = false;
705 
706    struct vk_command_buffer **vk_cmd_buffers = vk_submit->command_buffers;
707 
708    memset(new_submit, 0, sizeof(struct tu_queue_submit));
709 
710    new_submit->cmd_buffers = (struct tu_cmd_buffer **) vk_cmd_buffers;
711    new_submit->nr_cmd_buffers = vk_submit->command_buffer_count;
712    tu_insert_dynamic_cmdbufs(queue->device, &new_submit->cmd_buffers,
713                              &new_submit->nr_cmd_buffers);
714 
715    uint32_t entry_count = 0;
716    for (uint32_t j = 0; j < new_submit->nr_cmd_buffers; ++j) {
717       struct tu_cmd_buffer *cmdbuf = new_submit->cmd_buffers[j];
718 
719       if (perf_pass_index != ~0)
720          entry_count++;
721 
722       entry_count += cmdbuf->cs.entry_count;
723 
724       if (u_trace_enabled && u_trace_has_points(&cmdbuf->trace)) {
725          if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
726             entry_count++;
727 
728          has_trace_points = true;
729       }
730    }
731 
732    new_submit->autotune_fence =
733       tu_autotune_submit_requires_fence(new_submit->cmd_buffers, new_submit->nr_cmd_buffers);
734    if (new_submit->autotune_fence)
735       entry_count++;
736 
737    new_submit->cmds = (struct drm_msm_gem_submit_cmd *) vk_zalloc(
738       &queue->device->vk.alloc, entry_count * sizeof(*new_submit->cmds), 8,
739       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
740 
741    if (new_submit->cmds == NULL) {
742       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
743       goto fail_cmds;
744    }
745 
746    if (has_trace_points) {
747       result =
748          tu_u_trace_submission_data_create(
749             queue->device, new_submit->cmd_buffers,
750             new_submit->nr_cmd_buffers,
751             &new_submit->u_trace_submission_data);
752 
753       if (result != VK_SUCCESS) {
754          goto fail_u_trace_submission_data;
755       }
756    }
757 
758    /* Allocate without wait timeline semaphores */
759    new_submit->in_syncobjs = (struct drm_msm_gem_submit_syncobj *) vk_zalloc(
760       &queue->device->vk.alloc,
761       nr_in_syncobjs * sizeof(*new_submit->in_syncobjs), 8,
762       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
763 
764    if (new_submit->in_syncobjs == NULL) {
765       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
766       goto fail_in_syncobjs;
767    }
768 
769    /* Allocate with signal timeline semaphores considered */
770    new_submit->out_syncobjs = (struct drm_msm_gem_submit_syncobj *) vk_zalloc(
771       &queue->device->vk.alloc,
772       nr_out_syncobjs * sizeof(*new_submit->out_syncobjs), 8,
773       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
774 
775    if (new_submit->out_syncobjs == NULL) {
776       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
777       goto fail_out_syncobjs;
778    }
779 
780    new_submit->entry_count = entry_count;
781    new_submit->nr_in_syncobjs = nr_in_syncobjs;
782    new_submit->nr_out_syncobjs = nr_out_syncobjs;
783    new_submit->perf_pass_index = perf_pass_index;
784    new_submit->vk_submit = vk_submit;
785 
786    return VK_SUCCESS;
787 
788 fail_out_syncobjs:
789    vk_free(&queue->device->vk.alloc, new_submit->in_syncobjs);
790 fail_in_syncobjs:
791    if (new_submit->u_trace_submission_data)
792       tu_u_trace_submission_data_finish(queue->device,
793                                         new_submit->u_trace_submission_data);
794 fail_u_trace_submission_data:
795    vk_free(&queue->device->vk.alloc, new_submit->cmds);
796 fail_cmds:
797    return result;
798 }
799 
800 static void
801 tu_queue_submit_finish(struct tu_queue *queue, struct tu_queue_submit *submit)
802 {
803    vk_free(&queue->device->vk.alloc, submit->cmds);
804    vk_free(&queue->device->vk.alloc, submit->in_syncobjs);
805    vk_free(&queue->device->vk.alloc, submit->out_syncobjs);
806    if (submit->cmd_buffers != (void *) submit->vk_submit->command_buffers)
807       vk_free(&queue->device->vk.alloc, submit->cmd_buffers);
808 }
809 
810 static void
811 tu_fill_msm_gem_submit(struct tu_device *dev,
812                        struct drm_msm_gem_submit_cmd *cmd,
813                        struct tu_cs_entry *cs_entry)
814 {
815    cmd->type = MSM_SUBMIT_CMD_BUF;
816    cmd->submit_idx = cs_entry->bo->bo_list_idx;
817    cmd->submit_offset = cs_entry->offset;
818    cmd->size = cs_entry->size;
819    cmd->pad = 0;
820    cmd->nr_relocs = 0;
821    cmd->relocs = 0;
822 }
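
/* Each command-stream entry ends up as one drm_msm_gem_submit_cmd roughly of
 * the form (illustrative):
 *
 *    { .type = MSM_SUBMIT_CMD_BUF, .submit_idx = <index into dev->bo_list>,
 *      .submit_offset = <byte offset into that BO>, .size = <size in bytes> }
 *
 * The kernel resolves submit_idx against the bos array passed to the submit
 * ioctl.
 */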
823 
824 static void
825 tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
826                                    struct tu_queue_submit *submit,
827                                    struct tu_cs *autotune_cs)
828 {
829    struct tu_device *dev = queue->device;
830    struct drm_msm_gem_submit_cmd *cmds = submit->cmds;
831 
832    uint32_t entry_idx = 0;
833    for (uint32_t j = 0; j < submit->nr_cmd_buffers; ++j) {
834       struct tu_device *dev = queue->device;
835       struct tu_cmd_buffer *cmdbuf = submit->cmd_buffers[j];
836       struct tu_cs *cs = &cmdbuf->cs;
837 
838       if (submit->perf_pass_index != ~0) {
839          struct tu_cs_entry *perf_cs_entry =
840             &dev->perfcntrs_pass_cs_entries[submit->perf_pass_index];
841 
842          tu_fill_msm_gem_submit(dev, &cmds[entry_idx], perf_cs_entry);
843          entry_idx++;
844       }
845 
846       for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) {
847          tu_fill_msm_gem_submit(dev, &cmds[entry_idx], &cs->entries[i]);
848       }
849 
850       if (submit->u_trace_submission_data) {
851          struct tu_cs *ts_cs =
852             submit->u_trace_submission_data->cmd_trace_data[j].timestamp_copy_cs;
853          if (ts_cs) {
854             tu_fill_msm_gem_submit(dev, &cmds[entry_idx], &ts_cs->entries[0]);
855             entry_idx++;
856          }
857       }
858    }
859 
860    if (autotune_cs) {
861       assert(autotune_cs->entry_count == 1);
862       tu_fill_msm_gem_submit(dev, &cmds[entry_idx], &autotune_cs->entries[0]);
863       entry_idx++;
864    }
865 }
866 
867 static VkResult
868 tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
869 {
870    uint32_t submit_idx = queue->device->submit_count++;
871 
872    struct tu_cs *autotune_cs = NULL;
873    if (submit->autotune_fence) {
874       autotune_cs = tu_autotune_on_submit(queue->device,
875                                           &queue->device->autotune,
876                                           submit->cmd_buffers,
877                                           submit->nr_cmd_buffers);
878    }
879 
880    uint32_t flags = MSM_PIPE_3D0;
881 
882    if (submit->vk_submit->wait_count)
883       flags |= MSM_SUBMIT_SYNCOBJ_IN;
884 
885    if (submit->vk_submit->signal_count)
886       flags |= MSM_SUBMIT_SYNCOBJ_OUT;
887 
888    mtx_lock(&queue->device->bo_mutex);
889 
890    if (queue->device->implicit_sync_bo_count == 0)
891       flags |= MSM_SUBMIT_NO_IMPLICIT;
892 
893    /* drm_msm_gem_submit_cmd requires the index of a BO, which could change
894     * at any time while bo_mutex is not locked. So we build the submit cmds
895     * here, at the actual point of submission.
896     */
897    tu_queue_build_msm_gem_submit_cmds(queue, submit, autotune_cs);
898 
899    struct drm_msm_gem_submit req = {
900       .flags = flags,
901       .nr_bos = submit->entry_count ? queue->device->bo_count : 0,
902       .nr_cmds = submit->entry_count,
903       .bos = (uint64_t)(uintptr_t) queue->device->bo_list,
904       .cmds = (uint64_t)(uintptr_t)submit->cmds,
905       .queueid = queue->msm_queue_id,
906       .in_syncobjs = (uint64_t)(uintptr_t)submit->in_syncobjs,
907       .out_syncobjs = (uint64_t)(uintptr_t)submit->out_syncobjs,
908       .nr_in_syncobjs = submit->nr_in_syncobjs,
909       .nr_out_syncobjs = submit->nr_out_syncobjs,
910       .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
911    };
912 
913    if (FD_RD_DUMP(ENABLE) && fd_rd_output_begin(&queue->device->rd_output, submit_idx)) {
914       struct tu_device *device = queue->device;
915       struct fd_rd_output *rd_output = &device->rd_output;
916 
917       if (FD_RD_DUMP(FULL)) {
918          VkResult result = tu_wait_fence(device, queue->msm_queue_id, queue->fence, ~0);
919          if (result != VK_SUCCESS) {
920             mesa_loge("FD_RD_DUMP_FULL: wait on previous submission for device %u and queue %d failed: %u",
921                       device->device_idx, queue->msm_queue_id, result);
922          }
923       }
924 
925       fd_rd_output_write_section(rd_output, RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 8);
926       fd_rd_output_write_section(rd_output, RD_CMD, "tu-dump", 8);
927 
928       for (unsigned i = 0; i < device->bo_count; i++) {
929          struct drm_msm_gem_submit_bo bo = device->bo_list[i];
930          struct tu_bo *tu_bo = tu_device_lookup_bo(device, bo.handle);
931          uint64_t iova = bo.presumed;
932 
933          uint32_t buf[3] = { iova, tu_bo->size, iova >> 32 };
934          fd_rd_output_write_section(rd_output, RD_GPUADDR, buf, 12);
935          if (bo.flags & MSM_SUBMIT_BO_DUMP || FD_RD_DUMP(FULL)) {
936             msm_bo_map(device, tu_bo); /* note: this would need locking to be safe */
937             fd_rd_output_write_section(rd_output, RD_BUFFER_CONTENTS, tu_bo->map, tu_bo->size);
938          }
939       }
940 
941       for (unsigned i = 0; i < req.nr_cmds; i++) {
942          struct drm_msm_gem_submit_cmd *cmd = &submit->cmds[i];
943          uint64_t iova = device->bo_list[cmd->submit_idx].presumed + cmd->submit_offset;
944          uint32_t size = cmd->size >> 2;
945          uint32_t buf[3] = { iova, size, iova >> 32 };
946          fd_rd_output_write_section(rd_output, RD_CMDSTREAM_ADDR, buf, 12);
947       }
948 
949       fd_rd_output_end(rd_output);
950    }
951 
952    int ret = drmCommandWriteRead(queue->device->fd,
953                                  DRM_MSM_GEM_SUBMIT,
954                                  &req, sizeof(req));
955 
956    mtx_unlock(&queue->device->bo_mutex);
957 
958    tu_debug_bos_print_stats(queue->device);
959 
960    if (ret)
961       return vk_device_set_lost(&queue->device->vk, "submit failed: %m");
962 
963    p_atomic_set(&queue->fence, req.fence);
964 
965    uint64_t gpu_offset = 0;
966 #if HAVE_PERFETTO
967    struct tu_perfetto_clocks clocks =
968       tu_perfetto_submit(queue->device, queue->device->submit_count, NULL);
969    gpu_offset = clocks.gpu_ts_offset;
970 #endif
971 
972    if (submit->u_trace_submission_data) {
973       struct tu_u_trace_submission_data *submission_data =
974          submit->u_trace_submission_data;
975       submission_data->submission_id = queue->device->submit_count;
976       submission_data->gpu_ts_offset = gpu_offset;
977       /* We have to allocate it here since it is different between drm/kgsl */
978       submission_data->syncobj = (struct tu_u_trace_syncobj *)
979          vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
980                8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
981       submission_data->syncobj->fence = req.fence;
982       submission_data->syncobj->msm_queue_id = queue->msm_queue_id;
983 
984       submit->u_trace_submission_data = NULL;
985 
986       for (uint32_t i = 0; i < submission_data->cmd_buffer_count; i++) {
987          bool free_data = i == submission_data->last_buffer_with_tracepoints;
988          if (submission_data->cmd_trace_data[i].trace)
989             u_trace_flush(submission_data->cmd_trace_data[i].trace,
990                           submission_data, free_data);
991 
992          if (!submission_data->cmd_trace_data[i].timestamp_copy_cs) {
993             /* u_trace is owned by cmd_buffer */
994             submission_data->cmd_trace_data[i].trace = NULL;
995          }
996       }
997    }
998 
999    for (uint32_t i = 0; i < submit->vk_submit->wait_count; i++) {
1000       if (!vk_sync_is_tu_timeline_sync(submit->vk_submit->waits[i].sync))
1001          continue;
1002 
1003       struct tu_timeline_sync *sync =
1004          container_of(submit->vk_submit->waits[i].sync, struct tu_timeline_sync, base);
1005 
1006       assert(sync->state != TU_TIMELINE_SYNC_STATE_RESET);
1007 
1008       /* Set the wait timeline sync's state to SIGNALED, since the syncobj
1009        * is done and ready again, so it can be garbage collected later.
1010        */
1011       sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
1012    }
1013 
1014    for (uint32_t i = 0; i < submit->vk_submit->signal_count; i++) {
1015       if (!vk_sync_is_tu_timeline_sync(submit->vk_submit->signals[i].sync))
1016          continue;
1017 
1018       struct tu_timeline_sync *sync =
1019          container_of(submit->vk_submit->signals[i].sync, struct tu_timeline_sync, base);
1020 
1021       assert(sync->state == TU_TIMELINE_SYNC_STATE_RESET);
1022       /* Set the signal timeline sync's state to SUBMITTED so that we can
1023        * wait on this timeline sync until it completes, if necessary.
1024        */
1025       sync->state = TU_TIMELINE_SYNC_STATE_SUBMITTED;
1026    }
1027 
1028    pthread_cond_broadcast(&queue->device->timeline_cond);
1029 
1030    return VK_SUCCESS;
1031 }
1032 
1033 static VkResult
1034 msm_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
1035 {
1036    return tu_wait_fence(dev, syncobj->msm_queue_id, syncobj->fence, 1000000000);
1037 }
1038 
1039 static VkResult
1040 msm_queue_submit(struct tu_queue *queue, struct vk_queue_submit *submit)
1041 {
1042    MESA_TRACE_FUNC();
1043    uint32_t perf_pass_index = queue->device->perfcntrs_pass_cs ?
1044                               submit->perf_pass_index : ~0;
1045    struct tu_queue_submit submit_req;
1046 
1047    if (TU_DEBUG(LOG_SKIP_GMEM_OPS)) {
1048       tu_dbg_log_gmem_load_store_skips(queue->device);
1049    }
1050 
1051    pthread_mutex_lock(&queue->device->submit_mutex);
1052 
1053    VkResult ret = tu_queue_submit_create_locked(queue, submit,
1054          submit->wait_count, submit->signal_count,
1055          perf_pass_index, &submit_req);
1056 
1057    if (ret != VK_SUCCESS) {
1058       pthread_mutex_unlock(&queue->device->submit_mutex);
1059       return ret;
1060    }
1061 
1062    /* note: assuming there won't be any very large semaphore counts */
1063    struct drm_msm_gem_submit_syncobj *in_syncobjs = submit_req.in_syncobjs;
1064    struct drm_msm_gem_submit_syncobj *out_syncobjs = submit_req.out_syncobjs;
1065 
1066    uint32_t nr_in_syncobjs = 0, nr_out_syncobjs = 0;
1067 
1068    for (uint32_t i = 0; i < submit->wait_count; i++) {
1069       struct vk_sync *sync = submit->waits[i].sync;
1070 
1071       in_syncobjs[nr_in_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
1072          .handle = tu_syncobj_from_vk_sync(sync),
1073          .flags = 0,
1074          .point = submit->waits[i].wait_value,
1075       };
1076    }
1077 
1078    for (uint32_t i = 0; i < submit->signal_count; i++) {
1079       struct vk_sync *sync = submit->signals[i].sync;
1080 
1081       out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
1082          .handle = tu_syncobj_from_vk_sync(sync),
1083          .flags = 0,
1084          .point = submit->signals[i].signal_value,
1085       };
1086    }
1087 
1088    ret = tu_queue_submit_locked(queue, &submit_req);
1089 
1090    pthread_mutex_unlock(&queue->device->submit_mutex);
1091    tu_queue_submit_finish(queue, &submit_req);
1092 
1093    if (ret != VK_SUCCESS)
1094        return ret;
1095 
1096    u_trace_context_process(&queue->device->trace_context, true);
1097 
1098    return VK_SUCCESS;
1099 }
1100 
1101 static const struct tu_knl msm_knl_funcs = {
1102       .name = "msm",
1103 
1104       .device_init = msm_device_init,
1105       .device_finish = msm_device_finish,
1106       .device_get_gpu_timestamp = msm_device_get_gpu_timestamp,
1107       .device_get_suspend_count = msm_device_get_suspend_count,
1108       .device_check_status = msm_device_check_status,
1109       .submitqueue_new = msm_submitqueue_new,
1110       .submitqueue_close = msm_submitqueue_close,
1111       .bo_init = msm_bo_init,
1112       .bo_init_dmabuf = msm_bo_init_dmabuf,
1113       .bo_export_dmabuf = tu_drm_export_dmabuf,
1114       .bo_map = msm_bo_map,
1115       .bo_allow_dump = msm_bo_allow_dump,
1116       .bo_finish = tu_drm_bo_finish,
1117       .bo_set_metadata = msm_bo_set_metadata,
1118       .bo_get_metadata = msm_bo_get_metadata,
1119       .device_wait_u_trace = msm_device_wait_u_trace,
1120       .queue_submit = msm_queue_submit,
1121 };
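
/* Usage sketch (illustrative): common turnip code does not call the msm_*
 * functions directly; it goes through this dispatch table once
 * tu_knl_drm_msm_load() below has installed it, e.g. roughly:
 *
 *    instance->knl->bo_init(dev, &bo, size, 0, mem_props, flags, "name");
 */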
1122 
1123 VkResult
1124 tu_knl_drm_msm_load(struct tu_instance *instance,
1125                     int fd, struct _drmVersion *version,
1126                     struct tu_physical_device **out)
1127 {
1128    VkResult result = VK_SUCCESS;
1129 
1130    /* Version 1.6 added SYNCOBJ support. */
1131    const int min_version_major = 1;
1132    const int min_version_minor = 6;
1133 
1134    if (version->version_major != min_version_major ||
1135        version->version_minor < min_version_minor) {
1136       result = vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1137                                  "kernel driver for device %s has version %d.%d, "
1138                                  "but Vulkan requires version >= %d.%d",
1139                                  version->name,
1140                                  version->version_major, version->version_minor,
1141                                  min_version_major, min_version_minor);
1142       return result;
1143    }
1144 
1145    struct tu_physical_device *device = (struct tu_physical_device *)
1146       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1147                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1148    if (!device) {
1149       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1150       goto fail;
1151    }
1152 
1153    device->msm_major_version = version->version_major;
1154    device->msm_minor_version = version->version_minor;
1155 
1156    device->instance = instance;
1157    device->local_fd = fd;
1158 
1159    if (tu_drm_get_gpu_id(device, &device->dev_id.gpu_id)) {
1160       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1161                                  "could not get GPU ID");
1162       goto fail;
1163    }
1164 
1165    if (tu_drm_get_param(fd, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) {
1166       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1167                                  "could not get CHIP ID");
1168       goto fail;
1169    }
1170 
1171    if (tu_drm_get_gmem_size(device, &device->gmem_size)) {
1172       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1173                                 "could not get GMEM size");
1174       goto fail;
1175    }
1176    device->gmem_size = debug_get_num_option("TU_GMEM", device->gmem_size);
1177 
1178    if (tu_drm_get_gmem_base(device, &device->gmem_base)) {
1179       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1180                                  "could not get GMEM base");
1181       goto fail;
1182    }
1183 
1184    device->has_set_iova = !tu_drm_get_va_prop(device, &device->va_start,
1185                                               &device->va_size);
1186 
1187    /* Even if the kernel is new enough, the GPU itself may not support it. */
1188    device->has_cached_coherent_memory =
1189       (device->msm_minor_version >= 8) &&
1190       tu_drm_is_memory_type_supported(fd, MSM_BO_CACHED_COHERENT);
1191 
1192    device->submitqueue_priority_count = tu_drm_get_priorities(device);
1193 
1194    device->syncobj_type = vk_drm_syncobj_get_type(fd);
1195    /* we don't support DRM_CAP_SYNCOBJ_TIMELINE, but drm-shim does */
1196    if (!(device->syncobj_type.features & VK_SYNC_FEATURE_TIMELINE))
1197       device->timeline_type = vk_sync_timeline_get_type(&tu_timeline_sync_type);
1198 
1199    device->sync_types[0] = &device->syncobj_type;
1200    device->sync_types[1] = &device->timeline_type.sync;
1201    device->sync_types[2] = NULL;
1202 
1203    device->heap.size = tu_get_system_heap_size(device);
1204    device->heap.used = 0u;
1205    device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
1206 
1207    instance->knl = &msm_knl_funcs;
1208 
1209    *out = device;
1210 
1211    return VK_SUCCESS;
1212 
1213 fail:
1214    vk_free(&instance->vk.alloc, device);
1215    return result;
1216 }
1217