1 /*
2  * Copyright © 2018 Google, Inc.
3  * Copyright © 2015 Intel Corporation
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "tu_knl.h"
8 
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 #include <xf86drm.h>
14 
15 #include "vk_util.h"
16 
17 #include "drm-uapi/msm_drm.h"
18 #include "util/u_debug.h"
19 #include "util/hash_table.h"
20 
21 #include "tu_cmd_buffer.h"
22 #include "tu_cs.h"
23 #include "tu_device.h"
24 #include "tu_dynamic_rendering.h"
25 #include "tu_knl_drm.h"
26 #include "tu_queue.h"
27 #include "tu_rmv.h"
28 #include "redump.h"
29 
30 static int
31 tu_drm_get_param(int fd, uint32_t param, uint64_t *value)
32 {
33    /* Technically this requires a pipe, but the kernel only supports one pipe
34     * anyway at the time of writing and most of these are clearly pipe
35     * independent. */
36    struct drm_msm_param req = {
37       .pipe = MSM_PIPE_3D0,
38       .param = param,
39    };
40 
41    int ret = drmCommandWriteRead(fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
42    if (ret)
43       return ret;
44 
45    *value = req.value;
46 
47    return 0;
48 }
49 
50 static int
51 tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id)
52 {
53    uint64_t value;
54    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_GPU_ID, &value);
55    if (ret)
56       return ret;
57 
58    *id = value;
59    return 0;
60 }
61 
62 static int
63 tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size)
64 {
65    uint64_t value;
66    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_GMEM_SIZE, &value);
67    if (ret)
68       return ret;
69 
70    *size = value;
71    return 0;
72 }
73 
74 static int
75 tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base)
76 {
77    return tu_drm_get_param(dev->local_fd, MSM_PARAM_GMEM_BASE, base);
78 }
79 
80 static bool
81 tu_drm_get_raytracing(const struct tu_physical_device *dev)
82 {
83    uint64_t value;
84    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_RAYTRACING, &value);
85    if (ret)
86       return false;
87 
88    return value;
89 }
90 
91 static int
92 tu_drm_get_va_prop(const struct tu_physical_device *dev,
93                    uint64_t *va_start, uint64_t *va_size)
94 {
95    uint64_t value;
96    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_START, &value);
97    if (ret)
98       return ret;
99 
100    *va_start = value;
101 
102    ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_SIZE, &value);
103    if (ret)
104       return ret;
105 
106    *va_size = value;
107 
108    return 0;
109 }
110 
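/* Probe for preemption support by creating a scratch submitqueue with
 * MSM_SUBMITQUEUE_ALLOW_PREEMPT at a mid-range priority and closing it
 * again; if the kernel rejects the request, preemption is unavailable.
 */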
111 static bool
112 tu_drm_has_preemption(const struct tu_physical_device *dev)
113 {
114    struct drm_msm_submitqueue req = {
115       .flags = MSM_SUBMITQUEUE_ALLOW_PREEMPT,
116       .prio = dev->submitqueue_priority_count / 2,
117    };
118 
119    int ret = drmCommandWriteRead(dev->local_fd,
120                                  DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req));
121    if (ret)
122       return false;
123 
124    drmCommandWrite(dev->local_fd, DRM_MSM_SUBMITQUEUE_CLOSE, &req.id,
125                    sizeof(req.id));
126    return true;
127 }
128 
129 static uint32_t
130 tu_drm_get_priorities(const struct tu_physical_device *dev)
131 {
132    uint64_t val = 1;
133    tu_drm_get_param(dev->local_fd, MSM_PARAM_PRIORITIES, &val);
134    assert(val >= 1);
135 
136    return val;
137 }
138 
139 static uint32_t
140 tu_drm_get_highest_bank_bit(const struct tu_physical_device *dev)
141 {
142    uint64_t value;
143    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_HIGHEST_BANK_BIT, &value);
144    if (ret)
145       return 0;
146 
147    return value;
148 }
149 
150 static enum fdl_macrotile_mode
151 tu_drm_get_macrotile_mode(const struct tu_physical_device *dev)
152 {
153    uint64_t value;
154    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_MACROTILE_MODE, &value);
155    if (ret)
156       return FDL_MACROTILE_INVALID;
157 
158    return (enum fdl_macrotile_mode) value;
159 }
160 
161 static uint32_t
162 tu_drm_get_ubwc_swizzle(const struct tu_physical_device *dev)
163 {
164    uint64_t value;
165    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_UBWC_SWIZZLE, &value);
166    if (ret)
167       return ~0;
168 
169    return value;
170 }
171 
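/* Probe whether BOs can be allocated with the given MSM_BO_* flags by
 * creating a one-page GEM object and immediately closing it.
 */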
172 static bool
173 tu_drm_is_memory_type_supported(int fd, uint32_t flags)
174 {
175    struct drm_msm_gem_new req_alloc = { .size = 0x1000, .flags = flags };
176 
177    int ret =
178       drmCommandWriteRead(fd, DRM_MSM_GEM_NEW, &req_alloc, sizeof(req_alloc));
179    if (ret) {
180       return false;
181    }
182 
183    struct drm_gem_close req_close = {
184       .handle = req_alloc.handle,
185    };
186    drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &req_close);
187 
188    return true;
189 }
190 
191 static VkResult
192 msm_device_init(struct tu_device *dev)
193 {
194    int fd = open(dev->physical_device->fd_path, O_RDWR | O_CLOEXEC);
195    if (fd < 0) {
196       return vk_startup_errorf(
197             dev->physical_device->instance, VK_ERROR_INITIALIZATION_FAILED,
198             "failed to open device %s", dev->physical_device->fd_path);
199    }
200 
201    int ret = tu_drm_get_param(fd, MSM_PARAM_FAULTS, &dev->fault_count);
202    if (ret != 0) {
203       close(fd);
204       return vk_startup_errorf(dev->physical_device->instance,
205                                VK_ERROR_INITIALIZATION_FAILED,
206                                "Failed to get initial fault count: %d", ret);
207    }
208 
209    dev->fd = fd;
210 
211    return VK_SUCCESS;
212 }
213 
214 static void
215 msm_device_finish(struct tu_device *dev)
216 {
217    close(dev->fd);
218 }
219 
220 static int
221 msm_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
222 {
223    return tu_drm_get_param(dev->fd, MSM_PARAM_TIMESTAMP, ts);
224 }
225 
226 static int
227 msm_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
228 {
229    int ret = tu_drm_get_param(dev->fd, MSM_PARAM_SUSPENDS, suspend_count);
230    return ret;
231 }
232 
233 static VkResult
234 msm_device_check_status(struct tu_device *device)
235 {
236    uint64_t last_fault_count = device->fault_count;
237    int ret = tu_drm_get_param(device->fd, MSM_PARAM_FAULTS, &device->fault_count);
238    if (ret != 0)
239       return vk_device_set_lost(&device->vk, "error getting GPU fault count: %d", ret);
240 
241    if (last_fault_count != device->fault_count)
242       return vk_device_set_lost(&device->vk, "GPU faulted or hung");
243 
244    return VK_SUCCESS;
245 }
246 
247 static int
248 msm_submitqueue_new(struct tu_device *dev,
249                     int priority,
250                     uint32_t *queue_id)
251 {
252    assert(priority >= 0 &&
253           priority < dev->physical_device->submitqueue_priority_count);
254    struct drm_msm_submitqueue req = {
255       .flags = dev->physical_device->info->chip >= 7 &&
256          dev->physical_device->has_preemption ?
257          MSM_SUBMITQUEUE_ALLOW_PREEMPT : 0,
258       .prio = priority,
259    };
260 
261    int ret = drmCommandWriteRead(dev->fd,
262                                  DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req));
263    if (ret)
264       return ret;
265 
266    *queue_id = req.id;
267    return 0;
268 }
269 
270 static void
271 msm_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
272 {
273    drmCommandWrite(dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE,
274                    &queue_id, sizeof(uint32_t));
275 }
276 
277 static void
278 tu_gem_close(const struct tu_device *dev, uint32_t gem_handle)
279 {
280    struct drm_gem_close req = {
281       .handle = gem_handle,
282    };
283 
284    drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
285 }
286 
287 /** Helper for DRM_MSM_GEM_INFO, returns 0 on error. */
288 static uint64_t
289 tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info)
290 {
291    struct drm_msm_gem_info req = {
292       .handle = gem_handle,
293       .info = info,
294    };
295 
296    int ret = drmCommandWriteRead(dev->fd,
297                                  DRM_MSM_GEM_INFO, &req, sizeof(req));
298    if (ret < 0)
299       return 0;
300 
301    return req.value;
302 }
303 
304 static VkResult
305 tu_wait_fence(struct tu_device *dev,
306               uint32_t queue_id,
307               int fence,
308               uint64_t timeout_ns)
309 {
310    /* fence was created when no work was yet submitted */
311    if (fence < 0)
312       return VK_SUCCESS;
313 
314    struct drm_msm_wait_fence req = {
315       .fence = fence,
316       .queueid = queue_id,
317    };
318    int ret;
319 
320    get_abs_timeout(&req.timeout, timeout_ns);
321 
322    ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
323    if (ret) {
324       if (ret == -ETIMEDOUT) {
325          return VK_TIMEOUT;
326       } else {
327          mesa_loge("tu_wait_fence failed! %d (%s)", ret, strerror(errno));
328          return VK_ERROR_UNKNOWN;
329       }
330    }
331 
332    return VK_SUCCESS;
333 }
334 
335 VkResult
336 msm_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
337                      uint64_t timeout_ns)
338 {
339    return tu_wait_fence(queue->device, queue->msm_queue_id, fence,
340                         timeout_ns);
341 }
342 
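/* Zombie VMAs are BOs whose iova must not be recycled until the GPU work
 * that may still reference them has finished. When `wait` is set, block
 * (3 s timeout) on the fence of the zombie at the head of the vector first;
 * then release zombies whose fences have already signaled, returning early
 * at the first unsignaled one.
 */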
343 static VkResult
344 tu_free_zombie_vma_locked(struct tu_device *dev, bool wait)
345 {
346    if (!u_vector_length(&dev->zombie_vmas))
347       return VK_SUCCESS;
348 
349    if (wait) {
350       struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
351             u_vector_head(&dev->zombie_vmas);
352       /* Wait for 3s (arbitrary timeout) */
353       VkResult ret = tu_wait_fence(dev, dev->queues[0]->msm_queue_id,
354                                    vma->fence, 3000000000);
355 
356       if (ret != VK_SUCCESS)
357          return ret;
358    }
359 
360    int last_signaled_fence = -1;
361    while (u_vector_length(&dev->zombie_vmas) > 0) {
362       struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
363             u_vector_tail(&dev->zombie_vmas);
364       if (vma->fence > last_signaled_fence) {
365          VkResult ret =
366             tu_wait_fence(dev, dev->queues[0]->msm_queue_id, vma->fence, 0);
367          if (ret != VK_SUCCESS)
368             return ret;
369 
370          last_signaled_fence = vma->fence;
371       }
372 
373       if (vma->gem_handle) {
374          /* Ensure that the kernel's internal vma is freed. */
375          struct drm_msm_gem_info req = {
376             .handle = vma->gem_handle,
377             .info = MSM_INFO_SET_IOVA,
378             .value = 0,
379          };
380 
381          int ret =
382             drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
383          if (ret < 0) {
384             mesa_loge("MSM_INFO_SET_IOVA(0) failed! %d (%s)", ret,
385                       strerror(errno));
386             return VK_ERROR_UNKNOWN;
387          }
388 
389          tu_gem_close(dev, vma->gem_handle);
390 
391          util_vma_heap_free(&dev->vma, vma->iova, vma->size);
392       }
393 
394       u_vector_remove(&dev->zombie_vmas);
395    }
396 
397    return VK_SUCCESS;
398 }
399 
400 static bool
401 tu_restore_from_zombie_vma_locked(struct tu_device *dev,
402                                   uint32_t gem_handle,
403                                   uint64_t *iova)
404 {
405    struct tu_zombie_vma *vma;
406    u_vector_foreach (vma, &dev->zombie_vmas) {
407       if (vma->gem_handle == gem_handle) {
408          *iova = vma->iova;
409 
410          /* mark it so that the later gem and iova cleanup is skipped */
411          vma->gem_handle = 0;
412          return true;
413       }
414    }
415 
416    return false;
417 }
418 
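/* Reserve an iova from the userspace-managed VMA heap and bind it to the
 * GEM object via MSM_INFO_SET_IOVA. For a dmabuf import whose handle still
 * has a pending zombie entry, reuse that entry's iova instead of allocating
 * a new one.
 */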
419 static VkResult
420 msm_allocate_userspace_iova_locked(struct tu_device *dev,
421                                    uint32_t gem_handle,
422                                    uint64_t size,
423                                    uint64_t client_iova,
424                                    enum tu_bo_alloc_flags flags,
425                                    uint64_t *iova)
426 {
427    VkResult result;
428 
429    *iova = 0;
430 
431    if ((flags & TU_BO_ALLOC_DMABUF) &&
432        tu_restore_from_zombie_vma_locked(dev, gem_handle, iova))
433       return VK_SUCCESS;
434 
435    tu_free_zombie_vma_locked(dev, false);
436 
437    result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
438    if (result == VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS) {
439       /* The address may already have been freed by us, but not yet be
440        * considered freed by the kernel. We have to wait until all work
441        * that may hold the address is done. Since addresses are meant to
442        * be replayed only by debug tooling, it should be ok to wait.
443        */
444       tu_free_zombie_vma_locked(dev, true);
445       result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
446    }
447 
448    if (result != VK_SUCCESS)
449       return result;
450 
451    struct drm_msm_gem_info req = {
452       .handle = gem_handle,
453       .info = MSM_INFO_SET_IOVA,
454       .value = *iova,
455    };
456 
457    int ret =
458       drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
459    if (ret < 0) {
460       util_vma_heap_free(&dev->vma, *iova, size);
461       mesa_loge("MSM_INFO_SET_IOVA failed! %d (%s)", ret, strerror(errno));
462       return VK_ERROR_OUT_OF_HOST_MEMORY;
463    }
464 
465    return VK_SUCCESS;
466 }
467 
468 static VkResult
469 tu_allocate_kernel_iova(struct tu_device *dev,
470                         uint32_t gem_handle,
471                         uint64_t *iova)
472 {
473    *iova = tu_gem_info(dev, gem_handle, MSM_INFO_GET_IOVA);
474    if (!*iova)
475       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
476 
477    return VK_SUCCESS;
478 }
479 
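/* Common BO setup: assign an iova (userspace- or kernel-managed), append
 * the BO to the device-wide submit_bo_list consumed by DRM_MSM_GEM_SUBMIT,
 * and fill in the tu_bo fields.
 */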
480 static VkResult
481 tu_bo_init(struct tu_device *dev,
482            struct vk_object_base *base,
483            struct tu_bo *bo,
484            uint32_t gem_handle,
485            uint64_t size,
486            uint64_t client_iova,
487            enum tu_bo_alloc_flags flags,
488            const char *name)
489 {
490    VkResult result = VK_SUCCESS;
491    uint64_t iova = 0;
492 
493    assert(!client_iova || dev->physical_device->has_set_iova);
494 
495    if (dev->physical_device->has_set_iova) {
496       result = msm_allocate_userspace_iova_locked(dev, gem_handle, size,
497                                                   client_iova, flags, &iova);
498    } else {
499       result = tu_allocate_kernel_iova(dev, gem_handle, &iova);
500    }
501 
502    if (result != VK_SUCCESS) {
503       tu_gem_close(dev, gem_handle);
504       return result;
505    }
506 
507    name = tu_debug_bos_add(dev, size, name);
508 
509    mtx_lock(&dev->bo_mutex);
510    uint32_t idx = dev->submit_bo_count++;
511 
512    /* grow the bo list if needed */
513    if (idx >= dev->submit_bo_list_size) {
514       uint32_t new_len = idx + 64;
515       struct drm_msm_gem_submit_bo *new_ptr = (struct drm_msm_gem_submit_bo *)
516          vk_realloc(&dev->vk.alloc, dev->submit_bo_list, new_len * sizeof(*dev->submit_bo_list),
517                     8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
518       if (!new_ptr) {
519          dev->submit_bo_count--;
520          mtx_unlock(&dev->bo_mutex);
521          if (dev->physical_device->has_set_iova)
522             util_vma_heap_free(&dev->vma, iova, size);
523          tu_gem_close(dev, gem_handle);
524          return VK_ERROR_OUT_OF_HOST_MEMORY;
525       }
526 
527       dev->submit_bo_list = new_ptr;
528       dev->submit_bo_list_size = new_len;
529    }
530 
531    bool dump = flags & TU_BO_ALLOC_ALLOW_DUMP;
532    dev->submit_bo_list[idx] = (struct drm_msm_gem_submit_bo) {
533       .flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
534                COND(dump, MSM_SUBMIT_BO_DUMP),
535       .handle = gem_handle,
536       .presumed = iova,
537    };
538 
539    *bo = (struct tu_bo) {
540       .gem_handle = gem_handle,
541       .size = size,
542       .iova = iova,
543       .name = name,
544       .refcnt = 1,
545       .submit_bo_list_idx = idx,
546       .base = base,
547    };
548 
549    mtx_unlock(&dev->bo_mutex);
550 
551    tu_dump_bo_init(dev, bo);
552 
553    TU_RMV(bo_allocate, dev, bo);
554 
555    return VK_SUCCESS;
556 }
557 
558 /**
559  * Sets the name in the kernel so that the contents of /debug/dri/0/gem are more
560  * useful.
561  *
562  * We skip this on release builds (when we're also not doing BO debugging) to
563  * reduce overhead.
564  */
565 static void
566 tu_bo_set_kernel_name(struct tu_device *dev, struct tu_bo *bo, const char *name)
567 {
568    bool kernel_bo_names = dev->bo_sizes != NULL;
569 #if MESA_DEBUG
570    kernel_bo_names = true;
571 #endif
572    if (!kernel_bo_names)
573       return;
574 
575    struct drm_msm_gem_info req = {
576       .handle = bo->gem_handle,
577       .info = MSM_INFO_SET_NAME,
578       .value = (uintptr_t)(void *)name,
579       .len = strlen(name),
580    };
581 
582    int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
583    if (ret) {
584       mesa_logw_once("Failed to set BO name with DRM_MSM_GEM_INFO: %d",
585                      ret);
586    }
587 }
588 
589 static inline void
590 msm_vma_lock(struct tu_device *dev)
591 {
592    if (dev->physical_device->has_set_iova)
593       mtx_lock(&dev->vma_mutex);
594 }
595 
596 static inline void
597 msm_vma_unlock(struct tu_device *dev)
598 {
599    if (dev->physical_device->has_set_iova)
600       mtx_unlock(&dev->vma_mutex);
601 }
602 
603 static VkResult
604 msm_bo_init(struct tu_device *dev,
605             struct vk_object_base *base,
606             struct tu_bo **out_bo,
607             uint64_t size,
608             uint64_t client_iova,
609             VkMemoryPropertyFlags mem_property,
610             enum tu_bo_alloc_flags flags,
611             const char *name)
612 {
613    struct drm_msm_gem_new req = {
614       .size = size,
615       .flags = 0
616    };
617 
618    if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
619       if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
620          req.flags |= MSM_BO_CACHED_COHERENT;
621       } else {
622          req.flags |= MSM_BO_CACHED;
623       }
624    } else {
625       req.flags |= MSM_BO_WC;
626    }
627 
628    if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
629       req.flags |= MSM_BO_GPU_READONLY;
630 
631    int ret = drmCommandWriteRead(dev->fd,
632                                  DRM_MSM_GEM_NEW, &req, sizeof(req));
633    if (ret)
634       return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
635 
636    struct tu_bo* bo = tu_device_lookup_bo(dev, req.handle);
637    assert(bo && bo->gem_handle == 0);
638 
639    assert(!(flags & TU_BO_ALLOC_DMABUF));
640 
641    msm_vma_lock(dev);
642 
643    VkResult result =
644       tu_bo_init(dev, base, bo, req.handle, size, client_iova, flags, name);
645 
646    msm_vma_unlock(dev);
647 
648    if (result == VK_SUCCESS) {
649       *out_bo = bo;
650       if (flags & TU_BO_ALLOC_INTERNAL_RESOURCE) {
651          TU_RMV(internal_resource_create, dev, bo);
652          TU_RMV(resource_name, dev, bo, name);
653       }
654    } else
655       memset(bo, 0, sizeof(*bo));
656 
657    /* We don't use bo->name here because bo->name is NULL unless TU_DEBUG=bo is set. */
658    tu_bo_set_kernel_name(dev, bo, name);
659 
660    if (result == VK_SUCCESS &&
661        (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) &&
662        !(mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
663       tu_bo_map(dev, bo, NULL);
664 
665       /* Cached non-coherent memory may already have dirty cache lines;
666        * we have to clean them before the GPU gets a chance to write
667        * into this memory.
668        *
669        * MSM already does this automatically for uncached (MSM_BO_WC) memory.
670        */
671       tu_bo_sync_cache(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
672    }
673 
674    return result;
675 }
676 
677 static VkResult
678 msm_bo_init_dmabuf(struct tu_device *dev,
679                    struct tu_bo **out_bo,
680                    uint64_t size,
681                    int prime_fd)
682 {
683    /* lseek() to get the real size */
684    off_t real_size = lseek(prime_fd, 0, SEEK_END);
685    lseek(prime_fd, 0, SEEK_SET);
686    if (real_size < 0 || (uint64_t) real_size < size)
687       return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
688 
689    /* iova allocation needs to consider the object's *real* size: */
690    size = real_size;
691 
692    /* Importing the same dmabuf several times yields the same
693     * gem_handle, so there could be a race between destroying a BO
694     * and importing the same dmabuf from different threads. We must
695     * not allow the creation of a dmabuf BO and its release to
696     * happen in parallel.
697     */
698    u_rwlock_wrlock(&dev->dma_bo_lock);
699    msm_vma_lock(dev);
700 
701    uint32_t gem_handle;
702    int ret = drmPrimeFDToHandle(dev->fd, prime_fd,
703                                 &gem_handle);
704    if (ret) {
705       msm_vma_unlock(dev);
706       u_rwlock_wrunlock(&dev->dma_bo_lock);
707       return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
708    }
709 
710    struct tu_bo* bo = tu_device_lookup_bo(dev, gem_handle);
711 
712    if (bo->refcnt != 0) {
713       p_atomic_inc(&bo->refcnt);
714       msm_vma_unlock(dev);
715       u_rwlock_wrunlock(&dev->dma_bo_lock);
716 
717       *out_bo = bo;
718       return VK_SUCCESS;
719    }
720 
721    VkResult result =
722       tu_bo_init(dev, NULL, bo, gem_handle, size, 0, TU_BO_ALLOC_DMABUF, "dmabuf");
723 
724    if (result != VK_SUCCESS)
725       memset(bo, 0, sizeof(*bo));
726    else
727       *out_bo = bo;
728 
729    msm_vma_unlock(dev);
730    u_rwlock_wrunlock(&dev->dma_bo_lock);
731 
732    return result;
733 }
734 
735 static VkResult
736 msm_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr)
737 {
738    uint64_t offset = tu_gem_info(dev, bo->gem_handle, MSM_INFO_GET_OFFSET);
739    if (!offset)
740       return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
741 
742    /* TODO: Should we use the wrapper os_mmap() like Freedreno does? */
743    void *map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
744                     MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
745                     dev->fd, offset);
746    if (map == MAP_FAILED)
747       return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
748 
749    bo->map = map;
750    TU_RMV(bo_map, dev, bo);
751 
752    return VK_SUCCESS;
753 }
754 
755 static void
756 msm_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
757 {
758    mtx_lock(&dev->bo_mutex);
759    dev->submit_bo_list[bo->submit_bo_list_idx].flags |= MSM_SUBMIT_BO_DUMP;
760    mtx_unlock(&dev->bo_mutex);
761 }
762 
763 
764 static void
765 msm_bo_set_metadata(struct tu_device *dev, struct tu_bo *bo,
766                     void *metadata, uint32_t metadata_size)
767 {
768    struct drm_msm_gem_info req = {
769       .handle = bo->gem_handle,
770       .info = MSM_INFO_SET_METADATA,
771       .value = (uintptr_t)(void *)metadata,
772       .len = metadata_size,
773    };
774 
775    int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
776    if (ret) {
777       mesa_logw_once("Failed to set BO metadata with DRM_MSM_GEM_INFO: %d",
778                      ret);
779    }
780 }
781 
782 static int
783 msm_bo_get_metadata(struct tu_device *dev, struct tu_bo *bo,
784                     void *metadata, uint32_t metadata_size)
785 {
786    struct drm_msm_gem_info req = {
787       .handle = bo->gem_handle,
788       .info = MSM_INFO_GET_METADATA,
789       .value = (uintptr_t)(void *)metadata,
790       .len = metadata_size,
791    };
792 
793    int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
794    if (ret) {
795       mesa_logw_once("Failed to get BO metadata with DRM_MSM_GEM_INFO: %d",
796                      ret);
797    }
798 
799    return ret;
800 }
801 
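/* Build the in/out syncobj arrays from the wait/signal lists, refresh each
 * command's submit_idx under bo_mutex (BO indices can change while the lock
 * is not held), then issue DRM_MSM_GEM_SUBMIT and record the returned fence.
 */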
802 static VkResult
803 msm_queue_submit(struct tu_queue *queue, void *_submit,
804                  struct vk_sync_wait *waits, uint32_t wait_count,
805                  struct vk_sync_signal *signals, uint32_t signal_count,
806                  struct tu_u_trace_submission_data *u_trace_submission_data)
807 {
808    VkResult result = VK_SUCCESS;
809    int ret;
810    struct tu_msm_queue_submit *submit =
811       (struct tu_msm_queue_submit *)_submit;
812    struct drm_msm_gem_submit_syncobj *in_syncobjs, *out_syncobjs;
813    struct drm_msm_gem_submit req;
814    uint64_t gpu_offset = 0;
815    uint32_t entry_count =
816       util_dynarray_num_elements(&submit->commands, struct drm_msm_gem_submit_cmd);
817 #if HAVE_PERFETTO
818    struct tu_perfetto_clocks clocks;
819    uint64_t start_ts = tu_perfetto_begin_submit();
820 #endif
821 
822    uint32_t flags = MSM_PIPE_3D0;
823 
824    /* Allocate without wait timeline semaphores */
825    in_syncobjs = (struct drm_msm_gem_submit_syncobj *) vk_zalloc(
826       &queue->device->vk.alloc,
827       wait_count * sizeof(*in_syncobjs), 8,
828       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
829 
830    if (in_syncobjs == NULL) {
831       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
832       goto fail_in_syncobjs;
833    }
834 
835    /* Allocate with signal timeline semaphores considered */
836    out_syncobjs = (struct drm_msm_gem_submit_syncobj *) vk_zalloc(
837       &queue->device->vk.alloc,
838       signal_count * sizeof(*out_syncobjs), 8,
839       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
840 
841    if (out_syncobjs == NULL) {
842       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
843       goto fail_out_syncobjs;
844    }
845 
846    for (uint32_t i = 0; i < wait_count; i++) {
847       struct vk_sync *sync = waits[i].sync;
848 
849       in_syncobjs[i] = (struct drm_msm_gem_submit_syncobj) {
850          .handle = tu_syncobj_from_vk_sync(sync),
851          .flags = 0,
852          .point = waits[i].wait_value,
853       };
854    }
855 
856    for (uint32_t i = 0; i < signal_count; i++) {
857       struct vk_sync *sync = signals[i].sync;
858 
859       out_syncobjs[i] = (struct drm_msm_gem_submit_syncobj) {
860          .handle = tu_syncobj_from_vk_sync(sync),
861          .flags = 0,
862          .point = signals[i].signal_value,
863       };
864    }
865 
866    if (wait_count)
867       flags |= MSM_SUBMIT_SYNCOBJ_IN;
868 
869    if (signal_count)
870       flags |= MSM_SUBMIT_SYNCOBJ_OUT;
871 
872    mtx_lock(&queue->device->bo_mutex);
873 
874    if (queue->device->implicit_sync_bo_count == 0)
875       flags |= MSM_SUBMIT_NO_IMPLICIT;
876 
877    /* drm_msm_gem_submit_cmd requires the index of the bo, which can change
878     * at any time while bo_mutex is not held, so we update the index here
879     * under the lock.
880     */
881    util_dynarray_foreach (&submit->commands, struct drm_msm_gem_submit_cmd,
882                           cmd) {
883       unsigned i = cmd -
884          util_dynarray_element(&submit->commands,
885                                struct drm_msm_gem_submit_cmd, 0);
886       struct tu_bo **bo = util_dynarray_element(&submit->command_bos,
887                                                 struct tu_bo *, i);
888       cmd->submit_idx = (*bo)->submit_bo_list_idx;
889    }
890 
891    req = (struct drm_msm_gem_submit) {
892       .flags = flags,
893       .nr_bos = entry_count ? queue->device->submit_bo_count : 0,
894       .nr_cmds = entry_count,
895       .bos = (uint64_t)(uintptr_t) queue->device->submit_bo_list,
896       .cmds = (uint64_t)(uintptr_t)submit->commands.data,
897       .queueid = queue->msm_queue_id,
898       .in_syncobjs = (uint64_t)(uintptr_t)in_syncobjs,
899       .out_syncobjs = (uint64_t)(uintptr_t)out_syncobjs,
900       .nr_in_syncobjs = wait_count,
901       .nr_out_syncobjs = signal_count,
902       .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
903    };
904 
905    ret = drmCommandWriteRead(queue->device->fd,
906                              DRM_MSM_GEM_SUBMIT,
907                              &req, sizeof(req));
908 
909    mtx_unlock(&queue->device->bo_mutex);
910 
911    if (ret) {
912       result = vk_device_set_lost(&queue->device->vk, "submit failed: %m");
913       goto fail_submit;
914    }
915 
916    p_atomic_set(&queue->fence, req.fence);
917 
918 #if HAVE_PERFETTO
919    clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
920                                    start_ts, NULL);
921    gpu_offset = clocks.gpu_ts_offset;
922 #endif
923 
924    if (u_trace_submission_data) {
925       u_trace_submission_data->gpu_ts_offset = gpu_offset;
926    }
927 
928    for (uint32_t i = 0; i < wait_count; i++) {
929       if (!vk_sync_is_tu_timeline_sync(waits[i].sync))
930          continue;
931 
932       struct tu_timeline_sync *sync =
933          container_of(waits[i].sync, struct tu_timeline_sync, base);
934 
935       assert(sync->state != TU_TIMELINE_SYNC_STATE_RESET);
936 
937       /* Set the state of the wait timeline sync to SIGNALED, since the
938        * syncobj is done and ready again, so it can be garbage-collected later.
939        */
940       sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
941    }
942 
943    for (uint32_t i = 0; i < signal_count; i++) {
944       if (!vk_sync_is_tu_timeline_sync(signals[i].sync))
945          continue;
946 
947       struct tu_timeline_sync *sync =
948          container_of(signals[i].sync, struct tu_timeline_sync, base);
949 
950       assert(sync->state == TU_TIMELINE_SYNC_STATE_RESET);
951       /* Set the state of the signal timeline sync to SUBMITTED so that we
952        * can wait on this timeline sync until it completes, if necessary.
953        */
954       sync->state = TU_TIMELINE_SYNC_STATE_SUBMITTED;
955    }
956 
957 fail_submit:
958    vk_free(&queue->device->vk.alloc, out_syncobjs);
959 fail_out_syncobjs:
960    vk_free(&queue->device->vk.alloc, in_syncobjs);
961 fail_in_syncobjs:
962    return result;
963 }
964 
965 static const struct tu_knl msm_knl_funcs = {
966       .name = "msm",
967 
968       .device_init = msm_device_init,
969       .device_finish = msm_device_finish,
970       .device_get_gpu_timestamp = msm_device_get_gpu_timestamp,
971       .device_get_suspend_count = msm_device_get_suspend_count,
972       .device_check_status = msm_device_check_status,
973       .submitqueue_new = msm_submitqueue_new,
974       .submitqueue_close = msm_submitqueue_close,
975       .bo_init = msm_bo_init,
976       .bo_init_dmabuf = msm_bo_init_dmabuf,
977       .bo_export_dmabuf = tu_drm_export_dmabuf,
978       .bo_map = msm_bo_map,
979       .bo_allow_dump = msm_bo_allow_dump,
980       .bo_finish = tu_drm_bo_finish,
981       .bo_set_metadata = msm_bo_set_metadata,
982       .bo_get_metadata = msm_bo_get_metadata,
983       .submit_create = msm_submit_create,
984       .submit_finish = msm_submit_finish,
985       .submit_add_entries = msm_submit_add_entries,
986       .queue_submit = msm_queue_submit,
987       .queue_wait_fence = msm_queue_wait_fence,
988 };
989 
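/* Given an opened DRM fd and its driver version, validate that the msm
 * kernel driver is new enough, query GPU/GMEM/VA parameters, and create
 * the tu_physical_device with its sync types and memory heap.
 */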
990 VkResult
991 tu_knl_drm_msm_load(struct tu_instance *instance,
992                     int fd, struct _drmVersion *version,
993                     struct tu_physical_device **out)
994 {
995    VkResult result = VK_SUCCESS;
996 
997    /* Version 1.6 added SYNCOBJ support. */
998    const int min_version_major = 1;
999    const int min_version_minor = 6;
1000 
1001    if (version->version_major != min_version_major ||
1002        version->version_minor < min_version_minor) {
1003       result = vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1004                                  "kernel driver for device %s has version %d.%d, "
1005                                  "but Vulkan requires version >= %d.%d",
1006                                  version->name,
1007                                  version->version_major, version->version_minor,
1008                                  min_version_major, min_version_minor);
1009       return result;
1010    }
1011 
1012    struct tu_physical_device *device = (struct tu_physical_device *)
1013       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1014                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1015    if (!device) {
1016       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1017       goto fail;
1018    }
1019 
1020    device->msm_major_version = version->version_major;
1021    device->msm_minor_version = version->version_minor;
1022 
1023    device->instance = instance;
1024    device->local_fd = fd;
1025 
1026    if (tu_drm_get_gpu_id(device, &device->dev_id.gpu_id)) {
1027       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1028                                  "could not get GPU ID");
1029       goto fail;
1030    }
1031 
1032    if (tu_drm_get_param(fd, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) {
1033       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1034                                  "could not get CHIP ID");
1035       goto fail;
1036    }
1037 
1038    if (tu_drm_get_gmem_size(device, &device->gmem_size)) {
1039       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1040                                 "could not get GMEM size");
1041       goto fail;
1042    }
1043    device->gmem_size = debug_get_num_option("TU_GMEM", device->gmem_size);
1044 
1045    if (tu_drm_get_gmem_base(device, &device->gmem_base)) {
1046       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1047                                  "could not get GMEM base");
1048       goto fail;
1049    }
1050 
1051    device->has_set_iova = !tu_drm_get_va_prop(device, &device->va_start,
1052                                               &device->va_size);
1053    device->has_raytracing = tu_drm_get_raytracing(device);
1054 
1055    device->has_preemption = tu_drm_has_preemption(device);
1056 
1057    /* Even if the kernel is new enough, the GPU itself may not support it. */
1058    device->has_cached_coherent_memory =
1059       (device->msm_minor_version >= 8) &&
1060       tu_drm_is_memory_type_supported(fd, MSM_BO_CACHED_COHERENT);
1061 
1062    device->submitqueue_priority_count = tu_drm_get_priorities(device);
1063 
1064    device->ubwc_config.highest_bank_bit = tu_drm_get_highest_bank_bit(device);
1065    device->ubwc_config.bank_swizzle_levels = tu_drm_get_ubwc_swizzle(device);
1066    device->ubwc_config.macrotile_mode = tu_drm_get_macrotile_mode(device);
1067 
1068    device->syncobj_type = vk_drm_syncobj_get_type(fd);
1069    /* we don't support DRM_CAP_SYNCOBJ_TIMELINE, but drm-shim does */
1070    if (!(device->syncobj_type.features & VK_SYNC_FEATURE_TIMELINE))
1071       device->timeline_type = vk_sync_timeline_get_type(&tu_timeline_sync_type);
1072 
1073    device->sync_types[0] = &device->syncobj_type;
1074    device->sync_types[1] = &device->timeline_type.sync;
1075    device->sync_types[2] = NULL;
1076 
1077    device->heap.size = tu_get_system_heap_size(device);
1078    device->heap.used = 0u;
1079    device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
1080 
1081    instance->knl = &msm_knl_funcs;
1082 
1083    *out = device;
1084 
1085    return VK_SUCCESS;
1086 
1087 fail:
1088    vk_free(&instance->vk.alloc, device);
1089    return result;
1090 }
1091