1 /*
2  * Copyright © 2018 Google, Inc.
3  * Copyright © 2015 Intel Corporation
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "tu_knl.h"
8 
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 #include <xf86drm.h>
14 
15 #include "vk_util.h"
16 
17 #include "drm-uapi/msm_drm.h"
18 #include "util/u_debug.h"
19 #include "util/hash_table.h"
20 
21 #include "tu_cmd_buffer.h"
22 #include "tu_cs.h"
23 #include "tu_device.h"
24 #include "tu_dynamic_rendering.h"
25 #include "tu_knl_drm.h"
26 #include "tu_queue.h"
27 #include "tu_rmv.h"
28 #include "redump.h"
29 
30 static int
31 tu_drm_get_param(int fd, uint32_t param, uint64_t *value)
32 {
33    /* Technically this requires a pipe, but at the time of writing the kernel
34     * only supports one pipe anyway, and most of these parameters are clearly
35     * pipe-independent. */
36    struct drm_msm_param req = {
37       .pipe = MSM_PIPE_3D0,
38       .param = param,
39    };
40 
41    int ret = drmCommandWriteRead(fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
42    if (ret)
43       return ret;
44 
45    *value = req.value;
46 
47    return 0;
48 }
49 
50 static int
51 tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id)
52 {
53    uint64_t value;
54    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_GPU_ID, &value);
55    if (ret)
56       return ret;
57 
58    *id = value;
59    return 0;
60 }
61 
62 static int
63 tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size)
64 {
65    uint64_t value;
66    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_GMEM_SIZE, &value);
67    if (ret)
68       return ret;
69 
70    *size = value;
71    return 0;
72 }
73 
74 static int
75 tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base)
76 {
77    return tu_drm_get_param(dev->local_fd, MSM_PARAM_GMEM_BASE, base);
78 }
79 
80 static int
81 tu_drm_get_va_prop(const struct tu_physical_device *dev,
82                    uint64_t *va_start, uint64_t *va_size)
83 {
84    uint64_t value;
85    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_START, &value);
86    if (ret)
87       return ret;
88 
89    *va_start = value;
90 
91    ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_SIZE, &value);
92    if (ret)
93       return ret;
94 
95    *va_size = value;
96 
97    return 0;
98 }
99 
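/* Probe whether the kernel supports preemptible submitqueues by creating a
 * queue with MSM_SUBMITQUEUE_ALLOW_PREEMPT set and immediately closing it
 * again.
 */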
100 static bool
101 tu_drm_has_preemption(const struct tu_physical_device *dev)
102 {
103    struct drm_msm_submitqueue req = {
104       .flags = MSM_SUBMITQUEUE_ALLOW_PREEMPT,
105       .prio = dev->submitqueue_priority_count / 2,
106    };
107 
108    int ret = drmCommandWriteRead(dev->local_fd,
109                                  DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req));
110    if (ret)
111       return false;
112 
113    drmCommandWrite(dev->local_fd, DRM_MSM_SUBMITQUEUE_CLOSE, &req.id,
114                    sizeof(req.id));
115    return true;
116 }
117 
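/* Number of submitqueue priority levels exposed by the kernel. Falls back to
 * a single priority level if MSM_PARAM_PRIORITIES is not supported.
 */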
118 static uint32_t
119 tu_drm_get_priorities(const struct tu_physical_device *dev)
120 {
121    uint64_t val = 1;
122    tu_drm_get_param(dev->local_fd, MSM_PARAM_PRIORITIES, &val);
123    assert(val >= 1);
124 
125    return val;
126 }
127 
128 static uint32_t
129 tu_drm_get_highest_bank_bit(const struct tu_physical_device *dev)
130 {
131    uint64_t value;
132    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_HIGHEST_BANK_BIT, &value);
133    if (ret)
134       return 0;
135 
136    return value;
137 }
138 
139 static enum fdl_macrotile_mode
140 tu_drm_get_macrotile_mode(const struct tu_physical_device *dev)
141 {
142    uint64_t value;
143    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_MACROTILE_MODE, &value);
144    if (ret)
145       return FDL_MACROTILE_INVALID;
146 
147    return (enum fdl_macrotile_mode) value;
148 }
149 
150 static uint32_t
151 tu_drm_get_ubwc_swizzle(const struct tu_physical_device *dev)
152 {
153    uint64_t value;
154    int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_UBWC_SWIZZLE, &value);
155    if (ret)
156       return ~0;
157 
158    return value;
159 }
160 
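/* Check whether the kernel accepts the given MSM_BO_* flags by allocating
 * (and immediately freeing) a single page with them.
 */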
161 static bool
162 tu_drm_is_memory_type_supported(int fd, uint32_t flags)
163 {
164    struct drm_msm_gem_new req_alloc = { .size = 0x1000, .flags = flags };
165 
166    int ret =
167       drmCommandWriteRead(fd, DRM_MSM_GEM_NEW, &req_alloc, sizeof(req_alloc));
168    if (ret) {
169       return false;
170    }
171 
172    struct drm_gem_close req_close = {
173       .handle = req_alloc.handle,
174    };
175    drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &req_close);
176 
177    return true;
178 }
179 
180 static VkResult
181 msm_device_init(struct tu_device *dev)
182 {
183    int fd = open(dev->physical_device->fd_path, O_RDWR | O_CLOEXEC);
184    if (fd < 0) {
185       return vk_startup_errorf(
186             dev->physical_device->instance, VK_ERROR_INITIALIZATION_FAILED,
187             "failed to open device %s", dev->physical_device->fd_path);
188    }
189 
190    int ret = tu_drm_get_param(fd, MSM_PARAM_FAULTS, &dev->fault_count);
191    if (ret != 0) {
192       close(fd);
193       return vk_startup_errorf(dev->physical_device->instance,
194                                VK_ERROR_INITIALIZATION_FAILED,
195                                "Failed to get initial fault count: %d", ret);
196    }
197 
198    dev->fd = fd;
199 
200    return VK_SUCCESS;
201 }
202 
203 static void
204 msm_device_finish(struct tu_device *dev)
205 {
206    close(dev->fd);
207 }
208 
209 static int
210 msm_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
211 {
212    return tu_drm_get_param(dev->fd, MSM_PARAM_TIMESTAMP, ts);
213 }
214 
215 static int
216 msm_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
217 {
218    int ret = tu_drm_get_param(dev->fd, MSM_PARAM_SUSPENDS, suspend_count);
219    return ret;
220 }
221 
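/* Compare the kernel's fault counter against the value cached at device init;
 * any increase means the GPU faulted or hung, so the device is marked lost.
 */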
222 static VkResult
223 msm_device_check_status(struct tu_device *device)
224 {
225    uint64_t last_fault_count = device->fault_count;
226    int ret = tu_drm_get_param(device->fd, MSM_PARAM_FAULTS, &device->fault_count);
227    if (ret != 0)
228       return vk_device_set_lost(&device->vk, "error getting GPU fault count: %d", ret);
229 
230    if (last_fault_count != device->fault_count)
231       return vk_device_set_lost(&device->vk, "GPU faulted or hung");
232 
233    return VK_SUCCESS;
234 }
235 
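/* Create a kernel submitqueue at the requested priority. Preemption is
 * enabled on a7xx and later when the kernel reports support for it.
 */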
236 static int
237 msm_submitqueue_new(struct tu_device *dev,
238                     int priority,
239                     uint32_t *queue_id)
240 {
241    assert(priority >= 0 &&
242           priority < dev->physical_device->submitqueue_priority_count);
243    struct drm_msm_submitqueue req = {
244       .flags = dev->physical_device->info->chip >= 7 &&
245          dev->physical_device->has_preemption ?
246          MSM_SUBMITQUEUE_ALLOW_PREEMPT : 0,
247       .prio = priority,
248    };
249 
250    int ret = drmCommandWriteRead(dev->fd,
251                                  DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req));
252    if (ret)
253       return ret;
254 
255    *queue_id = req.id;
256    return 0;
257 }
258 
259 static void
260 msm_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
261 {
262    drmCommandWrite(dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE,
263                    &queue_id, sizeof(uint32_t));
264 }
265 
266 static void
267 tu_gem_close(const struct tu_device *dev, uint32_t gem_handle)
268 {
269    struct drm_gem_close req = {
270       .handle = gem_handle,
271    };
272 
273    drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
274 }
275 
276 /** Helper for DRM_MSM_GEM_INFO, returns 0 on error. */
277 static uint64_t
278 tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info)
279 {
280    struct drm_msm_gem_info req = {
281       .handle = gem_handle,
282       .info = info,
283    };
284 
285    int ret = drmCommandWriteRead(dev->fd,
286                                  DRM_MSM_GEM_INFO, &req, sizeof(req));
287    if (ret < 0)
288       return 0;
289 
290    return req.value;
291 }
292 
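/* Wait on a queue's kernel fence via DRM_MSM_WAIT_FENCE. A negative fence
 * means no work has been submitted yet, so the wait trivially succeeds.
 */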
293 static VkResult
294 tu_wait_fence(struct tu_device *dev,
295               uint32_t queue_id,
296               int fence,
297               uint64_t timeout_ns)
298 {
299    /* fence was created when no work was yet submitted */
300    if (fence < 0)
301       return VK_SUCCESS;
302 
303    struct drm_msm_wait_fence req = {
304       .fence = fence,
305       .queueid = queue_id,
306    };
307    int ret;
308 
309    get_abs_timeout(&req.timeout, timeout_ns);
310 
311    ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
312    if (ret) {
313       if (ret == -ETIMEDOUT) {
314          return VK_TIMEOUT;
315       } else {
316          mesa_loge("tu_wait_fence failed! %d (%s)", ret, strerror(errno));
317          return VK_ERROR_UNKNOWN;
318       }
319    }
320 
321    return VK_SUCCESS;
322 }
323 
324 VkResult
325 msm_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
326                      uint64_t timeout_ns)
327 {
328    return tu_wait_fence(queue->device, queue->msm_queue_id, fence,
329                         timeout_ns);
330 }
331 
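/* Reclaim zombie VMAs whose fences have signaled: unbind the iova with
 * MSM_INFO_SET_IOVA(0), close the GEM handle, and return the address range
 * to the VMA heap. With `wait` set, block (up to 3s) for outstanding work
 * first.
 */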
332 static VkResult
333 tu_free_zombie_vma_locked(struct tu_device *dev, bool wait)
334 {
335    if (!u_vector_length(&dev->zombie_vmas))
336       return VK_SUCCESS;
337 
338    if (wait) {
339       struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
340             u_vector_head(&dev->zombie_vmas);
341       /* Wait for 3s (arbitrary timeout) */
342       VkResult ret = tu_wait_fence(dev, dev->queues[0]->msm_queue_id,
343                                    vma->fence, 3000000000);
344 
345       if (ret != VK_SUCCESS)
346          return ret;
347    }
348 
349    int last_signaled_fence = -1;
350    while (u_vector_length(&dev->zombie_vmas) > 0) {
351       struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
352             u_vector_tail(&dev->zombie_vmas);
353       if (vma->fence > last_signaled_fence) {
354          VkResult ret =
355             tu_wait_fence(dev, dev->queues[0]->msm_queue_id, vma->fence, 0);
356          if (ret != VK_SUCCESS)
357             return ret;
358 
359          last_signaled_fence = vma->fence;
360       }
361 
362       if (vma->gem_handle) {
363          /* Ensure that the kernel's internal vma is freed. */
364          struct drm_msm_gem_info req = {
365             .handle = vma->gem_handle,
366             .info = MSM_INFO_SET_IOVA,
367             .value = 0,
368          };
369 
370          int ret =
371             drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
372          if (ret < 0) {
373             mesa_loge("MSM_INFO_SET_IOVA(0) failed! %d (%s)", ret,
374                       strerror(errno));
375             return VK_ERROR_UNKNOWN;
376          }
377 
378          tu_gem_close(dev, vma->gem_handle);
379 
380          util_vma_heap_free(&dev->vma, vma->iova, vma->size);
381       }
382 
383       u_vector_remove(&dev->zombie_vmas);
384    }
385 
386    return VK_SUCCESS;
387 }
388 
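/* If a dmabuf is re-imported before its zombie VMA has been reclaimed, reuse
 * the old iova and mark the zombie entry so that the later GEM and iova
 * cleanup is skipped.
 */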
389 static bool
390 tu_restore_from_zombie_vma_locked(struct tu_device *dev,
391                                   uint32_t gem_handle,
392                                   uint64_t *iova)
393 {
394    struct tu_zombie_vma *vma;
395    u_vector_foreach (vma, &dev->zombie_vmas) {
396       if (vma->gem_handle == gem_handle) {
397          *iova = vma->iova;
398 
399          /* mark to skip later gem and iova cleanup */
400          vma->gem_handle = 0;
401          return true;
402       }
403    }
404 
405    return false;
406 }
407 
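/* Pick an iova from the userspace-managed VMA heap (honoring a
 * client-requested address for capture/replay) and bind it to the GEM object
 * with MSM_INFO_SET_IOVA.
 */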
408 static VkResult
409 msm_allocate_userspace_iova_locked(struct tu_device *dev,
410                                    uint32_t gem_handle,
411                                    uint64_t size,
412                                    uint64_t client_iova,
413                                    enum tu_bo_alloc_flags flags,
414                                    uint64_t *iova)
415 {
416    VkResult result;
417 
418    *iova = 0;
419 
420    if ((flags & TU_BO_ALLOC_DMABUF) &&
421        tu_restore_from_zombie_vma_locked(dev, gem_handle, iova))
422       return VK_SUCCESS;
423 
424    tu_free_zombie_vma_locked(dev, false);
425 
426    result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
427    if (result == VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS) {
428       /* The address may have already been freed by us but not yet be
429        * considered freed by the kernel. We have to wait until all work
430        * that may hold the address is done. Since addresses are meant to
431        * be replayed only by debug tooling, it should be OK to wait.
432        */
433       tu_free_zombie_vma_locked(dev, true);
434       result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
435    }
436 
437    if (result != VK_SUCCESS)
438       return result;
439 
440    struct drm_msm_gem_info req = {
441       .handle = gem_handle,
442       .info = MSM_INFO_SET_IOVA,
443       .value = *iova,
444    };
445 
446    int ret =
447       drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
448    if (ret < 0) {
449       util_vma_heap_free(&dev->vma, *iova, size);
450       mesa_loge("MSM_INFO_SET_IOVA failed! %d (%s)", ret, strerror(errno));
451       return VK_ERROR_OUT_OF_HOST_MEMORY;
452    }
453 
454    return VK_SUCCESS;
455 }
456 
457 static VkResult
458 tu_allocate_kernel_iova(struct tu_device *dev,
459                         uint32_t gem_handle,
460                         uint64_t *iova)
461 {
462    *iova = tu_gem_info(dev, gem_handle, MSM_INFO_GET_IOVA);
463    if (!*iova)
464       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
465 
466    return VK_SUCCESS;
467 }
468 
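/* Common BO setup: assign an iova (userspace- or kernel-managed), append the
 * BO to the device-global submit BO list under bo_mutex, and fill in *bo.
 */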
469 static VkResult
470 tu_bo_init(struct tu_device *dev,
471            struct vk_object_base *base,
472            struct tu_bo *bo,
473            uint32_t gem_handle,
474            uint64_t size,
475            uint64_t client_iova,
476            enum tu_bo_alloc_flags flags,
477            const char *name)
478 {
479    VkResult result = VK_SUCCESS;
480    uint64_t iova = 0;
481 
482    assert(!client_iova || dev->physical_device->has_set_iova);
483 
484    if (dev->physical_device->has_set_iova) {
485       result = msm_allocate_userspace_iova_locked(dev, gem_handle, size,
486                                                   client_iova, flags, &iova);
487    } else {
488       result = tu_allocate_kernel_iova(dev, gem_handle, &iova);
489    }
490 
491    if (result != VK_SUCCESS) {
492       tu_gem_close(dev, gem_handle);
493       return result;
494    }
495 
496    name = tu_debug_bos_add(dev, size, name);
497 
498    mtx_lock(&dev->bo_mutex);
499    uint32_t idx = dev->submit_bo_count++;
500 
501    /* grow the bo list if needed */
502    if (idx >= dev->submit_bo_list_size) {
503       uint32_t new_len = idx + 64;
504       struct drm_msm_gem_submit_bo *new_ptr = (struct drm_msm_gem_submit_bo *)
505          vk_realloc(&dev->vk.alloc, dev->submit_bo_list, new_len * sizeof(*dev->submit_bo_list),
506                     8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
507       if (!new_ptr) {
508          dev->submit_bo_count--;
509          mtx_unlock(&dev->bo_mutex);
510          if (dev->physical_device->has_set_iova)
511             util_vma_heap_free(&dev->vma, iova, size);
512          tu_gem_close(dev, gem_handle);
513          return VK_ERROR_OUT_OF_HOST_MEMORY;
514       }
515 
516       dev->submit_bo_list = new_ptr;
517       dev->submit_bo_list_size = new_len;
518    }
519 
520    bool dump = flags & TU_BO_ALLOC_ALLOW_DUMP;
521    dev->submit_bo_list[idx] = (struct drm_msm_gem_submit_bo) {
522       .flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
523                COND(dump, MSM_SUBMIT_BO_DUMP),
524       .handle = gem_handle,
525       .presumed = iova,
526    };
527 
528    *bo = (struct tu_bo) {
529       .gem_handle = gem_handle,
530       .size = size,
531       .iova = iova,
532       .name = name,
533       .refcnt = 1,
534       .submit_bo_list_idx = idx,
535       .base = base,
536    };
537 
538    mtx_unlock(&dev->bo_mutex);
539 
540    tu_dump_bo_init(dev, bo);
541 
542    TU_RMV(bo_allocate, dev, bo);
543 
544    return VK_SUCCESS;
545 }
546 
547 /**
548  * Sets the name in the kernel so that the contents of /debug/dri/0/gem are more
549  * useful.
550  *
551  * We skip this on release builds (when we're also not doing BO debugging) to
552  * reduce overhead.
553  */
554 static void
555 tu_bo_set_kernel_name(struct tu_device *dev, struct tu_bo *bo, const char *name)
556 {
557    bool kernel_bo_names = dev->bo_sizes != NULL;
558 #if MESA_DEBUG
559    kernel_bo_names = true;
560 #endif
561    if (!kernel_bo_names)
562       return;
563 
564    struct drm_msm_gem_info req = {
565       .handle = bo->gem_handle,
566       .info = MSM_INFO_SET_NAME,
567       .value = (uintptr_t)(void *)name,
568       .len = strlen(name),
569    };
570 
571    int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
572    if (ret) {
573       mesa_logw_once("Failed to set BO name with DRM_MSM_GEM_INFO: %d",
574                      ret);
575    }
576 }
577 
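/* The VMA mutex is only needed when userspace iova management is in use
 * (has_set_iova); otherwise these helpers are no-ops.
 */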
578 static inline void
579 msm_vma_lock(struct tu_device *dev)
580 {
581    if (dev->physical_device->has_set_iova)
582       mtx_lock(&dev->vma_mutex);
583 }
584 
585 static inline void
586 msm_vma_unlock(struct tu_device *dev)
587 {
588    if (dev->physical_device->has_set_iova)
589       mtx_unlock(&dev->vma_mutex);
590 }
591 
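/* Allocate a new GEM object, translating the requested VkMemoryPropertyFlags
 * into MSM_BO_CACHED_COHERENT, MSM_BO_CACHED or MSM_BO_WC, then run the
 * common tu_bo_init() setup.
 */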
592 static VkResult
593 msm_bo_init(struct tu_device *dev,
594             struct vk_object_base *base,
595             struct tu_bo **out_bo,
596             uint64_t size,
597             uint64_t client_iova,
598             VkMemoryPropertyFlags mem_property,
599             enum tu_bo_alloc_flags flags,
600             const char *name)
601 {
602    struct drm_msm_gem_new req = {
603       .size = size,
604       .flags = 0
605    };
606 
607    if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
608       if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
609          req.flags |= MSM_BO_CACHED_COHERENT;
610       } else {
611          req.flags |= MSM_BO_CACHED;
612       }
613    } else {
614       req.flags |= MSM_BO_WC;
615    }
616 
617    if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
618       req.flags |= MSM_BO_GPU_READONLY;
619 
620    int ret = drmCommandWriteRead(dev->fd,
621                                  DRM_MSM_GEM_NEW, &req, sizeof(req));
622    if (ret)
623       return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
624 
625    struct tu_bo* bo = tu_device_lookup_bo(dev, req.handle);
626    assert(bo && bo->gem_handle == 0);
627 
628    assert(!(flags & TU_BO_ALLOC_DMABUF));
629 
630    msm_vma_lock(dev);
631 
632    VkResult result =
633       tu_bo_init(dev, base, bo, req.handle, size, client_iova, flags, name);
634 
635    msm_vma_unlock(dev);
636 
637    if (result == VK_SUCCESS) {
638       *out_bo = bo;
639       if (flags & TU_BO_ALLOC_INTERNAL_RESOURCE) {
640          TU_RMV(internal_resource_create, dev, bo);
641          TU_RMV(resource_name, dev, bo, name);
642       }
643    } else
644       memset(bo, 0, sizeof(*bo));
645 
646    /* We don't use bo->name here because for the !TU_DEBUG=bo case bo->name is NULL. */
647    tu_bo_set_kernel_name(dev, bo, name);
648 
649    if (result == VK_SUCCESS &&
650        (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) &&
651        !(mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
652       tu_bo_map(dev, bo, NULL);
653 
654       /* Cached non-coherent memory may already have dirty cache lines,
655        * so we should clean them before the GPU gets a chance to write
656        * into this memory.
657        *
658        * MSM already does this automatically for uncached (MSM_BO_WC) memory.
659        */
660       tu_bo_sync_cache(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
661    }
662 
663    return result;
664 }
665 
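/* Import a dmabuf. The iova is sized from the fd itself (via lseek), and a
 * re-import that resolves to an already-live GEM handle just takes another
 * reference.
 */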
666 static VkResult
667 msm_bo_init_dmabuf(struct tu_device *dev,
668                    struct tu_bo **out_bo,
669                    uint64_t size,
670                    int prime_fd)
671 {
672    /* lseek() to get the real size */
673    off_t real_size = lseek(prime_fd, 0, SEEK_END);
674    lseek(prime_fd, 0, SEEK_SET);
675    if (real_size < 0 || (uint64_t) real_size < size)
676       return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
677 
678    /* iova allocation needs to consider the object's *real* size: */
679    size = real_size;
680 
681    /* Importing the same dmabuf several times would yield the same
682     * gem_handle. Thus there could be a race when destroying a BO and
683     * importing the same dmabuf from different threads. We must not
684     * permit the creation of a dmabuf BO and its release to happen in
685     * parallel.
686     */
687    u_rwlock_wrlock(&dev->dma_bo_lock);
688    msm_vma_lock(dev);
689 
690    uint32_t gem_handle;
691    int ret = drmPrimeFDToHandle(dev->fd, prime_fd,
692                                 &gem_handle);
693    if (ret) {
694       msm_vma_unlock(dev);
695       u_rwlock_wrunlock(&dev->dma_bo_lock);
696       return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
697    }
698 
699    struct tu_bo* bo = tu_device_lookup_bo(dev, gem_handle);
700 
701    if (bo->refcnt != 0) {
702       p_atomic_inc(&bo->refcnt);
703       msm_vma_unlock(dev);
704       u_rwlock_wrunlock(&dev->dma_bo_lock);
705 
706       *out_bo = bo;
707       return VK_SUCCESS;
708    }
709 
710    VkResult result =
711       tu_bo_init(dev, NULL, bo, gem_handle, size, 0, TU_BO_ALLOC_DMABUF, "dmabuf");
712 
713    if (result != VK_SUCCESS)
714       memset(bo, 0, sizeof(*bo));
715    else
716       *out_bo = bo;
717 
718    msm_vma_unlock(dev);
719    u_rwlock_wrunlock(&dev->dma_bo_lock);
720 
721    return result;
722 }
723 
724 static VkResult
725 msm_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr)
726 {
727    uint64_t offset = tu_gem_info(dev, bo->gem_handle, MSM_INFO_GET_OFFSET);
728    if (!offset)
729       return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
730 
731    /* TODO: Should we use the wrapper os_mmap() like Freedreno does? */
732    void *map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
733                     MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
734                     dev->fd, offset);
735    if (map == MAP_FAILED)
736       return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
737 
738    bo->map = map;
739    TU_RMV(bo_map, dev, bo);
740 
741    return VK_SUCCESS;
742 }
743 
744 static void
745 msm_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
746 {
747    mtx_lock(&dev->bo_mutex);
748    dev->submit_bo_list[bo->submit_bo_list_idx].flags |= MSM_SUBMIT_BO_DUMP;
749    mtx_unlock(&dev->bo_mutex);
750 }
751 
752 
753 static void
754 msm_bo_set_metadata(struct tu_device *dev, struct tu_bo *bo,
755                     void *metadata, uint32_t metadata_size)
756 {
757    struct drm_msm_gem_info req = {
758       .handle = bo->gem_handle,
759       .info = MSM_INFO_SET_METADATA,
760       .value = (uintptr_t)(void *)metadata,
761       .len = metadata_size,
762    };
763 
764    int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
765    if (ret) {
766       mesa_logw_once("Failed to set BO metadata with DRM_MSM_GEM_INFO: %d",
767                      ret);
768    }
769 }
770 
771 static int
772 msm_bo_get_metadata(struct tu_device *dev, struct tu_bo *bo,
773                     void *metadata, uint32_t metadata_size)
774 {
775    struct drm_msm_gem_info req = {
776       .handle = bo->gem_handle,
777       .info = MSM_INFO_GET_METADATA,
778       .value = (uintptr_t)(void *)metadata,
779       .len = metadata_size,
780    };
781 
782    int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
783    if (ret) {
784       mesa_logw_once("Failed to get BO metadata with DRM_MSM_GEM_INFO: %d",
785                      ret);
786    }
787 
788    return ret;
789 }
790 
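/* Build and issue a DRM_MSM_GEM_SUBMIT: convert the wait/signal vk_syncs into
 * drm syncobjs, patch each command's BO index under bo_mutex, submit, and
 * finally update the timeline-sync states so they can be garbage collected
 * later.
 */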
791 static VkResult
792 msm_queue_submit(struct tu_queue *queue, void *_submit,
793                  struct vk_sync_wait *waits, uint32_t wait_count,
794                  struct vk_sync_signal *signals, uint32_t signal_count,
795                  struct tu_u_trace_submission_data *u_trace_submission_data)
796 {
797    VkResult result = VK_SUCCESS;
798    int ret;
799    struct tu_msm_queue_submit *submit =
800       (struct tu_msm_queue_submit *)_submit;
801    struct drm_msm_gem_submit_syncobj *in_syncobjs, *out_syncobjs;
802    struct drm_msm_gem_submit req;
803    uint64_t gpu_offset = 0;
804    uint32_t entry_count =
805       util_dynarray_num_elements(&submit->commands, struct drm_msm_gem_submit_cmd);
806 #if HAVE_PERFETTO
807    struct tu_perfetto_clocks clocks;
808    uint64_t start_ts = tu_perfetto_begin_submit();
809 #endif
810 
811    uint32_t flags = MSM_PIPE_3D0;
812 
813    /* Allocate without wait timeline semaphores */
814    in_syncobjs = (struct drm_msm_gem_submit_syncobj *) vk_zalloc(
815       &queue->device->vk.alloc,
816       wait_count * sizeof(*in_syncobjs), 8,
817       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
818 
819    if (in_syncobjs == NULL) {
820       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
821       goto fail_in_syncobjs;
822    }
823 
824    /* Allocate with signal timeline semaphores considered */
825    out_syncobjs = (struct drm_msm_gem_submit_syncobj *) vk_zalloc(
826       &queue->device->vk.alloc,
827       signal_count * sizeof(*out_syncobjs), 8,
828       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
829 
830    if (out_syncobjs == NULL) {
831       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
832       goto fail_out_syncobjs;
833    }
834 
835    for (uint32_t i = 0; i < wait_count; i++) {
836       struct vk_sync *sync = waits[i].sync;
837 
838       in_syncobjs[i] = (struct drm_msm_gem_submit_syncobj) {
839          .handle = tu_syncobj_from_vk_sync(sync),
840          .flags = 0,
841          .point = waits[i].wait_value,
842       };
843    }
844 
845    for (uint32_t i = 0; i < signal_count; i++) {
846       struct vk_sync *sync = signals[i].sync;
847 
848       out_syncobjs[i] = (struct drm_msm_gem_submit_syncobj) {
849          .handle = tu_syncobj_from_vk_sync(sync),
850          .flags = 0,
851          .point = signals[i].signal_value,
852       };
853    }
854 
855    if (wait_count)
856       flags |= MSM_SUBMIT_SYNCOBJ_IN;
857 
858    if (signal_count)
859       flags |= MSM_SUBMIT_SYNCOBJ_OUT;
860 
861    mtx_lock(&queue->device->bo_mutex);
862 
863    if (queue->device->implicit_sync_bo_count == 0)
864       flags |= MSM_SUBMIT_NO_IMPLICIT;
865 
866    /* drm_msm_gem_submit_cmd requires the index of the BO, which could change
867     * at any time while bo_mutex is not held, so we update the index here
868     * under the lock.
869     */
870    util_dynarray_foreach (&submit->commands, struct drm_msm_gem_submit_cmd,
871                           cmd) {
872       unsigned i = cmd -
873          util_dynarray_element(&submit->commands,
874                                struct drm_msm_gem_submit_cmd, 0);
875       struct tu_bo **bo = util_dynarray_element(&submit->command_bos,
876                                                 struct tu_bo *, i);
877       cmd->submit_idx = (*bo)->submit_bo_list_idx;
878    }
879 
880    req = (struct drm_msm_gem_submit) {
881       .flags = flags,
882       .nr_bos = entry_count ? queue->device->submit_bo_count : 0,
883       .nr_cmds = entry_count,
884       .bos = (uint64_t)(uintptr_t) queue->device->submit_bo_list,
885       .cmds = (uint64_t)(uintptr_t)submit->commands.data,
886       .queueid = queue->msm_queue_id,
887       .in_syncobjs = (uint64_t)(uintptr_t)in_syncobjs,
888       .out_syncobjs = (uint64_t)(uintptr_t)out_syncobjs,
889       .nr_in_syncobjs = wait_count,
890       .nr_out_syncobjs = signal_count,
891       .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
892    };
893 
894    ret = drmCommandWriteRead(queue->device->fd,
895                              DRM_MSM_GEM_SUBMIT,
896                              &req, sizeof(req));
897 
898    mtx_unlock(&queue->device->bo_mutex);
899 
900    if (ret) {
901       result = vk_device_set_lost(&queue->device->vk, "submit failed: %m");
902       goto fail_submit;
903    }
904 
905    p_atomic_set(&queue->fence, req.fence);
906 
907 #if HAVE_PERFETTO
908    clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
909                                    start_ts, NULL);
910    gpu_offset = clocks.gpu_ts_offset;
911 #endif
912 
913    if (u_trace_submission_data) {
914       u_trace_submission_data->gpu_ts_offset = gpu_offset;
915    }
916 
917    for (uint32_t i = 0; i < wait_count; i++) {
918       if (!vk_sync_is_tu_timeline_sync(waits[i].sync))
919          continue;
920 
921       struct tu_timeline_sync *sync =
922          container_of(waits[i].sync, struct tu_timeline_sync, base);
923 
924       assert(sync->state != TU_TIMELINE_SYNC_STATE_RESET);
925 
926       /* Set the wait timeline sync's state to SIGNALED, since the syncobj is
927        * done and ready again and can therefore be garbage-collected later.
928        */
929       sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
930    }
931 
932    for (uint32_t i = 0; i < signal_count; i++) {
933       if (!vk_sync_is_tu_timeline_sync(signals[i].sync))
934          continue;
935 
936       struct tu_timeline_sync *sync =
937          container_of(signals[i].sync, struct tu_timeline_sync, base);
938 
939       assert(sync->state == TU_TIMELINE_SYNC_STATE_RESET);
940       /* Set the signal timeline sync's state to SUBMITTED so that we can wait
941        * on it for completion later if necessary.
942        */
943       sync->state = TU_TIMELINE_SYNC_STATE_SUBMITTED;
944    }
945 
946 fail_submit:
947    vk_free(&queue->device->vk.alloc, out_syncobjs);
948 fail_out_syncobjs:
949    vk_free(&queue->device->vk.alloc, in_syncobjs);
950 fail_in_syncobjs:
951    return result;
952 }
953 
954 static const struct tu_knl msm_knl_funcs = {
955       .name = "msm",
956 
957       .device_init = msm_device_init,
958       .device_finish = msm_device_finish,
959       .device_get_gpu_timestamp = msm_device_get_gpu_timestamp,
960       .device_get_suspend_count = msm_device_get_suspend_count,
961       .device_check_status = msm_device_check_status,
962       .submitqueue_new = msm_submitqueue_new,
963       .submitqueue_close = msm_submitqueue_close,
964       .bo_init = msm_bo_init,
965       .bo_init_dmabuf = msm_bo_init_dmabuf,
966       .bo_export_dmabuf = tu_drm_export_dmabuf,
967       .bo_map = msm_bo_map,
968       .bo_allow_dump = msm_bo_allow_dump,
969       .bo_finish = tu_drm_bo_finish,
970       .bo_set_metadata = msm_bo_set_metadata,
971       .bo_get_metadata = msm_bo_get_metadata,
972       .submit_create = msm_submit_create,
973       .submit_finish = msm_submit_finish,
974       .submit_add_entries = msm_submit_add_entries,
975       .queue_submit = msm_queue_submit,
976       .queue_wait_fence = msm_queue_wait_fence,
977 };
978 
979 VkResult
980 tu_knl_drm_msm_load(struct tu_instance *instance,
981                     int fd, struct _drmVersion *version,
982                     struct tu_physical_device **out)
983 {
984    VkResult result = VK_SUCCESS;
985 
986    /* Version 1.6 added SYNCOBJ support. */
987    const int min_version_major = 1;
988    const int min_version_minor = 6;
989 
990    if (version->version_major != min_version_major ||
991        version->version_minor < min_version_minor) {
992       result = vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
993                                  "kernel driver for device %s has version %d.%d, "
994                                  "but Vulkan requires version >= %d.%d",
995                                  version->name,
996                                  version->version_major, version->version_minor,
997                                  min_version_major, min_version_minor);
998       return result;
999    }
1000 
1001    struct tu_physical_device *device = (struct tu_physical_device *)
1002       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1003                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1004    if (!device) {
1005       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1006       goto fail;
1007    }
1008 
1009    device->msm_major_version = version->version_major;
1010    device->msm_minor_version = version->version_minor;
1011 
1012    device->instance = instance;
1013    device->local_fd = fd;
1014 
1015    if (tu_drm_get_gpu_id(device, &device->dev_id.gpu_id)) {
1016       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1017                                  "could not get GPU ID");
1018       goto fail;
1019    }
1020 
1021    if (tu_drm_get_param(fd, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) {
1022       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1023                                  "could not get CHIP ID");
1024       goto fail;
1025    }
1026 
1027    if (tu_drm_get_gmem_size(device, &device->gmem_size)) {
1028       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1029                                 "could not get GMEM size");
1030       goto fail;
1031    }
1032    device->gmem_size = debug_get_num_option("TU_GMEM", device->gmem_size);
1033 
1034    if (tu_drm_get_gmem_base(device, &device->gmem_base)) {
1035       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1036                                  "could not get GMEM base");
1037       goto fail;
1038    }
1039 
1040    device->has_set_iova = !tu_drm_get_va_prop(device, &device->va_start,
1041                                               &device->va_size);
1042 
1043    device->has_preemption = tu_drm_has_preemption(device);
1044 
1045    /* Even if the kernel is new enough, the GPU itself may not support it. */
1046    device->has_cached_coherent_memory =
1047       (device->msm_minor_version >= 8) &&
1048       tu_drm_is_memory_type_supported(fd, MSM_BO_CACHED_COHERENT);
1049 
1050    device->submitqueue_priority_count = tu_drm_get_priorities(device);
1051 
1052    device->ubwc_config.highest_bank_bit = tu_drm_get_highest_bank_bit(device);
1053    device->ubwc_config.bank_swizzle_levels = tu_drm_get_ubwc_swizzle(device);
1054    device->ubwc_config.macrotile_mode = tu_drm_get_macrotile_mode(device);
1055 
1056    device->syncobj_type = vk_drm_syncobj_get_type(fd);
1057    /* we don't support DRM_CAP_SYNCOBJ_TIMELINE, but drm-shim does */
1058    if (!(device->syncobj_type.features & VK_SYNC_FEATURE_TIMELINE))
1059       device->timeline_type = vk_sync_timeline_get_type(&tu_timeline_sync_type);
1060 
1061    device->sync_types[0] = &device->syncobj_type;
1062    device->sync_types[1] = &device->timeline_type.sync;
1063    device->sync_types[2] = NULL;
1064 
1065    device->heap.size = tu_get_system_heap_size(device);
1066    device->heap.used = 0u;
1067    device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
1068 
1069    instance->knl = &msm_knl_funcs;
1070 
1071    *out = device;
1072 
1073    return VK_SUCCESS;
1074 
1075 fail:
1076    vk_free(&instance->vk.alloc, device);
1077    return result;
1078 }
1079