/*
 * Copyright © 2018 Google, Inc.
 * Copyright © 2015 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include <fcntl.h>
#include <sys/mman.h>
#include <xf86drm.h>

#include "tu_knl_drm.h"
#include "tu_device.h"
#include "tu_queue.h"
#include "tu_rmv.h"

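/* Carve a GPU VA range for a new BO out of the userspace-managed VMA heap.
 * For replayable allocations a caller-provided address is honored (failing
 * with VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS if that range is already
 * taken); otherwise replayable IOVAs come from the top of the heap and
 * ordinary ones from the bottom.
 */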
VkResult
tu_allocate_userspace_iova(struct tu_device *dev,
                           uint64_t size,
                           uint64_t client_iova,
                           enum tu_bo_alloc_flags flags,
                           uint64_t *iova)
{
   *iova = 0;

   if (flags & TU_BO_ALLOC_REPLAYABLE) {
      if (client_iova) {
         if (util_vma_heap_alloc_addr(&dev->vma, client_iova, size)) {
            *iova = client_iova;
         } else {
            return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
         }
      } else {
         /* We have to separate replayable IOVAs from ordinary ones so that
          * they don't clash. The easiest way to do this is to allocate them
          * from the other end of the address space.
          */
         dev->vma.alloc_high = true;
         *iova = util_vma_heap_alloc(&dev->vma, size, os_page_size);
      }
   } else {
      dev->vma.alloc_high = false;
      *iova = util_vma_heap_alloc(&dev->vma, size, os_page_size);
   }

   if (!*iova)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   return VK_SUCCESS;
}

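/* Export the BO's GEM handle as a dma-buf fd via PRIME; returns -1 on
 * failure.
 */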
int
tu_drm_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
{
   int prime_fd;
   int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle,
                                DRM_CLOEXEC | DRM_RDWR, &prime_fd);

   return ret == 0 ? prime_fd : -1;
}

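/* Drop a reference to a BO and, on the last reference, unmap it, remove it
 * from the device's submit BO list, and either defer the VA/handle release
 * to the zombie-VMA list (when the kernel supports userspace-set IOVAs) or
 * close the GEM handle immediately.
 */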
void
tu_drm_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{
   assert(bo->gem_handle);

   u_rwlock_rdlock(&dev->dma_bo_lock);

   if (!p_atomic_dec_zero(&bo->refcnt)) {
      u_rwlock_rdunlock(&dev->dma_bo_lock);
      return;
   }

   if (bo->map) {
      TU_RMV(bo_unmap, dev, bo);
      munmap(bo->map, bo->size);
   }

   TU_RMV(bo_destroy, dev, bo);
   tu_debug_bos_del(dev, bo);
   tu_dump_bo_del(dev, bo);

   mtx_lock(&dev->bo_mutex);
   dev->submit_bo_count--;
   dev->submit_bo_list[bo->submit_bo_list_idx] = dev->submit_bo_list[dev->submit_bo_count];

   struct tu_bo *exchanging_bo = tu_device_lookup_bo(dev, dev->submit_bo_list[bo->submit_bo_list_idx].handle);
   exchanging_bo->submit_bo_list_idx = bo->submit_bo_list_idx;

   if (bo->implicit_sync)
      dev->implicit_sync_bo_count--;

   mtx_unlock(&dev->bo_mutex);

   if (dev->physical_device->has_set_iova) {
      mtx_lock(&dev->vma_mutex);
      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
            u_vector_add(&dev->zombie_vmas);
      vma->gem_handle = bo->gem_handle;
#ifdef TU_HAS_VIRTIO
      vma->res_id = bo->res_id;
#endif
      vma->iova = bo->iova;
      vma->size = bo->size;
      vma->fence = p_atomic_read(&dev->queues[0]->fence);

      /* Must be cleared under the VMA mutex, or another thread could race to
       * reap the VMA, closing the BO and letting a new GEM allocation produce
       * this handle again.
       */
      memset(bo, 0, sizeof(*bo));
      mtx_unlock(&dev->vma_mutex);
   } else {
      /* Our BO structs are stored in a sparse array in the physical device,
       * so we don't want to free the BO pointer; instead we reset it to 0
       * to signal that the array entry is free.
       */
      uint32_t gem_handle = bo->gem_handle;
      memset(bo, 0, sizeof(*bo));

      /* Note that the virtgpu GEM_CLOSE path is a bit different, but it does
       * not use the !has_set_iova path, so we can ignore it here.
       */
      struct drm_gem_close req = {
         .handle = gem_handle,
      };

      drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
   }

   u_rwlock_rdunlock(&dev->dma_bo_lock);
}

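/* Allocate a zeroed tu_msm_queue_submit for a queue submission. */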
void *
msm_submit_create(struct tu_device *device)
{
   return vk_zalloc(&device->vk.alloc, sizeof(struct tu_msm_queue_submit), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}

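/* Free the command/BO dynarrays and then the submit object itself. */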
void
msm_submit_finish(struct tu_device *device,
                  void *_submit)
{
   struct tu_msm_queue_submit *submit =
      (struct tu_msm_queue_submit *)_submit;

   util_dynarray_fini(&submit->commands);
   util_dynarray_fini(&submit->command_bos);
   vk_free(&device->vk.alloc, submit);
}

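/* Append one MSM_SUBMIT_CMD_BUF command (and the BO backing it) to the
 * submit for each command-stream entry.
 */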
void
msm_submit_add_entries(struct tu_device *device, void *_submit,
                       struct tu_cs_entry *entries, unsigned num_entries)
{
   struct tu_msm_queue_submit *submit =
      (struct tu_msm_queue_submit *)_submit;

   struct drm_msm_gem_submit_cmd *cmds = (struct drm_msm_gem_submit_cmd *)
      util_dynarray_grow(&submit->commands, struct drm_msm_gem_submit_cmd,
                         num_entries);

   const struct tu_bo **bos = (const struct tu_bo **)
      util_dynarray_grow(&submit->command_bos, struct tu_bo *,
                         num_entries);

   for (unsigned i = 0; i < num_entries; i++) {
      cmds[i].type = MSM_SUBMIT_CMD_BUF;
      cmds[i].submit_idx = entries[i].bo->submit_bo_list_idx;
      cmds[i].submit_offset = entries[i].offset;
      cmds[i].size = entries[i].size;
      cmds[i].pad = 0;
      cmds[i].nr_relocs = 0;
      cmds[i].relocs = 0;
      bos[i] = entries[i].bo;
   }
}

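/* Return the DRM syncobj handle backing a vk_sync, whether it is a tu
 * timeline sync or a plain DRM syncobj.
 */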
uint32_t
tu_syncobj_from_vk_sync(struct vk_sync *sync)
{
   uint32_t syncobj = -1;
   if (vk_sync_is_tu_timeline_sync(sync)) {
      syncobj = to_tu_timeline_sync(sync)->syncobj;
   } else if (vk_sync_type_is_drm_syncobj(sync->type)) {
      syncobj = vk_sync_as_drm_syncobj(sync)->syncobj;
   }

   assert(syncobj != -1);

   return syncobj;
}

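/* vk_sync_type::init: create the backing DRM syncobj and derive the initial
 * SIGNALED/RESET state from the initial value.
 */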
static VkResult
tu_timeline_sync_init(struct vk_device *vk_device,
                      struct vk_sync *vk_sync,
                      uint64_t initial_value)
{
   struct tu_device *device = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
   uint32_t flags = 0;

   assert(device->fd >= 0);

   int err = drmSyncobjCreate(device->fd, flags, &sync->syncobj);

   if (err < 0) {
      return vk_error(device, VK_ERROR_DEVICE_LOST);
   }

   sync->state = initial_value ? TU_TIMELINE_SYNC_STATE_SIGNALED :
                                 TU_TIMELINE_SYNC_STATE_RESET;

   return VK_SUCCESS;
}

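/* vk_sync_type::finish: destroy the backing DRM syncobj. */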
static void
tu_timeline_sync_finish(struct vk_device *vk_device,
                        struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   assert(dev->fd >= 0);
   ASSERTED int err = drmSyncobjDestroy(dev->fd, sync->syncobj);
   assert(err == 0);
}

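/* vk_sync_type::reset: reset the DRM syncobj and return the sync to the
 * RESET state.
 */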
static VkResult
tu_timeline_sync_reset(struct vk_device *vk_device,
                       struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   int err = drmSyncobjReset(dev->fd, &sync->syncobj, 1);
   if (err) {
      return vk_errorf(dev, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_RESET failed: %m");
   } else {
      sync->state = TU_TIMELINE_SYNC_STATE_RESET;
   }

   return VK_SUCCESS;
}

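/* Thin wrapper around DRM_IOCTL_SYNCOBJ_WAIT that always waits for a
 * submission, optionally waits for all handles, and clamps the absolute
 * timeout to INT64_MAX.
 */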
static VkResult
drm_syncobj_wait(struct tu_device *device,
                 uint32_t *handles, uint32_t count_handles,
                 uint64_t timeout_nsec, bool wait_all)
{
   uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (wait_all)
      syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

   /* syncobj absolute timeouts are signed.  Clamp OS_TIMEOUT_INFINITE down. */
   timeout_nsec = MIN2(timeout_nsec, (uint64_t)INT64_MAX);

   int err = drmSyncobjWait(device->fd, handles,
                            count_handles, timeout_nsec,
                            syncobj_wait_flags,
                            NULL /* first_signaled */);
   if (err && errno == ETIME) {
      return VK_TIMEOUT;
   } else if (err) {
      return vk_errorf(device, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_WAIT failed: %m");
   }

   return VK_SUCCESS;
}

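/* vk_sync_type::wait_many: CPU-wait on a mix of unsubmitted, submitted and
 * signaled syncs.  Submitted syncs are waited on through the kernel;
 * syncs that have not been submitted yet are waited on with the device's
 * timeline condition variable until they are submitted or the timeout
 * expires.
 */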
/* Based on anv_bo_sync_wait */
static VkResult
tu_timeline_sync_wait(struct vk_device *vk_device,
                      uint32_t wait_count,
                      const struct vk_sync_wait *waits,
                      enum vk_sync_wait_flags wait_flags,
                      uint64_t abs_timeout_ns)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   bool wait_all = !(wait_flags & VK_SYNC_WAIT_ANY);

   uint32_t handles[wait_count];
   uint32_t submit_count;
   VkResult ret = VK_SUCCESS;
   uint32_t pending = wait_count;
   struct tu_timeline_sync *submitted_syncs[wait_count];

   while (pending) {
      pending = 0;
      submit_count = 0;

      for (unsigned i = 0; i < wait_count; ++i) {
         struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);

         if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) {
            assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
            pending++;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SIGNALED) {
            if (wait_flags & VK_SYNC_WAIT_ANY)
               return VK_SUCCESS;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SUBMITTED) {
            if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
               handles[submit_count] = sync->syncobj;
               submitted_syncs[submit_count++] = sync;
            }
         }
      }

      if (submit_count > 0) {
         do {
            ret = drm_syncobj_wait(dev, handles, submit_count, abs_timeout_ns, wait_all);
         } while (ret == VK_TIMEOUT && os_time_get_nano() < abs_timeout_ns);

         if (ret == VK_SUCCESS) {
            for (unsigned i = 0; i < submit_count; ++i) {
               struct tu_timeline_sync *sync = submitted_syncs[i];
               sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
            }
         } else {
            /* return error covering timeout */
            return ret;
         }
      } else if (pending > 0) {
         /* If we've hit this then someone decided to vkWaitForFences before
          * they've actually submitted any of them to a queue.  This is a
          * fairly pessimal case, so it's ok to lock here and use a standard
          * pthreads condition variable.
          */
         pthread_mutex_lock(&dev->submit_mutex);

         /* It's possible that some of the fences have changed state since the
          * last time we checked.  Now that we have the lock, check for
          * pending fences again and don't wait if it's changed.
          */
         uint32_t now_pending = 0;
         for (uint32_t i = 0; i < wait_count; i++) {
            struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
            if (sync->state == TU_TIMELINE_SYNC_STATE_RESET)
               now_pending++;
         }
         assert(now_pending <= pending);

         if (now_pending == pending) {
            struct timespec abstime = {
               .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
               .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
            };

            ASSERTED int ret;
            ret = pthread_cond_timedwait(&dev->timeline_cond,
                                         &dev->submit_mutex, &abstime);
            assert(ret != EINVAL);
            if (os_time_get_nano() >= abs_timeout_ns) {
               pthread_mutex_unlock(&dev->submit_mutex);
               return VK_TIMEOUT;
            }
         }

         pthread_mutex_unlock(&dev->submit_mutex);
      }
   }

   return ret;
}

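/* vk_sync_type implementation backed by the tu_timeline_sync functions
 * above.  The feature mask advertises binary semantics plus CPU wait/reset;
 * it does not include VK_SYNC_FEATURE_TIMELINE.
 */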
const struct vk_sync_type tu_timeline_sync_type = {
   .size = sizeof(struct tu_timeline_sync),
   .features = (enum vk_sync_features)(
      VK_SYNC_FEATURE_BINARY | VK_SYNC_FEATURE_GPU_WAIT |
      VK_SYNC_FEATURE_GPU_MULTI_WAIT | VK_SYNC_FEATURE_CPU_WAIT |
      VK_SYNC_FEATURE_CPU_RESET | VK_SYNC_FEATURE_WAIT_ANY |
      VK_SYNC_FEATURE_WAIT_PENDING),
   .init = tu_timeline_sync_init,
   .finish = tu_timeline_sync_finish,
   .reset = tu_timeline_sync_reset,
   .wait_many = tu_timeline_sync_wait,
};