/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <vulkan/vulkan.h>
#include <unistd.h>
#include <xf86drm.h>

#include "drm-uapi/pvr_drm.h"
#include "pvr_drm.h"
#include "pvr_drm_bo.h"
#include "pvr_private.h"
#include "pvr_winsys_helper.h"
#include "util/bitscan.h"
#include "util/macros.h"
#include "vk_log.h"

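/* Thin wrapper around DRM_IOCTL_PVR_CREATE_BO: allocates a GEM BO of the
 * given size with the given DRM_PVR_BO_* flags and returns its GEM handle.
 */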
static VkResult pvr_drm_create_gem_bo(struct pvr_drm_winsys *drm_ws,
                                      uint32_t drm_flags,
                                      uint64_t size,
                                      uint32_t *const handle_out)
{
   struct drm_pvr_ioctl_create_bo_args args = {
      .size = size,
      .flags = drm_flags,
   };
   VkResult result;

   result = pvr_ioctlf(drm_ws->base.render_fd,
                       DRM_IOCTL_PVR_CREATE_BO,
                       &args,
                       VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Failed to create gem bo");
   if (result != VK_SUCCESS)
      return result;

   *handle_out = args.handle;

   return VK_SUCCESS;
}

static VkResult pvr_drm_destroy_gem_bo(struct pvr_drm_winsys *drm_ws,
                                       uint32_t handle)
{
   struct drm_gem_close args = {
      .handle = handle,
   };

   /* The kernel driver doesn't have a corresponding DRM_IOCTL_PVR_DESTROY_BO
    * IOCTL as DRM provides a common IOCTL for doing this.
    */
   return pvr_ioctlf(drm_ws->base.render_fd,
                     DRM_IOCTL_GEM_CLOSE,
                     &args,
                     VK_ERROR_UNKNOWN,
                     "Failed to destroy gem bo");
}

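/* Queries the fake offset that must be passed to mmap() on the render fd in
 * order to CPU-map the given GEM BO.
 */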
static VkResult pvr_drm_get_bo_mmap_offset(struct pvr_drm_winsys *drm_ws,
                                           uint32_t handle,
                                           uint64_t *const offset_out)
{
   struct drm_pvr_ioctl_get_bo_mmap_offset_args args = {
      .handle = handle,
   };
   VkResult result;

   result = pvr_ioctl(drm_ws->base.render_fd,
                      DRM_IOCTL_PVR_GET_BO_MMAP_OFFSET,
                      &args,
                      VK_ERROR_MEMORY_MAP_FAILED);
   if (result != VK_SUCCESS)
      return result;

   *offset_out = args.offset;

   return VK_SUCCESS;
}

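/* Takes a reference on a BO. Paired with pvr_drm_buffer_release() below. */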
static void pvr_drm_buffer_acquire(struct pvr_drm_winsys_bo *drm_bo)
{
   p_atomic_inc(&drm_bo->ref_count);
}

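/* Drops a reference on a BO and destroys the GEM object once the count hits
 * zero. The dma-buf lock is taken for reading so that a release cannot
 * overlap a concurrent import of the same dma-buf, which takes the lock for
 * writing (see pvr_drm_winsys_buffer_create_from_fd() below).
 */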
static void pvr_drm_buffer_release(struct pvr_drm_winsys_bo *drm_bo)
{
   struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(drm_bo->base.ws);

   u_rwlock_rdlock(&drm_ws->dmabuf_bo_lock);

   if (p_atomic_dec_return(&drm_bo->ref_count) == 0) {
      uint32_t handle = drm_bo->handle;

      /* Our BO structs are stored in a sparse array in the winsys structure,
       * so we don't want to free the BO pointer; instead we reset it to 0 to
       * mark that array entry as free.
       *
       * We must do the reset before we actually free the BO in the kernel,
       * since otherwise there is a chance the application creates another BO
       * in a different thread, gets the same array entry, and races with us.
       */
      memset(drm_bo, 0, sizeof(*drm_bo));

      pvr_drm_destroy_gem_bo(drm_ws, handle);
   }

   u_rwlock_rdunlock(&drm_ws->dmabuf_bo_lock);
}

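/* Display BOs are allocated on the display fd, exported as a PRIME fd and
 * re-imported on the render fd, so that both devices share the same memory.
 */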
static VkResult
pvr_drm_display_buffer_create(struct pvr_drm_winsys *drm_ws,
                              uint64_t size,
                              struct pvr_winsys_bo **const bo_out)
{
   uint32_t handle;
   VkResult result;
   int ret;
   int fd;

   result =
      pvr_winsys_helper_display_buffer_create(&drm_ws->base, size, &handle);
   if (result != VK_SUCCESS)
      return result;

   ret = drmPrimeHandleToFD(drm_ws->base.display_fd, handle, DRM_CLOEXEC, &fd);
   pvr_winsys_helper_display_buffer_destroy(&drm_ws->base, handle);
   if (ret)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = pvr_drm_winsys_buffer_create_from_fd(&drm_ws->base, fd, bo_out);
   close(fd);
   if (result != VK_SUCCESS)
      return result;

   assert((*bo_out)->size >= size);

   return VK_SUCCESS;
}

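/* Translates winsys buffer flags (PVR_WINSYS_BO_FLAG_*) into the kernel's
 * DRM_PVR_BO_* allocation flags.
 */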
static uint64_t pvr_drm_get_alloc_flags(uint32_t ws_flags)
{
   uint64_t drm_flags = 0U;

   if (ws_flags & PVR_WINSYS_BO_FLAG_GPU_UNCACHED)
      drm_flags |= DRM_PVR_BO_BYPASS_DEVICE_CACHE;

   if (ws_flags & PVR_WINSYS_BO_FLAG_PM_FW_PROTECT)
      drm_flags |= DRM_PVR_BO_PM_FW_PROTECT;

   if (ws_flags & PVR_WINSYS_BO_FLAG_CPU_ACCESS)
      drm_flags |= DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS;

   return drm_flags;
}

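/* BO structs live in a sparse array indexed by GEM handle. Entries are
 * zero-filled until claimed, which is what the handle == 0 and
 * ref_count == 0 checks elsewhere in this file rely on.
 */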
static inline struct pvr_drm_winsys_bo *
pvr_drm_winsys_lookup_bo(struct pvr_drm_winsys *drm_ws,
                         uint32_t handle)
{
   return (struct pvr_drm_winsys_bo *)
      util_sparse_array_get(&drm_ws->bo_map, handle);
}

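/* Allocates a new GEM BO (or a display BO for PVR_WINSYS_BO_TYPE_DISPLAY)
 * with the size rounded up to both the requested alignment and the winsys
 * page size. A minimal usage sketch follows; the type and flag values here
 * are illustrative only:
 *
 *    struct pvr_winsys_bo *bo;
 *    VkResult result =
 *       pvr_drm_winsys_buffer_create(ws, 4096, 4096, PVR_WINSYS_BO_TYPE_GPU,
 *                                    PVR_WINSYS_BO_FLAG_CPU_ACCESS, &bo);
 *    if (result == VK_SUCCESS)
 *       pvr_drm_winsys_buffer_destroy(bo);
 */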
VkResult pvr_drm_winsys_buffer_create(struct pvr_winsys *ws,
                                      uint64_t size,
                                      uint64_t alignment,
                                      enum pvr_winsys_bo_type type,
                                      uint32_t ws_flags,
                                      struct pvr_winsys_bo **const bo_out)
{
   const uint64_t drm_flags = pvr_drm_get_alloc_flags(ws_flags);
   struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(ws);
   struct pvr_drm_winsys_bo *drm_bo;
   uint32_t handle = 0;
   VkResult result;

   assert(util_is_power_of_two_nonzero64(alignment));
   size = ALIGN_POT(size, alignment);
   size = ALIGN_POT(size, ws->page_size);

   if (type == PVR_WINSYS_BO_TYPE_DISPLAY)
      return pvr_drm_display_buffer_create(drm_ws, size, bo_out);

   result = pvr_drm_create_gem_bo(drm_ws, drm_flags, size, &handle);
   if (result != VK_SUCCESS)
      return result;

   drm_bo = pvr_drm_winsys_lookup_bo(drm_ws, handle);
   assert(drm_bo && drm_bo->handle == 0);

   drm_bo->base.size = size;
   drm_bo->base.ws = ws;
   drm_bo->handle = handle;
   drm_bo->flags = drm_flags;

   p_atomic_set(&drm_bo->ref_count, 1);

   *bo_out = &drm_bo->base;

   return VK_SUCCESS;
}

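/* Imports a dma-buf fd as a BO. If the fd resolves to a GEM handle we
 * already track, the existing BO is returned with an extra reference taken
 * instead of a new BO being set up.
 */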
VkResult
pvr_drm_winsys_buffer_create_from_fd(struct pvr_winsys *ws,
                                     int fd,
                                     struct pvr_winsys_bo **const bo_out)
{
   struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(ws);
   struct pvr_drm_winsys_bo *drm_bo;
   uint32_t handle;
   off_t size;
   int ret;

   size = lseek(fd, 0, SEEK_END);
   if (size == (off_t)-1)
      return vk_error(NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE);

   /* Importing the same dma-buf several times will yield the same GEM
    * handle. Thus, there is a potential race when destroying a BO and
    * importing the same dma-buf from different threads. We must not permit
    * the creation of a dma-buf BO and its release to happen in parallel.
    */
   u_rwlock_wrlock(&drm_ws->dmabuf_bo_lock);

   ret = drmPrimeFDToHandle(ws->render_fd, fd, &handle);
   if (ret) {
      u_rwlock_wrunlock(&drm_ws->dmabuf_bo_lock);

      return vk_error(NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }

   drm_bo = pvr_drm_winsys_lookup_bo(drm_ws, handle);

   if (p_atomic_read(&drm_bo->ref_count) == 0) {
      drm_bo->base.ws = ws;
      drm_bo->base.size = (uint64_t)size;
      drm_bo->base.is_imported = true;
      drm_bo->handle = handle;

      p_atomic_set(&drm_bo->ref_count, 1);
   } else {
      pvr_drm_buffer_acquire(drm_bo);
   }

   u_rwlock_wrunlock(&drm_ws->dmabuf_bo_lock);

   *bo_out = &drm_bo->base;

   return VK_SUCCESS;
}

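/* Drops the caller's reference; the BO is actually destroyed only once all
 * references (including those held by CPU and device mappings) are gone.
 */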
void pvr_drm_winsys_buffer_destroy(struct pvr_winsys_bo *bo)
{
   struct pvr_drm_winsys_bo *drm_bo = to_pvr_drm_winsys_bo(bo);

   pvr_drm_buffer_release(drm_bo);
}

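/* Exports the BO as a dma-buf fd via PRIME. The caller owns the returned fd
 * and is responsible for closing it.
 */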
VkResult pvr_drm_winsys_buffer_get_fd(struct pvr_winsys_bo *bo,
                                      int *const fd_out)
{
   struct pvr_drm_winsys_bo *drm_bo = to_pvr_drm_winsys_bo(bo);
   struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(bo->ws);
   int ret;

   ret = drmPrimeHandleToFD(drm_ws->base.render_fd,
                            drm_bo->handle,
                            DRM_CLOEXEC,
                            fd_out);
   if (ret)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   return VK_SUCCESS;
}

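/* CPU-maps the whole BO through the render fd and takes a reference on it,
 * so the underlying GEM object outlives the mapping.
 */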
VkResult pvr_drm_winsys_buffer_map(struct pvr_winsys_bo *bo)
{
   struct pvr_drm_winsys_bo *drm_bo = to_pvr_drm_winsys_bo(bo);
   struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(bo->ws);
   uint64_t offset = 0;
   void *map = NULL;
   VkResult result;

   assert(!bo->map);

   result = pvr_drm_get_bo_mmap_offset(drm_ws, drm_bo->handle, &offset);
   if (result != VK_SUCCESS)
      goto err_out;

   result = pvr_mmap(bo->size,
                     PROT_READ | PROT_WRITE,
                     MAP_SHARED,
                     drm_ws->base.render_fd,
                     offset,
                     &map);
   if (result != VK_SUCCESS)
      goto err_out;

   VG(VALGRIND_MALLOCLIKE_BLOCK(map, bo->size, 0, true));

   pvr_drm_buffer_acquire(drm_bo);
   bo->map = map;

   return VK_SUCCESS;

err_out:
   return result;
}

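/* Tears down the CPU mapping created by pvr_drm_winsys_buffer_map() and
 * drops the reference it took.
 */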
void pvr_drm_winsys_buffer_unmap(struct pvr_winsys_bo *bo)
{
   struct pvr_drm_winsys_bo *drm_bo = to_pvr_drm_winsys_bo(bo);

   assert(bo->map);

   pvr_munmap(bo->map, bo->size);

   VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));

   bo->map = NULL;

   pvr_drm_buffer_release(drm_bo);
}

/* This function must be used to allocate from a heap carveout and must only
 * be used within the winsys code. Callers are therefore expected to know
 * exactly what they are doing.
 */
VkResult pvr_drm_heap_alloc_carveout(struct pvr_winsys_heap *const heap,
                                     const pvr_dev_addr_t carveout_dev_addr,
                                     uint64_t size,
                                     uint64_t alignment,
                                     struct pvr_winsys_vma **const vma_out)
{
   const struct pvr_drm_winsys *const drm_ws = to_pvr_drm_winsys(heap->ws);
   struct pvr_drm_winsys_vma *drm_vma;
   VkResult result;

   assert(util_is_power_of_two_nonzero64(alignment));

   drm_vma = vk_zalloc(drm_ws->base.alloc,
                       sizeof(*drm_vma),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!drm_vma) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_out;
   }

   /* The powervr kernel mode driver returns a page-aligned size when
    * allocating buffers.
    */
   alignment = MAX2(alignment, heap->page_size);
   size = ALIGN_POT(size, alignment);

   /* TODO: Should we keep track of the allocations in the carveout? */

   drm_vma->base.dev_addr = carveout_dev_addr;
   drm_vma->base.heap = heap;
   drm_vma->base.size = size;

   p_atomic_inc(&heap->ref_count);

   *vma_out = &drm_vma->base;

   return VK_SUCCESS;

err_out:
   return result;
}

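/* Allocates device-virtual space from a heap. The returned vma has no BO
 * bound to it yet; see pvr_drm_winsys_vma_map() below.
 */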
VkResult pvr_drm_winsys_heap_alloc(struct pvr_winsys_heap *heap,
                                   uint64_t size,
                                   uint64_t alignment,
                                   struct pvr_winsys_vma **const vma_out)
{
   const struct pvr_drm_winsys *const drm_ws = to_pvr_drm_winsys(heap->ws);
   struct pvr_drm_winsys_vma *drm_vma;
   VkResult result;

   drm_vma = vk_alloc(drm_ws->base.alloc,
                      sizeof(*drm_vma),
                      8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!drm_vma) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_out;
   }

   result = pvr_winsys_helper_heap_alloc(heap, size, alignment, &drm_vma->base);
   if (result != VK_SUCCESS)
      goto err_free_vma;

   *vma_out = &drm_vma->base;

   return VK_SUCCESS;

err_free_vma:
   vk_free(drm_ws->base.alloc, drm_vma);

err_out:
   return result;
}

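/* Frees a vma. Carveout vmas (see pvr_drm_heap_alloc_carveout() above) only
 * drop the heap reference, since no virtual space was allocated for them.
 */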
void pvr_drm_winsys_heap_free(struct pvr_winsys_vma *vma)
{
   struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(vma->heap->ws);
   struct pvr_drm_winsys_vma *drm_vma = to_pvr_drm_winsys_vma(vma);
   const uint64_t carveout_addr = vma->heap->static_data_carveout_addr.addr;

   /* A vma with an existing device mapping should not be freed. */
   assert(!drm_vma->base.bo);

   /* Check if we are dealing with the carveout address range. */
   if (vma->dev_addr.addr >= carveout_addr &&
       vma->dev_addr.addr <
          (carveout_addr + vma->heap->static_data_carveout_size)) {
      /* For carveout addresses just decrement the reference count. */
      p_atomic_dec(&vma->heap->ref_count);
   } else {
      /* Free allocated virtual space. */
      pvr_winsys_helper_heap_free(vma);
   }

   vk_free(drm_ws->base.alloc, drm_vma);
}

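/* Binds a BO (or a range of it) into the vma's device-virtual range using
 * DRM_IOCTL_PVR_VM_MAP. Offsets that are not page-aligned are handled by
 * mapping from the containing page and offsetting the returned device
 * address. A sketch of the expected pairing, assuming an already prepared
 * heap, vma and bo:
 *
 *    pvr_dev_addr_t dev_addr;
 *    if (pvr_drm_winsys_vma_map(vma, bo, 0, bo->size, &dev_addr) ==
 *        VK_SUCCESS) {
 *       ... use dev_addr on the GPU ...
 *       pvr_drm_winsys_vma_unmap(vma);
 *    }
 */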
VkResult pvr_drm_winsys_vma_map(struct pvr_winsys_vma *vma,
                                struct pvr_winsys_bo *bo,
                                uint64_t offset,
                                uint64_t size,
                                pvr_dev_addr_t *const dev_addr_out)
{
   struct pvr_drm_winsys_bo *const drm_bo = to_pvr_drm_winsys_bo(bo);
   struct pvr_drm_winsys *const drm_ws = to_pvr_drm_winsys(bo->ws);
   const uint32_t virt_offset = offset & (vma->heap->page_size - 1);
   const uint64_t aligned_virt_size =
      ALIGN_POT(virt_offset + size, vma->heap->page_size);
   const uint32_t phys_page_offset = offset - virt_offset;

   struct drm_pvr_ioctl_vm_map_args args = {
      .device_addr = vma->dev_addr.addr,
      .flags = 0U,
      .handle = drm_bo->handle,
      .offset = phys_page_offset,
      .size = aligned_virt_size,
      .vm_context_handle = drm_ws->vm_context,
   };

   VkResult result;

   /* Address should not be mapped already. */
   assert(!vma->bo);

   /* Check if bo and vma can accommodate the given size and offset. */
   if (ALIGN_POT(offset + size, vma->heap->page_size) > bo->size ||
       aligned_virt_size > vma->size) {
      return vk_error(NULL, VK_ERROR_MEMORY_MAP_FAILED);
   }

   result = pvr_ioctl(drm_ws->base.render_fd,
                      DRM_IOCTL_PVR_VM_MAP,
                      &args,
                      VK_ERROR_MEMORY_MAP_FAILED);
   if (result != VK_SUCCESS)
      return result;

   pvr_drm_buffer_acquire(drm_bo);

   vma->bo = &drm_bo->base;
   vma->bo_offset = offset;
   vma->mapped_size = aligned_virt_size;

   if (dev_addr_out)
      *dev_addr_out = PVR_DEV_ADDR_OFFSET(vma->dev_addr, virt_offset);

   return VK_SUCCESS;
}

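/* Reverses pvr_drm_winsys_vma_map(): unmaps the BO from the device-virtual
 * range and drops the reference the map took.
 */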
void pvr_drm_winsys_vma_unmap(struct pvr_winsys_vma *vma)
{
   struct pvr_drm_winsys_bo *const drm_bo = to_pvr_drm_winsys_bo(vma->bo);
   struct pvr_drm_winsys *const drm_ws = to_pvr_drm_winsys(vma->bo->ws);

   struct drm_pvr_ioctl_vm_unmap_args args = {
      .vm_context_handle = drm_ws->vm_context,
      .device_addr = vma->dev_addr.addr,
      .size = vma->mapped_size,
   };

   /* Address should be mapped. */
   assert(vma->bo);

   pvr_ioctlf(drm_ws->base.render_fd,
              DRM_IOCTL_PVR_VM_UNMAP,
              &args,
              VK_ERROR_UNKNOWN,
              "Unmap failed");

   pvr_drm_buffer_release(drm_bo);

   vma->bo = NULL;
}
528