1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <fcntl.h>
26 #include <stdbool.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <sys/mman.h>
30 #include <sys/types.h>
31 #include <vulkan/vulkan.h>
32 #include <unistd.h>
33 #include <xf86drm.h>
34
35 #include "drm-uapi/pvr_drm.h"
36 #include "pvr_drm.h"
37 #include "pvr_drm_bo.h"
38 #include "pvr_private.h"
39 #include "pvr_winsys_helper.h"
40 #include "util/bitscan.h"
41 #include "util/macros.h"
42 #include "vk_log.h"
43
pvr_drm_create_gem_bo(struct pvr_drm_winsys * drm_ws,uint32_t drm_flags,uint64_t size,uint32_t * const handle_out)44 static VkResult pvr_drm_create_gem_bo(struct pvr_drm_winsys *drm_ws,
45 uint32_t drm_flags,
46 uint64_t size,
47 uint32_t *const handle_out)
48 {
49 struct drm_pvr_ioctl_create_bo_args args = {
50 .size = size,
51 .flags = drm_flags,
52 };
53 VkResult result;
54
55 result = pvr_ioctlf(drm_ws->base.render_fd,
56 DRM_IOCTL_PVR_CREATE_BO,
57 &args,
58 VK_ERROR_OUT_OF_DEVICE_MEMORY,
59 "Failed to create gem bo");
60 if (result != VK_SUCCESS)
61 return result;
62
63 *handle_out = args.handle;
64
65 return VK_SUCCESS;
66 }
67
pvr_drm_destroy_gem_bo(struct pvr_drm_winsys * drm_ws,uint32_t handle)68 static VkResult pvr_drm_destroy_gem_bo(struct pvr_drm_winsys *drm_ws,
69 uint32_t handle)
70 {
71 struct drm_gem_close args = {
72 .handle = handle,
73 };
74
75 /* The kernel driver doesn't have a corresponding DRM_IOCTL_PVR_DESTROY_BO
76 * IOCTL as DRM provides a common IOCTL for doing this.
77 */
78 return pvr_ioctlf(drm_ws->base.render_fd,
79 DRM_IOCTL_GEM_CLOSE,
80 &args,
81 VK_ERROR_UNKNOWN,
82 "Failed to destroy gem bo");
83 }
84
pvr_drm_get_bo_mmap_offset(struct pvr_drm_winsys * drm_ws,uint32_t handle,uint64_t * const offset_out)85 static VkResult pvr_drm_get_bo_mmap_offset(struct pvr_drm_winsys *drm_ws,
86 uint32_t handle,
87 uint64_t *const offset_out)
88 {
89 struct drm_pvr_ioctl_get_bo_mmap_offset_args args = {
90 .handle = handle,
91 };
92 VkResult result;
93
94 result = pvr_ioctl(drm_ws->base.render_fd,
95 DRM_IOCTL_PVR_GET_BO_MMAP_OFFSET,
96 &args,
97 VK_ERROR_MEMORY_MAP_FAILED);
98 if (result != VK_SUCCESS)
99 return result;
100
101 *offset_out = args.offset;
102
103 return VK_SUCCESS;
104 }
105
pvr_drm_buffer_acquire(struct pvr_drm_winsys_bo * drm_bo)106 static void pvr_drm_buffer_acquire(struct pvr_drm_winsys_bo *drm_bo)
107 {
108 p_atomic_inc(&drm_bo->ref_count);
109 }
110
pvr_drm_buffer_release(struct pvr_drm_winsys_bo * drm_bo)111 static void pvr_drm_buffer_release(struct pvr_drm_winsys_bo *drm_bo)
112 {
113 struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(drm_bo->base.ws);
114
115 u_rwlock_rdlock(&drm_ws->dmabuf_bo_lock);
116
117 if (p_atomic_dec_return(&drm_bo->ref_count) == 0) {
118 uint32_t handle = drm_bo->handle;
119
120 /* Our BO structs are stored in a sparse array in the winsys structure,
121 * so we don't want to free the BO pointer, instead we want to reset it
122 * to 0, to signal that array entry as being free.
123 *
124 * We must do the reset before we actually free the BO in the kernel, since
125 * otherwise there is a chance the application creates another BO in a
126 * different thread and gets the same array entry, causing a race.
127 */
128 memset(drm_bo, 0, sizeof(*drm_bo));
129
130 pvr_drm_destroy_gem_bo(drm_ws, handle);
131 }
132
133 u_rwlock_rdunlock(&drm_ws->dmabuf_bo_lock);
134 }
135
136 static VkResult
pvr_drm_display_buffer_create(struct pvr_drm_winsys * drm_ws,uint64_t size,struct pvr_winsys_bo ** const bo_out)137 pvr_drm_display_buffer_create(struct pvr_drm_winsys *drm_ws,
138 uint64_t size,
139 struct pvr_winsys_bo **const bo_out)
140 {
141 uint32_t handle;
142 VkResult result;
143 int ret;
144 int fd;
145
146 result =
147 pvr_winsys_helper_display_buffer_create(&drm_ws->base, size, &handle);
148 if (result != VK_SUCCESS)
149 return result;
150
151 ret = drmPrimeHandleToFD(drm_ws->base.display_fd, handle, DRM_CLOEXEC, &fd);
152 pvr_winsys_helper_display_buffer_destroy(&drm_ws->base, handle);
153 if (ret)
154 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
155
156 result = pvr_drm_winsys_buffer_create_from_fd(&drm_ws->base, fd, bo_out);
157 close(fd);
158 if (result != VK_SUCCESS)
159 return result;
160
161 assert((*bo_out)->size >= size);
162
163 return VK_SUCCESS;
164 }
165
pvr_drm_get_alloc_flags(uint32_t ws_flags)166 static uint64_t pvr_drm_get_alloc_flags(uint32_t ws_flags)
167 {
168 uint64_t drm_flags = 0U;
169
170 if (ws_flags & PVR_WINSYS_BO_FLAG_GPU_UNCACHED)
171 drm_flags |= DRM_PVR_BO_BYPASS_DEVICE_CACHE;
172
173 if (ws_flags & PVR_WINSYS_BO_FLAG_PM_FW_PROTECT)
174 drm_flags |= DRM_PVR_BO_PM_FW_PROTECT;
175
176 if (ws_flags & PVR_WINSYS_BO_FLAG_CPU_ACCESS)
177 drm_flags |= DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS;
178
179 return drm_flags;
180 }
181
182 static inline struct pvr_drm_winsys_bo *
pvr_drm_winsys_lookup_bo(struct pvr_drm_winsys * drm_ws,uint32_t handle)183 pvr_drm_winsys_lookup_bo(struct pvr_drm_winsys *drm_ws,
184 uint32_t handle)
185 {
186 return (struct pvr_drm_winsys_bo *) util_sparse_array_get(&drm_ws->bo_map, handle);
187 }
188
pvr_drm_winsys_buffer_create(struct pvr_winsys * ws,uint64_t size,uint64_t alignment,enum pvr_winsys_bo_type type,uint32_t ws_flags,struct pvr_winsys_bo ** const bo_out)189 VkResult pvr_drm_winsys_buffer_create(struct pvr_winsys *ws,
190 uint64_t size,
191 uint64_t alignment,
192 enum pvr_winsys_bo_type type,
193 uint32_t ws_flags,
194 struct pvr_winsys_bo **const bo_out)
195 {
196 const uint64_t drm_flags = pvr_drm_get_alloc_flags(ws_flags);
197 struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(ws);
198 struct pvr_drm_winsys_bo *drm_bo;
199 uint32_t handle = 0;
200 VkResult result;
201
202 assert(util_is_power_of_two_nonzero64(alignment));
203 size = ALIGN_POT(size, alignment);
204 size = ALIGN_POT(size, ws->page_size);
205
206 if (type == PVR_WINSYS_BO_TYPE_DISPLAY)
207 return pvr_drm_display_buffer_create(drm_ws, size, bo_out);
208
209 result = pvr_drm_create_gem_bo(drm_ws, drm_flags, size, &handle);
210 if (result != VK_SUCCESS)
211 return result;
212
213 drm_bo = pvr_drm_winsys_lookup_bo(drm_ws, handle);
214 assert(drm_bo && drm_bo->handle == 0);
215
216 drm_bo->base.size = size;
217 drm_bo->base.ws = ws;
218 drm_bo->handle = handle;
219 drm_bo->flags = drm_flags;
220
221 p_atomic_set(&drm_bo->ref_count, 1);
222
223 *bo_out = &drm_bo->base;
224
225 return VK_SUCCESS;
226 }
227
228 VkResult
pvr_drm_winsys_buffer_create_from_fd(struct pvr_winsys * ws,int fd,struct pvr_winsys_bo ** const bo_out)229 pvr_drm_winsys_buffer_create_from_fd(struct pvr_winsys *ws,
230 int fd,
231 struct pvr_winsys_bo **const bo_out)
232 {
233 struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(ws);
234 struct pvr_drm_winsys_bo *drm_bo;
235 uint32_t handle;
236 off_t size;
237 int ret;
238
239 size = lseek(fd, 0, SEEK_END);
240 if (size == (off_t)-1)
241 return vk_error(NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE);
242
243 /* Importing the same dma-buf several times will yield the same GEM
244 * handle. Thus, there is a potential race when destroying a BO and importing
245 * the same dma-buf from different threads. We must not permit the creation
246 * of a dma-buf BO and its release to happen in parallel.
247 */
248 u_rwlock_wrlock(&drm_ws->dmabuf_bo_lock);
249
250 ret = drmPrimeFDToHandle(ws->render_fd, fd, &handle);
251 if (ret) {
252 u_rwlock_wrunlock(&drm_ws->dmabuf_bo_lock);
253
254 return vk_error(NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE);
255 }
256
257 drm_bo = pvr_drm_winsys_lookup_bo(drm_ws, handle);
258
259 if (p_atomic_read(&drm_bo->ref_count) == 0) {
260 drm_bo->base.ws = ws;
261 drm_bo->base.size = (uint64_t)size;
262 drm_bo->base.is_imported = true;
263 drm_bo->handle = handle;
264
265 p_atomic_set(&drm_bo->ref_count, 1);
266 } else {
267 pvr_drm_buffer_acquire(drm_bo);
268 }
269
270 u_rwlock_wrunlock(&drm_ws->dmabuf_bo_lock);
271
272 *bo_out = &drm_bo->base;
273
274 return VK_SUCCESS;
275 }
276
/* Winsys entry point for destroying a BO: drops one reference on it. The
 * underlying GEM object is only closed once the last reference is gone.
 */
void pvr_drm_winsys_buffer_destroy(struct pvr_winsys_bo *bo)
{
   pvr_drm_buffer_release(to_pvr_drm_winsys_bo(bo));
}
283
pvr_drm_winsys_buffer_get_fd(struct pvr_winsys_bo * bo,int * const fd_out)284 VkResult pvr_drm_winsys_buffer_get_fd(struct pvr_winsys_bo *bo,
285 int *const fd_out)
286 {
287 struct pvr_drm_winsys_bo *drm_bo = to_pvr_drm_winsys_bo(bo);
288 struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(bo->ws);
289 int ret;
290
291 ret = drmPrimeHandleToFD(drm_ws->base.render_fd,
292 drm_bo->handle,
293 DRM_CLOEXEC,
294 fd_out);
295 if (ret)
296 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
297
298 return VK_SUCCESS;
299 }
300
pvr_drm_winsys_buffer_map(struct pvr_winsys_bo * bo)301 VkResult pvr_drm_winsys_buffer_map(struct pvr_winsys_bo *bo)
302 {
303 struct pvr_drm_winsys_bo *drm_bo = to_pvr_drm_winsys_bo(bo);
304 struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(bo->ws);
305 uint64_t offset = 0;
306 void *map = NULL;
307 VkResult result;
308
309 assert(!bo->map);
310
311 result = pvr_drm_get_bo_mmap_offset(drm_ws, drm_bo->handle, &offset);
312 if (result != VK_SUCCESS)
313 goto err_out;
314
315 result = pvr_mmap(bo->size,
316 PROT_READ | PROT_WRITE,
317 MAP_SHARED,
318 drm_ws->base.render_fd,
319 offset,
320 &map);
321 if (result != VK_SUCCESS)
322 goto err_out;
323
324 VG(VALGRIND_MALLOCLIKE_BLOCK(map, bo->size, 0, true));
325
326 pvr_drm_buffer_acquire(drm_bo);
327 bo->map = map;
328
329 return VK_SUCCESS;
330
331 err_out:
332 return result;
333 }
334
pvr_drm_winsys_buffer_unmap(struct pvr_winsys_bo * bo)335 void pvr_drm_winsys_buffer_unmap(struct pvr_winsys_bo *bo)
336 {
337 struct pvr_drm_winsys_bo *drm_bo = to_pvr_drm_winsys_bo(bo);
338
339 assert(bo->map);
340
341 pvr_munmap(bo->map, bo->size);
342
343 VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
344
345 bo->map = NULL;
346
347 pvr_drm_buffer_release(drm_bo);
348 }
349
350 /* This function must be used to allocate from a heap carveout and must only be
351 * used within the winsys code. This also means whoever is using it, must know
352 * what they are doing.
353 */
pvr_drm_heap_alloc_carveout(struct pvr_winsys_heap * const heap,const pvr_dev_addr_t carveout_dev_addr,uint64_t size,uint64_t alignment,struct pvr_winsys_vma ** const vma_out)354 VkResult pvr_drm_heap_alloc_carveout(struct pvr_winsys_heap *const heap,
355 const pvr_dev_addr_t carveout_dev_addr,
356 uint64_t size,
357 uint64_t alignment,
358 struct pvr_winsys_vma **const vma_out)
359 {
360 const struct pvr_drm_winsys *const drm_ws = to_pvr_drm_winsys(heap->ws);
361 struct pvr_drm_winsys_vma *drm_vma;
362 VkResult result;
363
364 assert(util_is_power_of_two_nonzero64(alignment));
365
366 drm_vma = vk_zalloc(drm_ws->base.alloc,
367 sizeof(*drm_vma),
368 8,
369 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
370 if (!drm_vma) {
371 result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
372 goto err_out;
373 }
374
375 /* The powervr kernel mode driver returns a page aligned size when
376 * allocating buffers.
377 */
378 alignment = MAX2(alignment, heap->page_size);
379 size = ALIGN_POT(size, alignment);
380
381 /* TODO: Should we keep track of the allocations in the carveout? */
382
383 drm_vma->base.dev_addr = carveout_dev_addr;
384 drm_vma->base.heap = heap;
385 drm_vma->base.size = size;
386
387 p_atomic_inc(&heap->ref_count);
388
389 *vma_out = &drm_vma->base;
390
391 return VK_SUCCESS;
392
393 err_out:
394 return result;
395 }
396
pvr_drm_winsys_heap_alloc(struct pvr_winsys_heap * heap,uint64_t size,uint64_t alignment,struct pvr_winsys_vma ** const vma_out)397 VkResult pvr_drm_winsys_heap_alloc(struct pvr_winsys_heap *heap,
398 uint64_t size,
399 uint64_t alignment,
400 struct pvr_winsys_vma **const vma_out)
401 {
402 const struct pvr_drm_winsys *const drm_ws = to_pvr_drm_winsys(heap->ws);
403 struct pvr_drm_winsys_vma *drm_vma;
404 VkResult result;
405
406 drm_vma = vk_alloc(drm_ws->base.alloc,
407 sizeof(*drm_vma),
408 8,
409 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
410 if (!drm_vma) {
411 result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
412 goto err_out;
413 }
414
415 result = pvr_winsys_helper_heap_alloc(heap, size, alignment, &drm_vma->base);
416 if (result != VK_SUCCESS)
417 goto err_free_vma;
418
419 *vma_out = &drm_vma->base;
420
421 return VK_SUCCESS;
422
423 err_free_vma:
424 vk_free(drm_ws->base.alloc, drm_vma);
425
426 err_out:
427 return result;
428 }
429
pvr_drm_winsys_heap_free(struct pvr_winsys_vma * vma)430 void pvr_drm_winsys_heap_free(struct pvr_winsys_vma *vma)
431 {
432 struct pvr_drm_winsys *drm_ws = to_pvr_drm_winsys(vma->heap->ws);
433 struct pvr_drm_winsys_vma *drm_vma = to_pvr_drm_winsys_vma(vma);
434 const uint64_t carveout_addr = vma->heap->static_data_carveout_addr.addr;
435
436 /* A vma with an existing device mapping should not be freed. */
437 assert(!drm_vma->base.bo);
438
439 /* Check if we are dealing with carveout address range. */
440 if (vma->dev_addr.addr >= carveout_addr &&
441 vma->dev_addr.addr <
442 (carveout_addr + vma->heap->static_data_carveout_size)) {
443 /* For the carveout addresses just decrement the reference count. */
444 p_atomic_dec(&vma->heap->ref_count);
445 } else {
446 /* Free allocated virtual space. */
447 pvr_winsys_helper_heap_free(vma);
448 }
449
450 vk_free(drm_ws->base.alloc, drm_vma);
451 }
452
pvr_drm_winsys_vma_map(struct pvr_winsys_vma * vma,struct pvr_winsys_bo * bo,uint64_t offset,uint64_t size,pvr_dev_addr_t * const dev_addr_out)453 VkResult pvr_drm_winsys_vma_map(struct pvr_winsys_vma *vma,
454 struct pvr_winsys_bo *bo,
455 uint64_t offset,
456 uint64_t size,
457 pvr_dev_addr_t *const dev_addr_out)
458 {
459 struct pvr_drm_winsys_bo *const drm_bo = to_pvr_drm_winsys_bo(bo);
460 struct pvr_drm_winsys *const drm_ws = to_pvr_drm_winsys(bo->ws);
461 const uint32_t virt_offset = offset & (vma->heap->page_size - 1);
462 const uint64_t aligned_virt_size =
463 ALIGN_POT(virt_offset + size, vma->heap->page_size);
464 const uint32_t phys_page_offset = offset - virt_offset;
465
466 struct drm_pvr_ioctl_vm_map_args args = { .device_addr = vma->dev_addr.addr,
467 .flags = 0U,
468 .handle = drm_bo->handle,
469 .offset = phys_page_offset,
470 .size = aligned_virt_size,
471 .vm_context_handle =
472 drm_ws->vm_context };
473
474 VkResult result;
475
476 /* Address should not be mapped already. */
477 assert(!vma->bo);
478
479 /* Check if bo and vma can accommodate the given size and offset. */
480 if (ALIGN_POT(offset + size, vma->heap->page_size) > bo->size ||
481 aligned_virt_size > vma->size) {
482 return vk_error(NULL, VK_ERROR_MEMORY_MAP_FAILED);
483 }
484
485 result = pvr_ioctl(drm_ws->base.render_fd,
486 DRM_IOCTL_PVR_VM_MAP,
487 &args,
488 VK_ERROR_MEMORY_MAP_FAILED);
489 if (result != VK_SUCCESS)
490 return result;
491
492 pvr_drm_buffer_acquire(drm_bo);
493
494 vma->bo = &drm_bo->base;
495 vma->bo_offset = offset;
496 vma->mapped_size = aligned_virt_size;
497
498 if (dev_addr_out)
499 *dev_addr_out = PVR_DEV_ADDR_OFFSET(vma->dev_addr, virt_offset);
500
501 return VK_SUCCESS;
502 }
503
pvr_drm_winsys_vma_unmap(struct pvr_winsys_vma * vma)504 void pvr_drm_winsys_vma_unmap(struct pvr_winsys_vma *vma)
505 {
506 struct pvr_drm_winsys_bo *const drm_bo = to_pvr_drm_winsys_bo(vma->bo);
507 struct pvr_drm_winsys *const drm_ws = to_pvr_drm_winsys(vma->bo->ws);
508
509 struct drm_pvr_ioctl_vm_unmap_args args = {
510 .vm_context_handle = drm_ws->vm_context,
511 .device_addr = vma->dev_addr.addr,
512 .size = vma->mapped_size,
513 };
514
515 /* Address should be mapped. */
516 assert(vma->bo);
517
518 pvr_ioctlf(drm_ws->base.render_fd,
519 DRM_IOCTL_PVR_VM_UNMAP,
520 &args,
521 VK_ERROR_UNKNOWN,
522 "Unmap failed");
523
524 pvr_drm_buffer_release(drm_bo);
525
526 vma->bo = NULL;
527 }
528