/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 *
 * SPDX-License-Identifier: MIT
 */

#include "radeon_drm_cs.h"

#include "util/u_hash_table.h"
#include "util/u_memory.h"
#include "util/u_thread.h"
#include "util/os_mman.h"
#include "util/os_time.h"

#include "frontend/drm_driver.h"

#include <sys/ioctl.h>
#include <xf86drm.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <inttypes.h>

static struct pb_buffer_lean *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags);

static inline struct radeon_bo *radeon_bo(struct pb_buffer_lean *bo)
{
   return (struct radeon_bo *)bo;
}

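/* A free range of GPU virtual address space, kept in a per-heap list so that
 * ranges released by radeon_bomgr_free_va() can be reused by
 * radeon_bomgr_find_va(). */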
struct radeon_bo_va_hole {
   struct list_head list;
   uint64_t offset;
   uint64_t size;
};

static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
{
   struct drm_radeon_gem_busy args = {0};

   args.handle = bo->handle;
   return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
                              &args, sizeof(args)) != 0;
}

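/* Busy check that also handles slab entries: a real BO asks the kernel
 * directly, while a slab entry is busy as long as any of the fences (real
 * BOs) attached to it is busy. Fences found to be idle are dropped here. */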
static bool radeon_bo_is_busy(struct radeon_winsys *rws, struct radeon_bo *bo)
{
   unsigned num_idle;
   bool busy = false;

   if (bo->handle)
      return radeon_real_bo_is_busy(bo);

   mtx_lock(&bo->rws->bo_fence_lock);
   for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
      if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
         busy = true;
         break;
      }
      radeon_ws_bo_reference(rws, &bo->u.slab.fences[num_idle], NULL);
   }
   memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
           (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
   bo->u.slab.num_fences -= num_idle;
   mtx_unlock(&bo->rws->bo_fence_lock);

   return busy;
}

static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
{
   struct drm_radeon_gem_wait_idle args = {0};

   args.handle = bo->handle;
   while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
                          &args, sizeof(args)) == -EBUSY);
}

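/* Wait for a buffer to become idle. For slab entries this waits on each
 * attached fence in turn; the fence list is re-checked after every wait
 * because it may have changed while the fence lock was dropped. */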
static void radeon_bo_wait_idle(struct radeon_winsys *rws, struct radeon_bo *bo)
{
   if (bo->handle) {
      radeon_real_bo_wait_idle(bo);
   } else {
      mtx_lock(&bo->rws->bo_fence_lock);
      while (bo->u.slab.num_fences) {
         struct radeon_bo *fence = NULL;
         radeon_ws_bo_reference(rws, &fence, bo->u.slab.fences[0]);
         mtx_unlock(&bo->rws->bo_fence_lock);

         /* Wait without holding the fence lock. */
         radeon_real_bo_wait_idle(fence);

         mtx_lock(&bo->rws->bo_fence_lock);
         if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
            radeon_ws_bo_reference(rws, &bo->u.slab.fences[0], NULL);
            memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
                    (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
            bo->u.slab.num_fences--;
         }
         radeon_ws_bo_reference(rws, &fence, NULL);
      }
      mtx_unlock(&bo->rws->bo_fence_lock);
   }
}

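/* Wait on a buffer with a timeout. timeout == 0 is a non-blocking busy query,
 * OS_TIMEOUT_INFINITE waits for the buffer to go idle, and any other timeout
 * is emulated by polling the busy status until the deadline. */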
static bool radeon_bo_wait(struct radeon_winsys *rws,
                           struct pb_buffer_lean *_buf, uint64_t timeout,
                           unsigned usage)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   int64_t abs_timeout;

   /* No timeout. Just query. */
   if (timeout == 0)
      return !bo->num_active_ioctls && !radeon_bo_is_busy(rws, bo);

   abs_timeout = os_time_get_absolute_timeout(timeout);

   /* Wait if any ioctl is being submitted with this buffer. */
   if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
      return false;

   /* Infinite timeout. */
   if (abs_timeout == OS_TIMEOUT_INFINITE) {
      radeon_bo_wait_idle(rws, bo);
      return true;
   }

   /* Other timeouts need to be emulated with a loop. */
   while (radeon_bo_is_busy(rws, bo)) {
      if (os_time_get_nano() >= abs_timeout)
         return false;
      os_time_sleep(10);
   }

   return true;
}

static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
{
   /* Zero domains the driver doesn't understand. */
   domain &= RADEON_DOMAIN_VRAM_GTT;

   /* If no domain is set, we must set something... */
   if (!domain)
      domain = RADEON_DOMAIN_VRAM_GTT;

   return domain;
}

static enum radeon_bo_domain radeon_bo_get_initial_domain(
      struct pb_buffer_lean *buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct drm_radeon_gem_op args;

   memset(&args, 0, sizeof(args));
   args.handle = bo->handle;
   args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;

   if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
              bo, bo->handle);
      /* Default domain as returned by get_valid_domain. */
      return RADEON_DOMAIN_VRAM_GTT;
   }

   /* GEM domains and winsys domains are defined the same. */
   return get_valid_domain(args.value);
}

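/* First-fit allocator over the per-heap virtual address range. Holes left by
 * radeon_bomgr_free_va() are scanned first: an exact fit removes the hole, a
 * larger hole is split (leaving a small hole for any alignment waste), and if
 * no hole fits, the allocation is carved off the top of the heap. Returns 0
 * on failure, so address 0 is never handed out. */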
static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
                                     struct radeon_vm_heap *heap,
                                     uint64_t size, uint64_t alignment)
{
   struct radeon_bo_va_hole *hole, *n;
   uint64_t offset = 0, waste = 0;

   /* All VM address space holes will implicitly start aligned to the
    * size alignment, so we don't need to sanitize the alignment here
    */
   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   /* first look for a hole */
   LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
      offset = hole->offset;
      waste = offset % alignment;
      waste = waste ? alignment - waste : 0;
      offset += waste;
      if (offset >= (hole->offset + hole->size)) {
         continue;
      }
      if (!waste && hole->size == size) {
         offset = hole->offset;
         list_del(&hole->list);
         FREE(hole);
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) > size) {
         if (waste) {
            n = CALLOC_STRUCT(radeon_bo_va_hole);
            n->size = waste;
            n->offset = hole->offset;
            list_add(&n->list, &hole->list);
         }
         hole->size -= (size + waste);
         hole->offset += size + waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) == size) {
         hole->size = waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
   }

   offset = heap->start;
   waste = offset % alignment;
   waste = waste ? alignment - waste : 0;

   if (offset + waste + size > heap->end) {
      mtx_unlock(&heap->mutex);
      return 0;
   }

   if (waste) {
      n = CALLOC_STRUCT(radeon_bo_va_hole);
      n->size = waste;
      n->offset = offset;
      list_add(&n->list, &heap->holes);
   }
   offset += waste;
   heap->start += size + waste;
   mtx_unlock(&heap->mutex);
   return offset;
}

static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
                                       uint64_t size, uint64_t alignment)
{
   uint64_t va = 0;

   /* Try to allocate from the 64-bit address space first.
    * If it doesn't exist (start = 0) or if it doesn't have enough space,
    * fall back to the 32-bit address space.
    */
   if (ws->vm64.start)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
   if (!va)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
   return va;
}

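/* Return a VA range to its heap. The range either lowers heap->start again
 * (possibly absorbing the uppermost hole) or is inserted into the sorted hole
 * list, coalescing with adjacent holes where possible. */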
static void radeon_bomgr_free_va(const struct radeon_info *info,
                                 struct radeon_vm_heap *heap,
                                 uint64_t va, uint64_t size)
{
   struct radeon_bo_va_hole *hole = NULL;

   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   if ((va + size) == heap->start) {
      heap->start = va;
      /* Delete uppermost hole if it reaches the new top */
      if (!list_is_empty(&heap->holes)) {
         hole = container_of(heap->holes.next, struct radeon_bo_va_hole, list);
         if ((hole->offset + hole->size) == va) {
            heap->start = hole->offset;
            list_del(&hole->list);
            FREE(hole);
         }
      }
   } else {
      struct radeon_bo_va_hole *next;

      hole = container_of(&heap->holes, struct radeon_bo_va_hole, list);
      LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
         if (next->offset < va)
            break;
         hole = next;
      }

      if (&hole->list != &heap->holes) {
         /* Grow upper hole if it's adjacent */
         if (hole->offset == (va + size)) {
            hole->offset = va;
            hole->size += size;
            /* Merge lower hole if it's adjacent */
            if (next != hole && &next->list != &heap->holes &&
                (next->offset + next->size) == va) {
               next->size += hole->size;
               list_del(&hole->list);
               FREE(hole);
            }
            goto out;
         }
      }

      /* Grow lower hole if it's adjacent */
      if (next != hole && &next->list != &heap->holes &&
          (next->offset + next->size) == va) {
         next->size += size;
         goto out;
      }

      /* FIXME on allocation failure we just lose virtual address space
       * maybe print a warning
       */
      next = CALLOC_STRUCT(radeon_bo_va_hole);
      if (next) {
         next->size = size;
         next->offset = va;
         list_add(&next->list, &hole->list);
      }
   }
out:
   mtx_unlock(&heap->mutex);
}

void radeon_bo_destroy(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = radeon_bo((struct pb_buffer_lean*)_buf);
   struct radeon_drm_winsys *rws = bo->rws;
   struct drm_gem_close args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   mtx_lock(&rws->bo_handles_mutex);
   _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle);
   if (bo->flink_name) {
      _mesa_hash_table_remove_key(rws->bo_names,
                                  (void*)(uintptr_t)bo->flink_name);
   }
   mtx_unlock(&rws->bo_handles_mutex);

   if (bo->u.real.ptr)
      os_munmap(bo->u.real.ptr, bo->base.size);

   if (rws->info.r600_has_virtual_memory) {
      if (rws->va_unmap_working) {
         struct drm_radeon_gem_va va;

         va.handle = bo->handle;
         va.vm_id = 0;
         va.operation = RADEON_VA_UNMAP;
         va.flags = RADEON_VM_PAGE_READABLE |
                    RADEON_VM_PAGE_WRITEABLE |
                    RADEON_VM_PAGE_SNOOPED;
         va.offset = bo->va;

         if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
                                 sizeof(va)) != 0 &&
             va.operation == RADEON_VA_RESULT_ERROR) {
            fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
            fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size);
            fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va);
         }
      }

      radeon_bomgr_free_va(&rws->info,
                           bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
                           bo->va, bo->base.size);
   }

   /* Close object. */
   args.handle = bo->handle;
   drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);

   mtx_destroy(&bo->u.real.map_mutex);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);

   if (bo->u.real.map_count >= 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->rws->mapped_vram -= bo->base.size;
      else
         bo->rws->mapped_gtt -= bo->base.size;
      bo->rws->num_mapped_buffers--;
   }

   FREE(bo);
}

static void radeon_bo_destroy_or_cache(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_drm_winsys *rws = (struct radeon_drm_winsys *)winsys;
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(bo->handle && "must not be called for slab entries");

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(&rws->bo_cache, &bo->u.real.cache_entry);
   else
      radeon_bo_destroy(NULL, _buf);
}

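/* CPU-map the underlying real buffer. User-pointer buffers return the user
 * pointer directly, and slab entries are translated to an offset within their
 * backing real buffer. The mapping is reference-counted and cached in
 * bo->u.real.ptr; if mmap fails, the buffer cache is flushed and the mapping
 * is retried once. */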
void *radeon_bo_do_map(struct radeon_bo *bo)
{
   struct drm_radeon_gem_mmap args = {0};
   void *ptr;
   unsigned offset;

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   if (bo->handle) {
      offset = 0;
   } else {
      offset = bo->va - bo->u.slab.real->va;
      bo = bo->u.slab.real;
   }

   /* Map the buffer. */
   mtx_lock(&bo->u.real.map_mutex);
   /* Return the pointer if it's already mapped. */
   if (bo->u.real.ptr) {
      bo->u.real.map_count++;
      mtx_unlock(&bo->u.real.map_mutex);
      return (uint8_t*)bo->u.real.ptr + offset;
   }
   args.handle = bo->handle;
   args.offset = 0;
   args.size = (uint64_t)bo->base.size;
   if (drmCommandWriteRead(bo->rws->fd,
                           DRM_RADEON_GEM_MMAP,
                           &args,
                           sizeof(args))) {
      mtx_unlock(&bo->u.real.map_mutex);
      fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
              bo, bo->handle);
      return NULL;
   }

   ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                 bo->rws->fd, args.addr_ptr);
   if (ptr == MAP_FAILED) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&bo->rws->bo_cache);

      ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                    bo->rws->fd, args.addr_ptr);
      if (ptr == MAP_FAILED) {
         mtx_unlock(&bo->u.real.map_mutex);
         fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
         return NULL;
      }
   }
   bo->u.real.ptr = ptr;
   bo->u.real.map_count = 1;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram += bo->base.size;
   else
      bo->rws->mapped_gtt += bo->base.size;
   bo->rws->num_mapped_buffers++;

   mtx_unlock(&bo->u.real.map_mutex);
   return (uint8_t*)bo->u.real.ptr + offset;
}

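/* Synchronizing map entry point. Unless PIPE_MAP_UNSYNCHRONIZED is set, the
 * current CS is flushed if it references the buffer (for GPU writes, or for
 * any reference when the map itself is a write) and the buffer is then
 * waited on; with PIPE_MAP_DONTBLOCK the function returns NULL instead of
 * waiting. */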
static void *radeon_bo_map(struct radeon_winsys *rws,
                           struct pb_buffer_lean *buf,
                           struct radeon_cmdbuf *rcs,
                           enum pipe_map_flags usage)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct radeon_drm_cs *cs = rcs ? radeon_drm_cs(rcs) : NULL;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_MAP_DONTBLOCK) {
         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
            }
            radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, OS_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data,
                               RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
               } else {
                  /* Try to avoid busy-waiting in radeon_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     radeon_drm_cs_sync_flush(rcs);
               }
            }

            radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, OS_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->rws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   return radeon_bo_do_map(bo);
}

static void radeon_bo_unmap(struct radeon_winsys *rws, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)_buf;

   if (bo->user_ptr)
      return;

   if (!bo->handle)
      bo = bo->u.slab.real;

   mtx_lock(&bo->u.real.map_mutex);
   if (!bo->u.real.ptr) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's not been mapped */
   }

   assert(bo->u.real.map_count);
   if (--bo->u.real.map_count) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's been mapped multiple times */
   }

   os_munmap(bo->u.real.ptr, bo->base.size);
   bo->u.real.ptr = NULL;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram -= bo->base.size;
   else
      bo->rws->mapped_gtt -= bo->base.size;
   bo->rws->num_mapped_buffers--;

   mtx_unlock(&bo->u.real.map_mutex);
}

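/* Allocate a real GEM buffer. Besides the GEM_CREATE ioctl, this picks a GPU
 * virtual address when the kernel supports virtual memory and maps it with
 * GEM_VA. If the kernel reports that the VA range is already mapped to
 * another buffer (RADEON_VA_RESULT_VA_EXIST), that existing buffer is
 * returned instead of the new one. */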
static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
                                          unsigned size, unsigned alignment,
                                          unsigned initial_domains,
                                          unsigned flags,
                                          int heap)
{
   struct radeon_bo *bo;
   struct drm_radeon_gem_create args;
   int r;

   memset(&args, 0, sizeof(args));

   assert(initial_domains);
   assert((initial_domains &
           ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);

   args.size = size;
   args.alignment = alignment;
   args.initial_domain = initial_domains;
   args.flags = 0;

   /* If VRAM is just stolen system memory, allow both VRAM and
    * GTT, whichever has free space. If a buffer is evicted from
    * VRAM to GTT, it will stay there.
    */
   if (!rws->info.has_dedicated_vram)
      args.initial_domain |= RADEON_DOMAIN_GTT;

   if (flags & RADEON_FLAG_GTT_WC)
      args.flags |= RADEON_GEM_GTT_WC;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      args.flags |= RADEON_GEM_NO_CPU_ACCESS;

   if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
      fprintf(stderr, "radeon: size : %u bytes\n", size);
      fprintf(stderr, "radeon: alignment : %u bytes\n", alignment);
      fprintf(stderr, "radeon: domains : %u\n", args.initial_domain);
      fprintf(stderr, "radeon: flags : %u\n", args.flags);
      return NULL;
   }

   assert(args.handle != 0);

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(alignment);
   bo->base.usage = 0;
   bo->base.size = size;
   bo->rws = rws;
   bo->handle = args.handle;
   bo->va = 0;
   bo->initial_domain = initial_domains;
   bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (heap >= 0) {
      pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                          heap);
   }

   if (rws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;
      unsigned va_gap_size;

      va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;

      if (flags & RADEON_FLAG_32BIT) {
         bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
                                       size + va_gap_size, alignment);
         assert(bo->va + size < rws->vm32.end);
      } else {
         bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
      }

      va.handle = bo->handle;
      va.vm_id = 0;
      va.operation = RADEON_VA_MAP;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
         fprintf(stderr, "radeon: size : %d bytes\n", size);
         fprintf(stderr, "radeon: alignment : %d bytes\n", alignment);
         fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
         fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&rws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer_lean *b = &bo->base;
         struct radeon_bo *old_bo =
            _mesa_hash_table_u64_search(rws->bo_vas, va.offset);

         mtx_unlock(&rws->bo_handles_mutex);
         radeon_bo_reference(&rws->base, &b, &old_bo->base);
         return radeon_bo(b);
      }

      _mesa_hash_table_u64_insert(rws->bo_vas, bo->va, bo);
      mtx_unlock(&rws->bo_handles_mutex);
   }

   if (initial_domains & RADEON_DOMAIN_VRAM)
      rws->allocated_vram += align(size, rws->info.gart_page_size);
   else if (initial_domains & RADEON_DOMAIN_GTT)
      rws->allocated_gtt += align(size, rws->info.gart_page_size);

   return bo;
}

bool radeon_bo_can_reclaim(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = radeon_bo((struct pb_buffer_lean*)_buf);

   if (radeon_bo_is_referenced_by_any_cs(bo))
      return false;

   return radeon_bo_wait(winsys, (struct pb_buffer_lean*)_buf, 0, RADEON_USAGE_READWRITE);
}

bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct radeon_bo *bo = container_of(entry, struct radeon_bo, u.slab.entry);

   return radeon_bo_can_reclaim(priv, &bo->base);
}

static void radeon_bo_slab_destroy(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(!bo->handle);

   pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
}

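/* Create a new slab for sub-allocation: one 64 KB real buffer carved into
 * equally sized slab entries that share the backing buffer's storage and its
 * virtual address range. */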
struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
   struct radeon_drm_winsys *ws = priv;
   struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
   enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
   enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
   unsigned base_hash;

   if (!slab)
      return NULL;

   slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
                                                    64 * 1024, 64 * 1024,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   assert(slab->buffer->handle);

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->base.group_index = group_index;
   slab->base.entry_size = entry_size;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   list_inithead(&slab->base.free);

   base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];

      bo->base.alignment_log2 = util_logbase2(entry_size);
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->rws = ws;
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->hash = base_hash + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.real = slab->buffer;

      list_addtail(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   radeon_ws_bo_reference(&ws->base, &slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}

void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
{
   struct radeon_winsys *rws = (struct radeon_winsys *)priv;
   struct radeon_slab *slab = (struct radeon_slab *)pslab;

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];
      for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
         radeon_ws_bo_reference(rws, &bo->u.slab.fences[j], NULL);
      FREE(bo->u.slab.fences);
   }

   FREE(slab->entries);
   radeon_ws_bo_reference(rws, &slab->buffer, NULL);
   FREE(slab);
}

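/* Convert between the tile-split field encoded in the kernel tiling flags
 * and the tile split expressed in bytes (64..4096), and back. */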
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0: tile_split = 64; break;
   case 1: tile_split = 128; break;
   case 2: tile_split = 256; break;
   case 3: tile_split = 512; break;
   default:
   case 4: tile_split = 1024; break;
   case 5: tile_split = 2048; break;
   case 6: tile_split = 4096; break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64: return 0;
   case 128: return 1;
   case 256: return 2;
   case 512: return 3;
   default:
   case 1024: return 4;
   case 2048: return 5;
   case 4096: return 6;
   }
}

static void radeon_bo_get_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer_lean *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_GET_TILING,
                       &args,
                       sizeof(args));

   if (surf) {
      if (args.tiling_flags & RADEON_TILING_MACRO)
         md->mode = RADEON_SURF_MODE_2D;
      else if (args.tiling_flags & RADEON_TILING_MICRO)
         md->mode = RADEON_SURF_MODE_1D;
      else
         md->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;

      surf->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
      surf->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
      surf->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
      surf->u.legacy.tile_split = eg_tile_split(surf->u.legacy.tile_split);
      surf->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;

      if (bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT))
         surf->flags |= RADEON_SURF_SCANOUT;
      else
         surf->flags &= ~RADEON_SURF_SCANOUT;
      return;
   }

   md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
   md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
   if (args.tiling_flags & RADEON_TILING_MICRO)
      md->u.legacy.microtile = RADEON_LAYOUT_TILED;
   else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
      md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;

   if (args.tiling_flags & RADEON_TILING_MACRO)
      md->u.legacy.macrotile = RADEON_LAYOUT_TILED;

   md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
   md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
   md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
   md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
   md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
   md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
}

static void radeon_bo_set_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer_lean *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   os_wait_until_zero(&bo->num_active_ioctls, OS_TIMEOUT_INFINITE);

   if (surf) {
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
         args.tiling_flags |= RADEON_TILING_MICRO;
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (surf->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
         RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (surf->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
         RADEON_TILING_EG_BANKH_SHIFT;
      if (surf->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(surf->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
            RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (surf->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
         RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !(surf->flags & RADEON_SURF_SCANOUT))
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = surf->u.legacy.level[0].nblk_x * surf->bpe;
   } else {
      if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MICRO;
      else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
         RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
         RADEON_TILING_EG_BANKH_SHIFT;
      if (md->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
            RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
         RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = md->u.legacy.stride;
   }

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_SET_TILING,
                       &args,
                       sizeof(args));
}

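/* Main buffer allocation entry point. Small allocations that fit a slab heap
 * are sub-allocated from 64 KB slabs; everything else is page-aligned and,
 * for non-shared buffers, first looked up in the reusable buffer cache before
 * a new real BO is created. On allocation failure the slab and cache pools
 * are emptied and the allocation is retried once. */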
static struct pb_buffer_lean *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;

   radeon_canonicalize_bo_flags(&domain, &flags);

   assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */

   /* Only 32-bit sizes are supported. */
   if (size > UINT_MAX)
      return NULL;

   int heap = radeon_get_heap_index(domain, flags);

   /* Sub-allocate small buffers from slabs. */
   if (heap >= 0 &&
       size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
       ws->info.r600_has_virtual_memory &&
       alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
      struct pb_slab_entry *entry;

      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      if (!entry) {
         /* Clear the cache and try again. */
         pb_cache_release_all_buffers(&ws->bo_cache);

         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      }
      if (!entry)
         return NULL;

      bo = container_of(entry, struct radeon_bo, u.slab.entry);

      pipe_reference_init(&bo->base.reference, 1);

      return &bo->base;
   }

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
                            !(flags & RADEON_FLAG_DISCARDABLE);

   /* Shared resources don't use cached heaps. */
   if (use_reusable_pool) {
      /* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */
      heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_NO_SUBALLOC);
      assert(heap >= 0 && heap < RADEON_NUM_HEAPS);

      bo = radeon_bo((struct pb_buffer_lean*)pb_cache_reclaim_buffer(&ws->bo_cache, size,
                                                                     alignment, 0, heap));
      if (bo)
         return &bo->base;
   }

   bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
   if (!bo) {
      /* Clear the cache and try again. */
      if (ws->info.r600_has_virtual_memory)
         pb_slabs_reclaim(&ws->bo_slabs);
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
      if (!bo)
         return NULL;
   }

   bo->u.real.use_reusable_pool = use_reusable_pool;

   mtx_lock(&ws->bo_handles_mutex);
   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
   mtx_unlock(&ws->bo_handles_mutex);

   return &bo->base;
}

static void radeon_winsys_bo_destroy(struct radeon_winsys *ws, struct pb_buffer_lean *buf)
{
   struct radeon_bo *bo = radeon_bo(buf);

   if (bo->handle)
      radeon_bo_destroy_or_cache(ws, buf);
   else
      radeon_bo_slab_destroy(ws, buf);
}

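/* Wrap user memory in a GEM buffer via GEM_USERPTR (anonymous memory only,
 * registered and validated by the kernel, per the flags below), then assign
 * a GPU virtual address like any other real buffer. */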
static struct pb_buffer_lean *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
                                                        void *pointer, uint64_t size,
                                                        enum radeon_bo_flag flags)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct drm_radeon_gem_userptr args;
   struct radeon_bo *bo;
   int r;

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   memset(&args, 0, sizeof(args));
   args.addr = (uintptr_t)pointer;
   args.size = align(size, ws->info.gart_page_size);
   args.flags = RADEON_GEM_USERPTR_ANONONLY |
                RADEON_GEM_USERPTR_REGISTER |
                RADEON_GEM_USERPTR_VALIDATE;

   if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
                           &args, sizeof(args))) {
      FREE(bo);
      return NULL;
   }

   assert(args.handle != 0);

   mtx_lock(&ws->bo_handles_mutex);

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->handle = args.handle;
   bo->base.alignment_log2 = 0;
   bo->base.size = size;
   bo->rws = ws;
   bo->user_ptr = pointer;
   bo->va = 0;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer_lean *b = &bo->base;
         struct radeon_bo *old_bo =
            _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         radeon_bo_reference(rws, &b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer_lean*)bo;
}

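/* Import a buffer shared by flink name or dma-buf fd. The bo_names/bo_handles
 * hash tables guarantee that a given handle always maps to the same BO;
 * creating two BOs for one handle and relocating both in a CS would deadlock
 * in the kernel. Imported buffers also get a GPU virtual address here. */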
static struct pb_buffer_lean *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
                                                           struct winsys_handle *whandle,
                                                           unsigned vm_alignment,
                                                           bool is_dri_prime_linear_buffer)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;
   int r;
   unsigned handle;
   uint64_t size = 0;

   /* We must maintain a list of pairs <handle, bo>, so that we always return
    * the same BO for one particular handle. If we didn't do that and created
    * more than one BO for the same handle and then relocated them in a CS,
    * we would hit a deadlock in the kernel.
    *
    * The list of pairs is guarded by a mutex, of course. */
   mtx_lock(&ws->bo_handles_mutex);

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      /* First check if there already is an existing bo for the handle. */
      bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      /* We must first get the GEM handle, as fds are unreliable keys */
      r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
      if (r)
         goto fail;
      bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
   } else {
      /* Unknown handle type */
      goto fail;
   }

   if (bo) {
      /* Increase the refcount. */
      if (unlikely(p_atomic_inc_return(&bo->base.reference.count) == 1)) {
         p_atomic_dec(&bo->base.reference.count);
         assert(p_atomic_read(&bo->base.reference.count) == 0);
      } else {
         goto done;
      }
   }

   /* There isn't, create a new one. */
   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo) {
      goto fail;
   }

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      struct drm_gem_open open_arg = {};
      memset(&open_arg, 0, sizeof(open_arg));
      /* Open the BO. */
      open_arg.name = whandle->handle;
      if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
         FREE(bo);
         goto fail;
      }
      handle = open_arg.handle;
      size = open_arg.size;
      bo->flink_name = whandle->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      size = lseek(whandle->handle, 0, SEEK_END);
      /*
       * Could check errno to determine whether the kernel is new enough, but
       * it doesn't really matter why this failed, just that it failed.
       */
      if (size == (off_t)-1) {
         FREE(bo);
         goto fail;
      }
      lseek(whandle->handle, 0, SEEK_SET);
   }

   assert(handle != 0);

   bo->handle = handle;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = 0;
   bo->base.size = (unsigned) size;
   bo->rws = ws;
   bo->va = 0;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (bo->flink_name)
      _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

done:
   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory && !bo->va) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer_lean *b = &bo->base;
         struct radeon_bo *old_bo =
            _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         radeon_bo_reference(rws, &b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer_lean*)bo;

fail:
   mtx_unlock(&ws->bo_handles_mutex);
   return NULL;
}

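/* Export a buffer as a flink name, a KMS handle, or a dma-buf fd. Exported
 * buffers are taken out of the reusable pool so a shared buffer is never
 * recycled for another allocation. Slab entries cannot be exported. */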
static bool radeon_winsys_bo_get_handle(struct radeon_winsys *rws,
                                        struct pb_buffer_lean *buffer,
                                        struct winsys_handle *whandle)
{
   struct drm_gem_flink flink;
   struct radeon_bo *bo = radeon_bo(buffer);
   struct radeon_drm_winsys *ws = bo->rws;

   /* Don't allow exports of slab entries. */
   if (!bo->handle)
      return false;

   memset(&flink, 0, sizeof(flink));

   bo->u.real.use_reusable_pool = false;

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      if (!bo->flink_name) {
         flink.handle = bo->handle;

         if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
            return false;
         }

         bo->flink_name = flink.name;

         mtx_lock(&ws->bo_handles_mutex);
         _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
         mtx_unlock(&ws->bo_handles_mutex);
      }
      whandle->handle = bo->flink_name;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
      whandle->handle = bo->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
         return false;
   }

   return true;
}

static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer_lean *buf)
{
   return ((struct radeon_bo*)buf)->user_ptr != NULL;
}

static bool radeon_winsys_bo_is_suballocated(struct pb_buffer_lean *buf)
{
   return !((struct radeon_bo*)buf)->handle;
}

static uint64_t radeon_winsys_bo_va(struct pb_buffer_lean *buf)
{
   return ((struct radeon_bo*)buf)->va;
}

static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer_lean *buf)
{
   struct radeon_bo *bo = radeon_bo(buf);

   if (bo->handle)
      return 0;

   return bo->va - bo->u.slab.real->va;
}

void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
{
   ws->base.buffer_set_metadata = radeon_bo_set_metadata;
   ws->base.buffer_get_metadata = radeon_bo_get_metadata;
   ws->base.buffer_map = radeon_bo_map;
   ws->base.buffer_unmap = radeon_bo_unmap;
   ws->base.buffer_wait = radeon_bo_wait;
   ws->base.buffer_create = radeon_winsys_bo_create;
   ws->base.buffer_destroy = radeon_winsys_bo_destroy;
   ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
   ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
   ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
   ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
   ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
   ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
   ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
   ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
}