/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 *
 * SPDX-License-Identifier: MIT
 */

#include "radeon_drm_cs.h"

#include "util/u_hash_table.h"
#include "util/u_memory.h"
#include "util/u_thread.h"
#include "util/os_mman.h"
#include "util/os_time.h"

#include "frontend/drm_driver.h"

#include <sys/ioctl.h>
#include <xf86drm.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <inttypes.h>

static struct pb_buffer_lean *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags);

static inline struct radeon_bo *radeon_bo(struct pb_buffer_lean *bo)
{
   return (struct radeon_bo *)bo;
}

struct radeon_bo_va_hole {
   struct list_head list;
   uint64_t         offset;
   uint64_t         size;
};

static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
{
   struct drm_radeon_gem_busy args = {0};

   args.handle = bo->handle;
   return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
                              &args, sizeof(args)) != 0;
}

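/* Check whether a buffer is busy. Real BOs query the kernel directly;
 * slab entries check the fences of the real BOs they depend on and drop
 * references to fences that have already signalled.
 */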
static bool radeon_bo_is_busy(struct radeon_winsys *rws, struct radeon_bo *bo)
{
   unsigned num_idle;
   bool busy = false;

   if (bo->handle)
      return radeon_real_bo_is_busy(bo);

   mtx_lock(&bo->rws->bo_fence_lock);
   for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
      if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
         busy = true;
         break;
      }
      radeon_ws_bo_reference(rws, &bo->u.slab.fences[num_idle], NULL);
   }
   memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
         (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
   bo->u.slab.num_fences -= num_idle;
   mtx_unlock(&bo->rws->bo_fence_lock);

   return busy;
}

static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
{
   struct drm_radeon_gem_wait_idle args = {0};

   args.handle = bo->handle;
   while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
                          &args, sizeof(args)) == -EBUSY);
}

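/* Block until a buffer is idle. Slab entries wait on each of their fences
 * in turn, releasing the fence lock while the wait ioctl is in flight.
 */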
static void radeon_bo_wait_idle(struct radeon_winsys *rws, struct radeon_bo *bo)
{
   if (bo->handle) {
      radeon_real_bo_wait_idle(bo);
   } else {
      mtx_lock(&bo->rws->bo_fence_lock);
      while (bo->u.slab.num_fences) {
         struct radeon_bo *fence = NULL;
         radeon_ws_bo_reference(rws, &fence, bo->u.slab.fences[0]);
         mtx_unlock(&bo->rws->bo_fence_lock);

         /* Wait without holding the fence lock. */
         radeon_real_bo_wait_idle(fence);

         mtx_lock(&bo->rws->bo_fence_lock);
         if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
            radeon_ws_bo_reference(rws, &bo->u.slab.fences[0], NULL);
            memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
                  (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
            bo->u.slab.num_fences--;
         }
         radeon_ws_bo_reference(rws, &fence, NULL);
      }
      mtx_unlock(&bo->rws->bo_fence_lock);
   }
}

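/* Wait for a buffer to become idle. A timeout of 0 only queries the busy
 * state, OS_TIMEOUT_INFINITE waits forever, and any other timeout is
 * emulated by polling the busy status until the deadline passes.
 */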
static bool radeon_bo_wait(struct radeon_winsys *rws,
                           struct pb_buffer_lean *_buf, uint64_t timeout,
                           unsigned usage)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   int64_t abs_timeout;

   /* No timeout. Just query. */
   if (timeout == 0)
      return !bo->num_active_ioctls && !radeon_bo_is_busy(rws, bo);

   abs_timeout = os_time_get_absolute_timeout(timeout);

   /* Wait if any ioctl is being submitted with this buffer. */
   if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
      return false;

   /* Infinite timeout. */
   if (abs_timeout == OS_TIMEOUT_INFINITE) {
      radeon_bo_wait_idle(rws, bo);
      return true;
   }

   /* Other timeouts need to be emulated with a loop. */
   while (radeon_bo_is_busy(rws, bo)) {
      if (os_time_get_nano() >= abs_timeout)
         return false;
      os_time_sleep(10);
   }

   return true;
}

static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
{
   /* Zero domains the driver doesn't understand. */
   domain &= RADEON_DOMAIN_VRAM_GTT;

   /* If no domain is set, we must set something... */
   if (!domain)
      domain = RADEON_DOMAIN_VRAM_GTT;

   return domain;
}

static enum radeon_bo_domain radeon_bo_get_initial_domain(
      struct pb_buffer_lean *buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct drm_radeon_gem_op args;

   memset(&args, 0, sizeof(args));
   args.handle = bo->handle;
   args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;

   if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
              bo, bo->handle);
      /* Default domain as returned by get_valid_domain. */
      return RADEON_DOMAIN_VRAM_GTT;
   }

   /* GEM domains and winsys domains are defined the same. */
   return get_valid_domain(args.value);
}

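/* Allocate a range of the virtual address space from the given heap.
 * Holes left by previous frees are reused first; otherwise a new range is
 * taken at heap->start, which is advanced past the allocation. Returns 0
 * if the heap is exhausted.
 */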
static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
                                     struct radeon_vm_heap *heap,
                                     uint64_t size, uint64_t alignment)
{
   struct radeon_bo_va_hole *hole, *n;
   uint64_t offset = 0, waste = 0;

   /* All VM address space holes will implicitly start aligned to the
    * size alignment, so we don't need to sanitize the alignment here
    */
   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   /* first look for a hole */
   LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
      offset = hole->offset;
      waste = offset % alignment;
      waste = waste ? alignment - waste : 0;
      offset += waste;
      if (offset >= (hole->offset + hole->size)) {
         continue;
      }
      if (!waste && hole->size == size) {
         offset = hole->offset;
         list_del(&hole->list);
         FREE(hole);
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) > size) {
         if (waste) {
            n = CALLOC_STRUCT(radeon_bo_va_hole);
            n->size = waste;
            n->offset = hole->offset;
            list_add(&n->list, &hole->list);
         }
         hole->size -= (size + waste);
         hole->offset += size + waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) == size) {
         hole->size = waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
   }

   offset = heap->start;
   waste = offset % alignment;
   waste = waste ? alignment - waste : 0;

   if (offset + waste + size > heap->end) {
      mtx_unlock(&heap->mutex);
      return 0;
   }

   if (waste) {
      n = CALLOC_STRUCT(radeon_bo_va_hole);
      n->size = waste;
      n->offset = offset;
      list_add(&n->list, &heap->holes);
   }
   offset += waste;
   heap->start += size + waste;
   mtx_unlock(&heap->mutex);
   return offset;
}

static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
                                       uint64_t size, uint64_t alignment)
{
   uint64_t va = 0;

   /* Try to allocate from the 64-bit address space first.
    * If it doesn't exist (start = 0) or if it doesn't have enough space,
    * fall back to the 32-bit address space.
    */
   if (ws->vm64.start)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
   if (!va)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
   return va;
}

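/* Return a virtual address range to the heap: either merge it back into
 * heap->start, merge it with an adjacent hole, or record it as a new hole
 * in the sorted hole list.
 */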
static void radeon_bomgr_free_va(const struct radeon_info *info,
                                 struct radeon_vm_heap *heap,
                                 uint64_t va, uint64_t size)
{
   struct radeon_bo_va_hole *hole = NULL;

   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   if ((va + size) == heap->start) {
      heap->start = va;
      /* Delete uppermost hole if it reaches the new top */
      if (!list_is_empty(&heap->holes)) {
         hole = container_of(heap->holes.next, struct radeon_bo_va_hole, list);
         if ((hole->offset + hole->size) == va) {
            heap->start = hole->offset;
            list_del(&hole->list);
            FREE(hole);
         }
      }
   } else {
      struct radeon_bo_va_hole *next;

      hole = container_of(&heap->holes, struct radeon_bo_va_hole, list);
      LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
         if (next->offset < va)
            break;
         hole = next;
      }

      if (&hole->list != &heap->holes) {
         /* Grow upper hole if it's adjacent */
         if (hole->offset == (va + size)) {
            hole->offset = va;
            hole->size += size;
            /* Merge lower hole if it's adjacent */
            if (next != hole && &next->list != &heap->holes &&
                (next->offset + next->size) == va) {
               next->size += hole->size;
               list_del(&hole->list);
               FREE(hole);
            }
            goto out;
         }
      }

      /* Grow lower hole if it's adjacent */
      if (next != hole && &next->list != &heap->holes &&
          (next->offset + next->size) == va) {
         next->size += size;
         goto out;
      }

      /* FIXME on allocation failure we just lose virtual address space
       * maybe print a warning
       */
      next = CALLOC_STRUCT(radeon_bo_va_hole);
      if (next) {
         next->size = size;
         next->offset = va;
         list_add(&next->list, &hole->list);
      }
   }
out:
   mtx_unlock(&heap->mutex);
}

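/* Destroy a real (non-slab) buffer: drop it from the handle/name tables,
 * unmap it, release its virtual address range, close the GEM handle and
 * update the memory accounting.
 */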
void radeon_bo_destroy(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = radeon_bo((struct pb_buffer_lean*)_buf);
   struct radeon_drm_winsys *rws = bo->rws;
   struct drm_gem_close args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   mtx_lock(&rws->bo_handles_mutex);
   _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle);
   if (bo->flink_name) {
      _mesa_hash_table_remove_key(rws->bo_names,
                                  (void*)(uintptr_t)bo->flink_name);
   }
   mtx_unlock(&rws->bo_handles_mutex);

   if (bo->u.real.ptr)
      os_munmap(bo->u.real.ptr, bo->base.size);

   if (rws->info.r600_has_virtual_memory) {
      if (rws->va_unmap_working) {
         struct drm_radeon_gem_va va;

         va.handle = bo->handle;
         va.vm_id = 0;
         va.operation = RADEON_VA_UNMAP;
         va.flags = RADEON_VM_PAGE_READABLE |
                    RADEON_VM_PAGE_WRITEABLE |
                    RADEON_VM_PAGE_SNOOPED;
         va.offset = bo->va;

         if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
                                 sizeof(va)) != 0 &&
             va.operation == RADEON_VA_RESULT_ERROR) {
            fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
            fprintf(stderr, "radeon:    size      : %"PRIu64" bytes\n", bo->base.size);
            fprintf(stderr, "radeon:    va        : 0x%"PRIx64"\n", bo->va);
         }
      }

      radeon_bomgr_free_va(&rws->info,
                           bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
                           bo->va, bo->base.size);
   }

   /* Close object. */
   args.handle = bo->handle;
   drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);

   mtx_destroy(&bo->u.real.map_mutex);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);

   if (bo->u.real.map_count >= 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->rws->mapped_vram -= bo->base.size;
      else
         bo->rws->mapped_gtt -= bo->base.size;
      bo->rws->num_mapped_buffers--;
   }

   FREE(bo);
}

static void radeon_bo_destroy_or_cache(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_drm_winsys *rws = (struct radeon_drm_winsys *)winsys;
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(bo->handle && "must not be called for slab entries");

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(&rws->bo_cache, &bo->u.real.cache_entry);
   else
      radeon_bo_destroy(NULL, _buf);
}

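/* CPU-map a buffer. User-pointer buffers return the user pointer directly,
 * slab entries are translated to an offset within their backing real BO,
 * and real BOs are mmapped once and reference-counted via map_count.
 */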
void *radeon_bo_do_map(struct radeon_bo *bo)
{
   struct drm_radeon_gem_mmap args = {0};
   void *ptr;
   unsigned offset;

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   if (bo->handle) {
      offset = 0;
   } else {
      offset = bo->va - bo->u.slab.real->va;
      bo = bo->u.slab.real;
   }

   /* Map the buffer. */
   mtx_lock(&bo->u.real.map_mutex);
   /* Return the pointer if it's already mapped. */
   if (bo->u.real.ptr) {
      bo->u.real.map_count++;
      mtx_unlock(&bo->u.real.map_mutex);
      return (uint8_t*)bo->u.real.ptr + offset;
   }
   args.handle = bo->handle;
   args.offset = 0;
   args.size = (uint64_t)bo->base.size;
   if (drmCommandWriteRead(bo->rws->fd,
                           DRM_RADEON_GEM_MMAP,
                           &args,
                           sizeof(args))) {
      mtx_unlock(&bo->u.real.map_mutex);
      fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
              bo, bo->handle);
      return NULL;
   }

   ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                 bo->rws->fd, args.addr_ptr);
   if (ptr == MAP_FAILED) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&bo->rws->bo_cache);

      ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                    bo->rws->fd, args.addr_ptr);
      if (ptr == MAP_FAILED) {
         mtx_unlock(&bo->u.real.map_mutex);
         fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
         return NULL;
      }
   }
   bo->u.real.ptr = ptr;
   bo->u.real.map_count = 1;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram += bo->base.size;
   else
      bo->rws->mapped_gtt += bo->base.size;
   bo->rws->num_mapped_buffers++;

   mtx_unlock(&bo->u.real.map_mutex);
   return (uint8_t*)bo->u.real.ptr + offset;
}

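/* Map a buffer for CPU access, honoring the synchronization semantics of
 * pipe_map_flags: unless PIPE_MAP_UNSYNCHRONIZED is set, flush any command
 * stream that references the buffer and wait (or fail for DONTBLOCK)
 * before handing out the pointer.
 */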
static void *radeon_bo_map(struct radeon_winsys *rws,
                           struct pb_buffer_lean *buf,
                           struct radeon_cmdbuf *rcs,
                           enum pipe_map_flags usage)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct radeon_drm_cs *cs = rcs ? radeon_drm_cs(rcs) : NULL;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_MAP_DONTBLOCK) {
         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
            }
            radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, OS_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data,
                               RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
               } else {
                  /* Try to avoid busy-waiting in radeon_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     radeon_drm_cs_sync_flush(rcs);
               }
            }

            radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, OS_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->rws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   return radeon_bo_do_map(bo);
}

static void radeon_bo_unmap(struct radeon_winsys *rws, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)_buf;

   if (bo->user_ptr)
      return;

   if (!bo->handle)
      bo = bo->u.slab.real;

   mtx_lock(&bo->u.real.map_mutex);
   if (!bo->u.real.ptr) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's not been mapped */
   }

   assert(bo->u.real.map_count);
   if (--bo->u.real.map_count) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's been mapped multiple times */
   }

   os_munmap(bo->u.real.ptr, bo->base.size);
   bo->u.real.ptr = NULL;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram -= bo->base.size;
   else
      bo->rws->mapped_gtt -= bo->base.size;
   bo->rws->num_mapped_buffers--;

   mtx_unlock(&bo->u.real.map_mutex);
}

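/* Allocate a real GEM buffer via DRM_RADEON_GEM_CREATE and, when virtual
 * memory is available, assign it a virtual address with DRM_RADEON_GEM_VA.
 * Updates the VRAM/GTT accounting on success.
 */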
static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
                                          unsigned size, unsigned alignment,
                                          unsigned initial_domains,
                                          unsigned flags,
                                          int heap)
{
   struct radeon_bo *bo;
   struct drm_radeon_gem_create args;
   int r;

   memset(&args, 0, sizeof(args));

   assert(initial_domains);
   assert((initial_domains &
           ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);

   args.size = size;
   args.alignment = alignment;
   args.initial_domain = initial_domains;
   args.flags = 0;

   /* If VRAM is just stolen system memory, allow both VRAM and
    * GTT, whichever has free space. If a buffer is evicted from
    * VRAM to GTT, it will stay there.
    */
   if (!rws->info.has_dedicated_vram)
      args.initial_domain |= RADEON_DOMAIN_GTT;

   if (flags & RADEON_FLAG_GTT_WC)
      args.flags |= RADEON_GEM_GTT_WC;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      args.flags |= RADEON_GEM_NO_CPU_ACCESS;

   if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
      fprintf(stderr, "radeon:    size      : %u bytes\n", size);
      fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
      fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
      fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
      return NULL;
   }

   assert(args.handle != 0);

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(alignment);
   bo->base.usage = 0;
   bo->base.size = size;
   bo->rws = rws;
   bo->handle = args.handle;
   bo->va = 0;
   bo->initial_domain = initial_domains;
   bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (heap >= 0) {
      pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                          heap);
   }

   if (rws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;
      unsigned va_gap_size;

      va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;

      if (flags & RADEON_FLAG_32BIT) {
         bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
                                       size + va_gap_size, alignment);
         assert(bo->va + size < rws->vm32.end);
      } else {
         bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
      }

      va.handle = bo->handle;
      va.vm_id = 0;
      va.operation = RADEON_VA_MAP;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
         fprintf(stderr, "radeon:    size      : %d bytes\n", size);
         fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
         fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
         fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&rws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer_lean *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(rws->bo_vas, va.offset);

         mtx_unlock(&rws->bo_handles_mutex);
         radeon_bo_reference(&rws->base, &b, &old_bo->base);
         return radeon_bo(b);
      }

      _mesa_hash_table_u64_insert(rws->bo_vas, bo->va, bo);
      mtx_unlock(&rws->bo_handles_mutex);
   }

   if (initial_domains & RADEON_DOMAIN_VRAM)
      rws->allocated_vram += align(size, rws->info.gart_page_size);
   else if (initial_domains & RADEON_DOMAIN_GTT)
      rws->allocated_gtt += align(size, rws->info.gart_page_size);

   return bo;
}

bool radeon_bo_can_reclaim(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = radeon_bo((struct pb_buffer_lean*)_buf);

   if (radeon_bo_is_referenced_by_any_cs(bo))
      return false;

   return radeon_bo_wait(winsys, (struct pb_buffer_lean*)_buf, 0, RADEON_USAGE_READWRITE);
}

bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct radeon_bo *bo = container_of(entry, struct radeon_bo, u.slab.entry);

   return radeon_bo_can_reclaim(priv, &bo->base);
}

static void radeon_bo_slab_destroy(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(!bo->handle);

   pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
}

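/* pb_slabs backend: allocate a 64 KB real BO and split it into equally
 * sized slab entries that share the parent buffer's virtual address range.
 */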
struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
   struct radeon_drm_winsys *ws = priv;
   struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
   enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
   enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
   unsigned base_hash;

   if (!slab)
      return NULL;

   slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
                                                    64 * 1024, 64 * 1024,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   assert(slab->buffer->handle);

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->base.group_index = group_index;
   slab->base.entry_size = entry_size;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   list_inithead(&slab->base.free);

   base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];

      bo->base.alignment_log2 = util_logbase2(entry_size);
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->rws = ws;
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->hash = base_hash + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.real = slab->buffer;

      list_addtail(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   radeon_ws_bo_reference(&ws->base, &slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}

void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
{
   struct radeon_winsys *rws = (struct radeon_winsys *)priv;
   struct radeon_slab *slab = (struct radeon_slab *)pslab;

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];
      for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
         radeon_ws_bo_reference(rws, &bo->u.slab.fences[j], NULL);
      FREE(bo->u.slab.fences);
   }

   FREE(slab->entries);
   radeon_ws_bo_reference(rws, &slab->buffer, NULL);
   FREE(slab);
}

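/* Translate the 3-bit Evergreen tile-split field to bytes and back. */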
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:     tile_split = 64;    break;
   case 1:     tile_split = 128;   break;
   case 2:     tile_split = 256;   break;
   case 3:     tile_split = 512;   break;
   default:
   case 4:     tile_split = 1024;  break;
   case 5:     tile_split = 2048;  break;
   case 6:     tile_split = 4096;  break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:    return 0;
   case 128:   return 1;
   case 256:   return 2;
   case 512:   return 3;
   default:
   case 1024:  return 4;
   case 2048:  return 5;
   case 4096:  return 6;
   }
}

static void radeon_bo_get_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer_lean *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_GET_TILING,
                       &args,
                       sizeof(args));

   if (surf) {
      if (args.tiling_flags & RADEON_TILING_MACRO)
         md->mode = RADEON_SURF_MODE_2D;
      else if (args.tiling_flags & RADEON_TILING_MICRO)
         md->mode = RADEON_SURF_MODE_1D;
      else
         md->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;

      surf->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
      surf->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
      surf->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
      surf->u.legacy.tile_split = eg_tile_split(surf->u.legacy.tile_split);
      surf->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;

      if (bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT))
         surf->flags |= RADEON_SURF_SCANOUT;
      else
         surf->flags &= ~RADEON_SURF_SCANOUT;
      return;
   }

   md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
   md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
   if (args.tiling_flags & RADEON_TILING_MICRO)
      md->u.legacy.microtile = RADEON_LAYOUT_TILED;
   else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
      md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;

   if (args.tiling_flags & RADEON_TILING_MACRO)
      md->u.legacy.macrotile = RADEON_LAYOUT_TILED;

   md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
   md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
   md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
   md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
   md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
   md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
}

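/* Encode tiling parameters from either the surface or the generic metadata
 * struct into tiling flags and apply them with DRM_RADEON_GEM_SET_TILING.
 */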
static void radeon_bo_set_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer_lean *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   os_wait_until_zero(&bo->num_active_ioctls, OS_TIMEOUT_INFINITE);

   if (surf) {
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
         args.tiling_flags |= RADEON_TILING_MICRO;
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (surf->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
                           RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (surf->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
                           RADEON_TILING_EG_BANKH_SHIFT;
      if (surf->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(surf->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
                              RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (surf->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
                           RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !(surf->flags & RADEON_SURF_SCANOUT))
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = surf->u.legacy.level[0].nblk_x * surf->bpe;
   } else {
      if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MICRO;
      else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
                           RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
                           RADEON_TILING_EG_BANKH_SHIFT;
      if (md->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
                              RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
                           RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = md->u.legacy.stride;
   }

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_SET_TILING,
                       &args,
                       sizeof(args));
}

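/* Main buffer allocation entry point: small allocations are sub-allocated
 * from slabs, reusable allocations are served from the pb_cache when
 * possible, and everything else goes through radeon_create_bo.
 */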
static struct pb_buffer_lean *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;

   radeon_canonicalize_bo_flags(&domain, &flags);

   assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */

   /* Only 32-bit sizes are supported. */
   if (size > UINT_MAX)
      return NULL;

   int heap = radeon_get_heap_index(domain, flags);

   /* Sub-allocate small buffers from slabs. */
   if (heap >= 0 &&
       size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
       ws->info.r600_has_virtual_memory &&
       alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
      struct pb_slab_entry *entry;

      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      if (!entry) {
         /* Clear the cache and try again. */
         pb_cache_release_all_buffers(&ws->bo_cache);

         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      }
      if (!entry)
         return NULL;

      bo = container_of(entry, struct radeon_bo, u.slab.entry);

      pipe_reference_init(&bo->base.reference, 1);

      return &bo->base;
   }

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
                            !(flags & RADEON_FLAG_DISCARDABLE);

   /* Shared resources don't use cached heaps. */
   if (use_reusable_pool) {
      /* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */
      heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_NO_SUBALLOC);
      assert(heap >= 0 && heap < RADEON_NUM_HEAPS);

      bo = radeon_bo((struct pb_buffer_lean*)pb_cache_reclaim_buffer(&ws->bo_cache, size,
                                                                alignment, 0, heap));
      if (bo)
         return &bo->base;
   }

   bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
   if (!bo) {
      /* Clear the cache and try again. */
      if (ws->info.r600_has_virtual_memory)
         pb_slabs_reclaim(&ws->bo_slabs);
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
      if (!bo)
         return NULL;
   }

   bo->u.real.use_reusable_pool = use_reusable_pool;

   mtx_lock(&ws->bo_handles_mutex);
   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
   mtx_unlock(&ws->bo_handles_mutex);

   return &bo->base;
}

static void radeon_winsys_bo_destroy(struct radeon_winsys *ws, struct pb_buffer_lean *buf)
{
   struct radeon_bo *bo = radeon_bo(buf);

   if (bo->handle)
      radeon_bo_destroy_or_cache(ws, buf);
   else
      radeon_bo_slab_destroy(ws, buf);
}

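/* Wrap an existing user memory allocation in a GEM buffer via
 * DRM_RADEON_GEM_USERPTR and assign it a virtual address.
 */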
static struct pb_buffer_lean *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
                                                   void *pointer, uint64_t size,
                                                   enum radeon_bo_flag flags)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct drm_radeon_gem_userptr args;
   struct radeon_bo *bo;
   int r;

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   memset(&args, 0, sizeof(args));
   args.addr = (uintptr_t)pointer;
   args.size = align(size, ws->info.gart_page_size);
   args.flags = RADEON_GEM_USERPTR_ANONONLY |
                RADEON_GEM_USERPTR_REGISTER |
                RADEON_GEM_USERPTR_VALIDATE;

   if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
                           &args, sizeof(args))) {
      FREE(bo);
      return NULL;
   }

   assert(args.handle != 0);

   mtx_lock(&ws->bo_handles_mutex);

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->handle = args.handle;
   bo->base.alignment_log2 = 0;
   bo->base.size = size;
   bo->rws = ws;
   bo->user_ptr = pointer;
   bo->va = 0;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer_lean *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         radeon_bo_reference(rws, &b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer_lean*)bo;
}

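/* Import a buffer from a flink name or dma-buf fd. The handle tables are
 * consulted first so that the same kernel object always maps to the same
 * radeon_bo, which avoids duplicate relocations of one buffer in a CS.
 */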
static struct pb_buffer_lean *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
                                                      struct winsys_handle *whandle,
                                                      unsigned vm_alignment,
                                                      bool is_dri_prime_linear_buffer)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;
   int r;
   unsigned handle;
   uint64_t size = 0;

   /* We must maintain a list of pairs <handle, bo>, so that we always return
    * the same BO for one particular handle. If we didn't do that and created
    * more than one BO for the same handle and then relocated them in a CS,
    * we would hit a deadlock in the kernel.
    *
    * The list of pairs is guarded by a mutex, of course. */
   mtx_lock(&ws->bo_handles_mutex);

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      /* First check if there already is an existing bo for the handle. */
      bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      /* We must first get the GEM handle, as fds are unreliable keys */
      r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
      if (r)
         goto fail;
      bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
   } else {
      /* Unknown handle type */
      goto fail;
   }

   if (bo) {
      /* Increase the refcount. */
      if (unlikely(p_atomic_inc_return(&bo->base.reference.count) == 1)) {
         p_atomic_dec(&bo->base.reference.count);
         assert(p_atomic_read(&bo->base.reference.count) == 0);
      } else {
         goto done;
      }
   }

   /* There isn't, create a new one. */
   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo) {
      goto fail;
   }

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      struct drm_gem_open open_arg = {};
      memset(&open_arg, 0, sizeof(open_arg));
      /* Open the BO. */
      open_arg.name = whandle->handle;
      if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
         FREE(bo);
         goto fail;
      }
      handle = open_arg.handle;
      size = open_arg.size;
      bo->flink_name = whandle->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      size = lseek(whandle->handle, 0, SEEK_END);
      /*
       * Could check errno to determine whether the kernel is new enough, but
       * it doesn't really matter why this failed, just that it failed.
       */
      if (size == (off_t)-1) {
         FREE(bo);
         goto fail;
      }
      lseek(whandle->handle, 0, SEEK_SET);
   }

   assert(handle != 0);

   bo->handle = handle;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = 0;
   bo->base.size = (unsigned) size;
   bo->rws = ws;
   bo->va = 0;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (bo->flink_name)
      _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

done:
   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory && !bo->va) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer_lean *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         radeon_bo_reference(rws, &b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer_lean*)bo;

fail:
   mtx_unlock(&ws->bo_handles_mutex);
   return NULL;
}

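/* Export a buffer as a flink name, KMS handle or dma-buf fd. Slab entries
 * cannot be exported; exported buffers are removed from the reusable pool.
 */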
static bool radeon_winsys_bo_get_handle(struct radeon_winsys *rws,
                                        struct pb_buffer_lean *buffer,
                                        struct winsys_handle *whandle)
{
   struct drm_gem_flink flink;
   struct radeon_bo *bo = radeon_bo(buffer);
   struct radeon_drm_winsys *ws = bo->rws;

   /* Don't allow exports of slab entries. */
   if (!bo->handle)
      return false;

   memset(&flink, 0, sizeof(flink));

   bo->u.real.use_reusable_pool = false;

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      if (!bo->flink_name) {
         flink.handle = bo->handle;

         if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
            return false;
         }

         bo->flink_name = flink.name;

         mtx_lock(&ws->bo_handles_mutex);
         _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
         mtx_unlock(&ws->bo_handles_mutex);
      }
      whandle->handle = bo->flink_name;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
      whandle->handle = bo->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
         return false;
   }

   return true;
}

static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer_lean *buf)
{
   return ((struct radeon_bo*)buf)->user_ptr != NULL;
}

static bool radeon_winsys_bo_is_suballocated(struct pb_buffer_lean *buf)
{
   return !((struct radeon_bo*)buf)->handle;
}

static uint64_t radeon_winsys_bo_va(struct pb_buffer_lean *buf)
{
   return ((struct radeon_bo*)buf)->va;
}

static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer_lean *buf)
{
   struct radeon_bo *bo = radeon_bo(buf);

   if (bo->handle)
      return 0;

   return bo->va - bo->u.slab.real->va;
}

void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
{
   ws->base.buffer_set_metadata = radeon_bo_set_metadata;
   ws->base.buffer_get_metadata = radeon_bo_get_metadata;
   ws->base.buffer_map = radeon_bo_map;
   ws->base.buffer_unmap = radeon_bo_unmap;
   ws->base.buffer_wait = radeon_bo_wait;
   ws->base.buffer_create = radeon_winsys_bo_create;
   ws->base.buffer_destroy = radeon_winsys_bo_destroy;
   ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
   ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
   ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
   ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
   ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
   ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
   ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
   ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
}