1 /*
2  * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 #include "radeon_drm_cs.h"
28 
29 #include "util/u_hash_table.h"
30 #include "util/u_memory.h"
31 #include "util/simple_list.h"
32 #include "os/os_thread.h"
33 #include "os/os_mman.h"
34 #include "os/os_time.h"
35 
36 #include "state_tracker/drm_driver.h"
37 
38 #include <sys/ioctl.h>
39 #include <xf86drm.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <inttypes.h>
44 
45 static struct pb_buffer *
46 radeon_winsys_bo_create(struct radeon_winsys *rws,
47                         uint64_t size,
48                         unsigned alignment,
49                         enum radeon_bo_domain domain,
50                         enum radeon_bo_flag flags);
51 
52 static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
53 {
54     return (struct radeon_bo *)bo;
55 }
56 
57 struct radeon_bo_va_hole {
58     struct list_head list;
59     uint64_t         offset;
60     uint64_t         size;
61 };
62 
63 static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
64 {
65     struct drm_radeon_gem_busy args = {0};
66 
67     args.handle = bo->handle;
68     return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
69                                &args, sizeof(args)) != 0;
70 }
71 
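/* Check whether a buffer is busy without blocking. Real BOs ask the kernel
 * directly; slab entries instead scan the fences recorded for the entry,
 * dropping references to fences that have already idled and stopping at the
 * first busy one.
 */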
72 static bool radeon_bo_is_busy(struct radeon_bo *bo)
73 {
74     unsigned num_idle;
75     bool busy = false;
76 
77     if (bo->handle)
78         return radeon_real_bo_is_busy(bo);
79 
80     pipe_mutex_lock(bo->rws->bo_fence_lock);
81     for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
82         if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
83             busy = true;
84             break;
85         }
86         radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL);
87     }
88     memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
89             (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
90     bo->u.slab.num_fences -= num_idle;
91     pipe_mutex_unlock(bo->rws->bo_fence_lock);
92 
93     return busy;
94 }
95 
96 static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
97 {
98     struct drm_radeon_gem_wait_idle args = {0};
99 
100     args.handle = bo->handle;
101     while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
102                            &args, sizeof(args)) == -EBUSY);
103 }
104 
105 static void radeon_bo_wait_idle(struct radeon_bo *bo)
106 {
107     if (bo->handle) {
108         radeon_real_bo_wait_idle(bo);
109     } else {
110         pipe_mutex_lock(bo->rws->bo_fence_lock);
111         while (bo->u.slab.num_fences) {
112             struct radeon_bo *fence = NULL;
113             radeon_bo_reference(&fence, bo->u.slab.fences[0]);
114             pipe_mutex_unlock(bo->rws->bo_fence_lock);
115 
116             /* Wait without holding the fence lock. */
117             radeon_real_bo_wait_idle(fence);
118 
119             pipe_mutex_lock(bo->rws->bo_fence_lock);
120             if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
121                 radeon_bo_reference(&bo->u.slab.fences[0], NULL);
122                 memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
123                         (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
124                 bo->u.slab.num_fences--;
125             }
126             radeon_bo_reference(&fence, NULL);
127         }
128         pipe_mutex_unlock(bo->rws->bo_fence_lock);
129     }
130 }
131 
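/* Wait for a buffer to become idle. A timeout of 0 only queries the busy
 * state, PIPE_TIMEOUT_INFINITE blocks until the buffer is idle, and any
 * other timeout is emulated by polling radeon_bo_is_busy() until the
 * deadline passes. In-flight ioctls referencing the buffer are waited for
 * first.
 */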
132 static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
133                            enum radeon_bo_usage usage)
134 {
135     struct radeon_bo *bo = radeon_bo(_buf);
136     int64_t abs_timeout;
137 
138     /* No timeout. Just query. */
139     if (timeout == 0)
140         return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);
141 
142     abs_timeout = os_time_get_absolute_timeout(timeout);
143 
144     /* Wait if any ioctl is being submitted with this buffer. */
145     if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
146         return false;
147 
148     /* Infinite timeout. */
149     if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
150         radeon_bo_wait_idle(bo);
151         return true;
152     }
153 
154     /* Other timeouts need to be emulated with a loop. */
155     while (radeon_bo_is_busy(bo)) {
156        if (os_time_get_nano() >= abs_timeout)
157           return false;
158        os_time_sleep(10);
159     }
160 
161     return true;
162 }
163 
164 static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
165 {
166     /* Zero domains the driver doesn't understand. */
167     domain &= RADEON_DOMAIN_VRAM_GTT;
168 
169     /* If no domain is set, we must set something... */
170     if (!domain)
171         domain = RADEON_DOMAIN_VRAM_GTT;
172 
173     return domain;
174 }
175 
176 static enum radeon_bo_domain radeon_bo_get_initial_domain(
177 		struct pb_buffer *buf)
178 {
179     struct radeon_bo *bo = (struct radeon_bo*)buf;
180     struct drm_radeon_gem_op args;
181 
182     if (bo->rws->info.drm_minor < 38)
183         return RADEON_DOMAIN_VRAM_GTT;
184 
185     memset(&args, 0, sizeof(args));
186     args.handle = bo->handle;
187     args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;
188 
189     if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
190                             &args, sizeof(args))) {
191         fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
192                 bo, bo->handle);
193         /* Default domain as returned by get_valid_domain. */
194         return RADEON_DOMAIN_VRAM_GTT;
195     }
196 
197     /* GEM domains and winsys domains are defined the same. */
198     return get_valid_domain(args.value);
199 }
200 
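/* Allocate a range from the winsys-managed virtual address space. The list
 * of free holes is searched first (first fit, splitting a hole when the
 * alignment leaves some waste); if no hole fits, the range is carved out at
 * the current top of the address space (va_offset).
 */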
201 static uint64_t radeon_bomgr_find_va(struct radeon_drm_winsys *rws,
202                                      uint64_t size, uint64_t alignment)
203 {
204     struct radeon_bo_va_hole *hole, *n;
205     uint64_t offset = 0, waste = 0;
206 
207     /* All VM address space holes will implicitly start aligned to the
208      * size alignment, so we don't need to sanitize the alignment here
209      */
210     size = align(size, rws->info.gart_page_size);
211 
212     pipe_mutex_lock(rws->bo_va_mutex);
213     /* first look for a hole */
214     LIST_FOR_EACH_ENTRY_SAFE(hole, n, &rws->va_holes, list) {
215         offset = hole->offset;
216         waste = offset % alignment;
217         waste = waste ? alignment - waste : 0;
218         offset += waste;
219         if (offset >= (hole->offset + hole->size)) {
220             continue;
221         }
222         if (!waste && hole->size == size) {
223             offset = hole->offset;
224             list_del(&hole->list);
225             FREE(hole);
226             pipe_mutex_unlock(rws->bo_va_mutex);
227             return offset;
228         }
229         if ((hole->size - waste) > size) {
230             if (waste) {
231                 n = CALLOC_STRUCT(radeon_bo_va_hole);
232                 n->size = waste;
233                 n->offset = hole->offset;
234                 list_add(&n->list, &hole->list);
235             }
236             hole->size -= (size + waste);
237             hole->offset += size + waste;
238             pipe_mutex_unlock(rws->bo_va_mutex);
239             return offset;
240         }
241         if ((hole->size - waste) == size) {
242             hole->size = waste;
243             pipe_mutex_unlock(rws->bo_va_mutex);
244             return offset;
245         }
246     }
247 
248     offset = rws->va_offset;
249     waste = offset % alignment;
250     waste = waste ? alignment - waste : 0;
251     if (waste) {
252         n = CALLOC_STRUCT(radeon_bo_va_hole);
253         n->size = waste;
254         n->offset = offset;
255         list_add(&n->list, &rws->va_holes);
256     }
257     offset += waste;
258     rws->va_offset += size + waste;
259     pipe_mutex_unlock(rws->bo_va_mutex);
260     return offset;
261 }
262 
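/* Return a virtual address range to the allocator, either by lowering the
 * top of the address space or by inserting a hole into the hole list,
 * merging it with adjacent holes where possible.
 */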
263 static void radeon_bomgr_free_va(struct radeon_drm_winsys *rws,
264                                  uint64_t va, uint64_t size)
265 {
266     struct radeon_bo_va_hole *hole = NULL;
267 
268     size = align(size, rws->info.gart_page_size);
269 
270     pipe_mutex_lock(rws->bo_va_mutex);
271     if ((va + size) == rws->va_offset) {
272         rws->va_offset = va;
273         /* Delete uppermost hole if it reaches the new top */
274         if (!LIST_IS_EMPTY(&rws->va_holes)) {
275             hole = container_of(rws->va_holes.next, hole, list);
276             if ((hole->offset + hole->size) == va) {
277                 rws->va_offset = hole->offset;
278                 list_del(&hole->list);
279                 FREE(hole);
280             }
281         }
282     } else {
283         struct radeon_bo_va_hole *next;
284 
285         hole = container_of(&rws->va_holes, hole, list);
286         LIST_FOR_EACH_ENTRY(next, &rws->va_holes, list) {
287 	    if (next->offset < va)
288 	        break;
289             hole = next;
290         }
291 
292         if (&hole->list != &rws->va_holes) {
293             /* Grow upper hole if it's adjacent */
294             if (hole->offset == (va + size)) {
295                 hole->offset = va;
296                 hole->size += size;
297                 /* Merge lower hole if it's adjacent */
298                 if (next != hole && &next->list != &rws->va_holes &&
299                     (next->offset + next->size) == va) {
300                     next->size += hole->size;
301                     list_del(&hole->list);
302                     FREE(hole);
303                 }
304                 goto out;
305             }
306         }
307 
308         /* Grow lower hole if it's adjacent */
309         if (next != hole && &next->list != &rws->va_holes &&
310             (next->offset + next->size) == va) {
311             next->size += size;
312             goto out;
313         }
314 
315         /* FIXME on allocation failure we just lose virtual address space
316          * maybe print a warning
317          */
318         next = CALLOC_STRUCT(radeon_bo_va_hole);
319         if (next) {
320             next->size = size;
321             next->offset = va;
322             list_add(&next->list, &hole->list);
323         }
324     }
325 out:
326     pipe_mutex_unlock(rws->bo_va_mutex);
327 }
328 
329 void radeon_bo_destroy(struct pb_buffer *_buf)
330 {
331     struct radeon_bo *bo = radeon_bo(_buf);
332     struct radeon_drm_winsys *rws = bo->rws;
333     struct drm_gem_close args;
334 
335     assert(bo->handle && "must not be called for slab entries");
336 
337     memset(&args, 0, sizeof(args));
338 
339     pipe_mutex_lock(rws->bo_handles_mutex);
340     util_hash_table_remove(rws->bo_handles, (void*)(uintptr_t)bo->handle);
341     if (bo->flink_name) {
342         util_hash_table_remove(rws->bo_names,
343                                (void*)(uintptr_t)bo->flink_name);
344     }
345     pipe_mutex_unlock(rws->bo_handles_mutex);
346 
347     if (bo->u.real.ptr)
348         os_munmap(bo->u.real.ptr, bo->base.size);
349 
350     if (rws->info.has_virtual_memory) {
351         if (rws->va_unmap_working) {
352             struct drm_radeon_gem_va va;
353 
354             va.handle = bo->handle;
355             va.vm_id = 0;
356             va.operation = RADEON_VA_UNMAP;
357             va.flags = RADEON_VM_PAGE_READABLE |
358                        RADEON_VM_PAGE_WRITEABLE |
359                        RADEON_VM_PAGE_SNOOPED;
360             va.offset = bo->va;
361 
362             if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
363 				    sizeof(va)) != 0 &&
364 		va.operation == RADEON_VA_RESULT_ERROR) {
365                 fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
366                 fprintf(stderr, "radeon:    size      : %"PRIu64" bytes\n", bo->base.size);
367                 fprintf(stderr, "radeon:    va        : 0x%"PRIx64"\n", bo->va);
368             }
369 	}
370 
371 	radeon_bomgr_free_va(rws, bo->va, bo->base.size);
372     }
373 
374     /* Close object. */
375     args.handle = bo->handle;
376     drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
377 
378     pipe_mutex_destroy(bo->u.real.map_mutex);
379 
380     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
381         rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
382     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
383         rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);
384 
385     if (bo->u.real.map_count >= 1) {
386         if (bo->initial_domain & RADEON_DOMAIN_VRAM)
387             bo->rws->mapped_vram -= bo->base.size;
388         else
389             bo->rws->mapped_gtt -= bo->base.size;
390     }
391 
392     FREE(bo);
393 }
394 
395 static void radeon_bo_destroy_or_cache(struct pb_buffer *_buf)
396 {
397    struct radeon_bo *bo = radeon_bo(_buf);
398 
399     assert(bo->handle && "must not be called for slab entries");
400 
401    if (bo->u.real.use_reusable_pool)
402       pb_cache_add_buffer(&bo->u.real.cache_entry);
403    else
404       radeon_bo_destroy(_buf);
405 }
406 
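/* Map a buffer for CPU access. Slab entries are mapped through their backing
 * (real) buffer with the entry offset applied. Mappings are reference-counted
 * via map_count; if mmap fails, the buffer cache is flushed and the mapping
 * is retried once.
 */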
407 void *radeon_bo_do_map(struct radeon_bo *bo)
408 {
409     struct drm_radeon_gem_mmap args = {0};
410     void *ptr;
411     unsigned offset;
412 
413     /* If the buffer is created from user memory, return the user pointer. */
414     if (bo->user_ptr)
415         return bo->user_ptr;
416 
417     if (bo->handle) {
418         offset = 0;
419     } else {
420         offset = bo->va - bo->u.slab.real->va;
421         bo = bo->u.slab.real;
422     }
423 
424     /* Map the buffer. */
425     pipe_mutex_lock(bo->u.real.map_mutex);
426     /* Return the pointer if it's already mapped. */
427     if (bo->u.real.ptr) {
428         bo->u.real.map_count++;
429         pipe_mutex_unlock(bo->u.real.map_mutex);
430         return (uint8_t*)bo->u.real.ptr + offset;
431     }
432     args.handle = bo->handle;
433     args.offset = 0;
434     args.size = (uint64_t)bo->base.size;
435     if (drmCommandWriteRead(bo->rws->fd,
436                             DRM_RADEON_GEM_MMAP,
437                             &args,
438                             sizeof(args))) {
439         pipe_mutex_unlock(bo->u.real.map_mutex);
440         fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
441                 bo, bo->handle);
442         return NULL;
443     }
444 
445     ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
446                bo->rws->fd, args.addr_ptr);
447     if (ptr == MAP_FAILED) {
448         /* Clear the cache and try again. */
449         pb_cache_release_all_buffers(&bo->rws->bo_cache);
450 
451         ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
452                       bo->rws->fd, args.addr_ptr);
453         if (ptr == MAP_FAILED) {
454             pipe_mutex_unlock(bo->u.real.map_mutex);
455             fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
456             return NULL;
457         }
458     }
459     bo->u.real.ptr = ptr;
460     bo->u.real.map_count = 1;
461 
462     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
463        bo->rws->mapped_vram += bo->base.size;
464     else
465        bo->rws->mapped_gtt += bo->base.size;
466 
467     pipe_mutex_unlock(bo->u.real.map_mutex);
468     return (uint8_t*)bo->u.real.ptr + offset;
469 }
470 
471 static void *radeon_bo_map(struct pb_buffer *buf,
472                            struct radeon_winsys_cs *rcs,
473                            enum pipe_transfer_usage usage)
474 {
475     struct radeon_bo *bo = (struct radeon_bo*)buf;
476     struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;
477 
478     /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
479     if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
480         /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
481         if (usage & PIPE_TRANSFER_DONTBLOCK) {
482             if (!(usage & PIPE_TRANSFER_WRITE)) {
483                 /* Mapping for read.
484                  *
485                  * Since we are mapping for read, we don't need to wait
486                  * if the GPU is using the buffer for read too
487                  * (neither one is changing it).
488                  *
489                  * Only check whether the buffer is being used for write. */
490                 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
491                     cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
492                     return NULL;
493                 }
494 
495                 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
496                                     RADEON_USAGE_WRITE)) {
497                     return NULL;
498                 }
499             } else {
500                 if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
501                     cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
502                     return NULL;
503                 }
504 
505                 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
506                                     RADEON_USAGE_READWRITE)) {
507                     return NULL;
508                 }
509             }
510         } else {
511             uint64_t time = os_time_get_nano();
512 
513             if (!(usage & PIPE_TRANSFER_WRITE)) {
514                 /* Mapping for read.
515                  *
516                  * Since we are mapping for read, we don't need to wait
517                  * if the GPU is using the buffer for read too
518                  * (neither one is changing it).
519                  *
520                  * Only check whether the buffer is being used for write. */
521                 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
522                     cs->flush_cs(cs->flush_data, 0, NULL);
523                 }
524                 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
525                                RADEON_USAGE_WRITE);
526             } else {
527                 /* Mapping for write. */
528                 if (cs) {
529                     if (radeon_bo_is_referenced_by_cs(cs, bo)) {
530                         cs->flush_cs(cs->flush_data, 0, NULL);
531                     } else {
532                         /* Try to avoid busy-waiting in radeon_bo_wait. */
533                         if (p_atomic_read(&bo->num_active_ioctls))
534                             radeon_drm_cs_sync_flush(rcs);
535                     }
536                 }
537 
538                 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
539                                RADEON_USAGE_READWRITE);
540             }
541 
542             bo->rws->buffer_wait_time += os_time_get_nano() - time;
543         }
544     }
545 
546     return radeon_bo_do_map(bo);
547 }
548 
549 static void radeon_bo_unmap(struct pb_buffer *_buf)
550 {
551     struct radeon_bo *bo = (struct radeon_bo*)_buf;
552 
553     if (bo->user_ptr)
554         return;
555 
556     if (!bo->handle)
557         bo = bo->u.slab.real;
558 
559     pipe_mutex_lock(bo->u.real.map_mutex);
560     if (!bo->u.real.ptr) {
561         pipe_mutex_unlock(bo->u.real.map_mutex);
562         return; /* it's not been mapped */
563     }
564 
565     assert(bo->u.real.map_count);
566     if (--bo->u.real.map_count) {
567         pipe_mutex_unlock(bo->u.real.map_mutex);
568         return; /* it's been mapped multiple times */
569     }
570 
571     os_munmap(bo->u.real.ptr, bo->base.size);
572     bo->u.real.ptr = NULL;
573 
574     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
575        bo->rws->mapped_vram -= bo->base.size;
576     else
577        bo->rws->mapped_gtt -= bo->base.size;
578 
579     pipe_mutex_unlock(bo->u.real.map_mutex);
580 }
581 
582 static const struct pb_vtbl radeon_bo_vtbl = {
583     radeon_bo_destroy_or_cache
584     /* other functions are never called */
585 };
586 
587 #ifndef RADEON_GEM_GTT_WC
588 #define RADEON_GEM_GTT_WC		(1 << 2)
589 #endif
590 #ifndef RADEON_GEM_CPU_ACCESS
591 /* BO is expected to be accessed by the CPU */
592 #define RADEON_GEM_CPU_ACCESS		(1 << 3)
593 #endif
594 #ifndef RADEON_GEM_NO_CPU_ACCESS
595 /* CPU access is not expected to work for this BO */
596 #define RADEON_GEM_NO_CPU_ACCESS	(1 << 4)
597 #endif
598 
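/* Create a real GEM buffer with DRM_RADEON_GEM_CREATE and, when virtual
 * memory is available, reserve and map a virtual address range for it. If
 * the kernel reports that the VA range is already mapped, the existing
 * buffer is returned instead of the newly created one.
 */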
599 static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
600                                           unsigned size, unsigned alignment,
601                                           unsigned usage,
602                                           unsigned initial_domains,
603                                           unsigned flags,
604                                           unsigned pb_cache_bucket)
605 {
606     struct radeon_bo *bo;
607     struct drm_radeon_gem_create args;
608     int r;
609 
610     memset(&args, 0, sizeof(args));
611 
612     assert(initial_domains);
613     assert((initial_domains &
614             ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
615 
616     args.size = size;
617     args.alignment = alignment;
618     args.initial_domain = initial_domains;
619     args.flags = 0;
620 
621     if (flags & RADEON_FLAG_GTT_WC)
622         args.flags |= RADEON_GEM_GTT_WC;
623     if (flags & RADEON_FLAG_CPU_ACCESS)
624         args.flags |= RADEON_GEM_CPU_ACCESS;
625     if (flags & RADEON_FLAG_NO_CPU_ACCESS)
626         args.flags |= RADEON_GEM_NO_CPU_ACCESS;
627 
628     if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
629                             &args, sizeof(args))) {
630         fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
631         fprintf(stderr, "radeon:    size      : %u bytes\n", size);
632         fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
633         fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
634         fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
635         return NULL;
636     }
637 
638     assert(args.handle != 0);
639 
640     bo = CALLOC_STRUCT(radeon_bo);
641     if (!bo)
642         return NULL;
643 
644     pipe_reference_init(&bo->base.reference, 1);
645     bo->base.alignment = alignment;
646     bo->base.usage = usage;
647     bo->base.size = size;
648     bo->base.vtbl = &radeon_bo_vtbl;
649     bo->rws = rws;
650     bo->handle = args.handle;
651     bo->va = 0;
652     bo->initial_domain = initial_domains;
653     bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
654     pipe_mutex_init(bo->u.real.map_mutex);
655     pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
656                         pb_cache_bucket);
657 
658     if (rws->info.has_virtual_memory) {
659         struct drm_radeon_gem_va va;
660         unsigned va_gap_size;
661 
662         va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
663         bo->va = radeon_bomgr_find_va(rws, size + va_gap_size, alignment);
664 
665         va.handle = bo->handle;
666         va.vm_id = 0;
667         va.operation = RADEON_VA_MAP;
668         va.flags = RADEON_VM_PAGE_READABLE |
669                    RADEON_VM_PAGE_WRITEABLE |
670                    RADEON_VM_PAGE_SNOOPED;
671         va.offset = bo->va;
672         r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
673         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
674             fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
675             fprintf(stderr, "radeon:    size      : %d bytes\n", size);
676             fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
677             fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
678             fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
679             radeon_bo_destroy(&bo->base);
680             return NULL;
681         }
682         pipe_mutex_lock(rws->bo_handles_mutex);
683         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
684             struct pb_buffer *b = &bo->base;
685             struct radeon_bo *old_bo =
686                 util_hash_table_get(rws->bo_vas, (void*)(uintptr_t)va.offset);
687 
688             pipe_mutex_unlock(rws->bo_handles_mutex);
689             pb_reference(&b, &old_bo->base);
690             return radeon_bo(b);
691         }
692 
693         util_hash_table_set(rws->bo_vas, (void*)(uintptr_t)bo->va, bo);
694         pipe_mutex_unlock(rws->bo_handles_mutex);
695     }
696 
697     if (initial_domains & RADEON_DOMAIN_VRAM)
698         rws->allocated_vram += align(size, rws->info.gart_page_size);
699     else if (initial_domains & RADEON_DOMAIN_GTT)
700         rws->allocated_gtt += align(size, rws->info.gart_page_size);
701 
702     return bo;
703 }
704 
705 bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
706 {
707    struct radeon_bo *bo = radeon_bo(_buf);
708 
709    if (radeon_bo_is_referenced_by_any_cs(bo))
710       return false;
711 
712    return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
713 }
714 
715 bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
716 {
717     struct radeon_bo *bo = NULL; /* fix container_of */
718     bo = container_of(entry, bo, u.slab.entry);
719 
720     return radeon_bo_can_reclaim(&bo->base);
721 }
722 
723 static void radeon_bo_slab_destroy(struct pb_buffer *_buf)
724 {
725     struct radeon_bo *bo = radeon_bo(_buf);
726 
727     assert(!bo->handle);
728 
729     pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
730 }
731 
732 static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
733     radeon_bo_slab_destroy
734     /* other functions are never called */
735 };
736 
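/* Allocate a 64 KB backing buffer and carve it into equally sized slab
 * entries. The heap index encodes the buffer flags in bits 0-1 and the
 * domain in the remaining bits, mirroring the encoding used by
 * radeon_winsys_bo_create below.
 */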
737 struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
738                                      unsigned entry_size,
739                                      unsigned group_index)
740 {
741     struct radeon_drm_winsys *ws = priv;
742     struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
743     enum radeon_bo_domain domains;
744     enum radeon_bo_flag flags = 0;
745     unsigned base_hash;
746 
747     if (!slab)
748         return NULL;
749 
750     if (heap & 1)
751         flags |= RADEON_FLAG_GTT_WC;
752     if (heap & 2)
753         flags |= RADEON_FLAG_CPU_ACCESS;
754 
755     switch (heap >> 2) {
756     case 0:
757         domains = RADEON_DOMAIN_VRAM;
758         break;
759     default:
760     case 1:
761         domains = RADEON_DOMAIN_VRAM_GTT;
762         break;
763     case 2:
764         domains = RADEON_DOMAIN_GTT;
765         break;
766     }
767 
768     slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
769                                                      64 * 1024, 64 * 1024,
770                                                      domains, flags));
771     if (!slab->buffer)
772         goto fail;
773 
774     assert(slab->buffer->handle);
775 
776     slab->base.num_entries = slab->buffer->base.size / entry_size;
777     slab->base.num_free = slab->base.num_entries;
778     slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
779     if (!slab->entries)
780         goto fail_buffer;
781 
782     LIST_INITHEAD(&slab->base.free);
783 
784     base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);
785 
786     for (unsigned i = 0; i < slab->base.num_entries; ++i) {
787         struct radeon_bo *bo = &slab->entries[i];
788 
789         bo->base.alignment = entry_size;
790         bo->base.usage = slab->buffer->base.usage;
791         bo->base.size = entry_size;
792         bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
793         bo->rws = ws;
794         bo->va = slab->buffer->va + i * entry_size;
795         bo->initial_domain = domains;
796         bo->hash = base_hash + i;
797         bo->u.slab.entry.slab = &slab->base;
798         bo->u.slab.entry.group_index = group_index;
799         bo->u.slab.real = slab->buffer;
800 
801         LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
802     }
803 
804     return &slab->base;
805 
806 fail_buffer:
807     radeon_bo_reference(&slab->buffer, NULL);
808 fail:
809     FREE(slab);
810     return NULL;
811 }
812 
813 void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
814 {
815     struct radeon_slab *slab = (struct radeon_slab *)pslab;
816 
817     for (unsigned i = 0; i < slab->base.num_entries; ++i) {
818         struct radeon_bo *bo = &slab->entries[i];
819         for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
820             radeon_bo_reference(&bo->u.slab.fences[j], NULL);
821         FREE(bo->u.slab.fences);
822     }
823 
824     FREE(slab->entries);
825     radeon_bo_reference(&slab->buffer, NULL);
826     FREE(slab);
827 }
828 
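/* Convert the EG_TILE_SPLIT field of the kernel tiling flags to a size in
 * bytes (64..4096), and back (eg_tile_split_rev).
 */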
829 static unsigned eg_tile_split(unsigned tile_split)
830 {
831     switch (tile_split) {
832     case 0:     tile_split = 64;    break;
833     case 1:     tile_split = 128;   break;
834     case 2:     tile_split = 256;   break;
835     case 3:     tile_split = 512;   break;
836     default:
837     case 4:     tile_split = 1024;  break;
838     case 5:     tile_split = 2048;  break;
839     case 6:     tile_split = 4096;  break;
840     }
841     return tile_split;
842 }
843 
844 static unsigned eg_tile_split_rev(unsigned eg_tile_split)
845 {
846     switch (eg_tile_split) {
847     case 64:    return 0;
848     case 128:   return 1;
849     case 256:   return 2;
850     case 512:   return 3;
851     default:
852     case 1024:  return 4;
853     case 2048:  return 5;
854     case 4096:  return 6;
855     }
856 }
857 
858 static void radeon_bo_get_metadata(struct pb_buffer *_buf,
859 				   struct radeon_bo_metadata *md)
860 {
861     struct radeon_bo *bo = radeon_bo(_buf);
862     struct drm_radeon_gem_set_tiling args;
863 
864     assert(bo->handle && "must not be called for slab entries");
865 
866     memset(&args, 0, sizeof(args));
867 
868     args.handle = bo->handle;
869 
870     drmCommandWriteRead(bo->rws->fd,
871                         DRM_RADEON_GEM_GET_TILING,
872                         &args,
873                         sizeof(args));
874 
875     md->microtile = RADEON_LAYOUT_LINEAR;
876     md->macrotile = RADEON_LAYOUT_LINEAR;
877     if (args.tiling_flags & RADEON_TILING_MICRO)
878         md->microtile = RADEON_LAYOUT_TILED;
879     else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
880         md->microtile = RADEON_LAYOUT_SQUARETILED;
881 
882     if (args.tiling_flags & RADEON_TILING_MACRO)
883         md->macrotile = RADEON_LAYOUT_TILED;
884 
885     md->bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
886     md->bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
887     md->tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
888     md->mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
889     md->tile_split = eg_tile_split(md->tile_split);
890     md->scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
891 }
892 
893 static void radeon_bo_set_metadata(struct pb_buffer *_buf,
894                                    struct radeon_bo_metadata *md)
895 {
896     struct radeon_bo *bo = radeon_bo(_buf);
897     struct drm_radeon_gem_set_tiling args;
898 
899     assert(bo->handle && "must not be called for slab entries");
900 
901     memset(&args, 0, sizeof(args));
902 
903     os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
904 
905     if (md->microtile == RADEON_LAYOUT_TILED)
906         args.tiling_flags |= RADEON_TILING_MICRO;
907     else if (md->microtile == RADEON_LAYOUT_SQUARETILED)
908         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;
909 
910     if (md->macrotile == RADEON_LAYOUT_TILED)
911         args.tiling_flags |= RADEON_TILING_MACRO;
912 
913     args.tiling_flags |= (md->bankw & RADEON_TILING_EG_BANKW_MASK) <<
914         RADEON_TILING_EG_BANKW_SHIFT;
915     args.tiling_flags |= (md->bankh & RADEON_TILING_EG_BANKH_MASK) <<
916         RADEON_TILING_EG_BANKH_SHIFT;
917     if (md->tile_split) {
918 	args.tiling_flags |= (eg_tile_split_rev(md->tile_split) &
919 			      RADEON_TILING_EG_TILE_SPLIT_MASK) <<
920 	    RADEON_TILING_EG_TILE_SPLIT_SHIFT;
921     }
922     args.tiling_flags |= (md->mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
923         RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
924 
925     if (bo->rws->gen >= DRV_SI && !md->scanout)
926         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
927 
928     args.handle = bo->handle;
929     args.pitch = md->stride;
930 
931     drmCommandWriteRead(bo->rws->fd,
932                         DRM_RADEON_GEM_SET_TILING,
933                         &args,
934                         sizeof(args));
935 }
936 
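/* Main buffer allocation entry point. Small, VM-capable allocations that
 * don't need their own GEM handle are sub-allocated from slabs; everything
 * else is served from the pb_cache of real buffers, with the usage bits and
 * cache bucket chosen so that only compatible buffers are reused.
 */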
937 static struct pb_buffer *
938 radeon_winsys_bo_create(struct radeon_winsys *rws,
939                         uint64_t size,
940                         unsigned alignment,
941                         enum radeon_bo_domain domain,
942                         enum radeon_bo_flag flags)
943 {
944     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
945     struct radeon_bo *bo;
946     unsigned usage = 0, pb_cache_bucket;
947 
948     /* Only 32-bit sizes are supported. */
949     if (size > UINT_MAX)
950         return NULL;
951 
952     /* Sub-allocate small buffers from slabs. */
953     if (!(flags & RADEON_FLAG_HANDLE) &&
954         size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
955         ws->info.has_virtual_memory &&
956         alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
957         struct pb_slab_entry *entry;
958         unsigned heap = 0;
959 
960         if (flags & RADEON_FLAG_GTT_WC)
961             heap |= 1;
962         if (flags & RADEON_FLAG_CPU_ACCESS)
963             heap |= 2;
964         if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
965             goto no_slab;
966 
967         switch (domain) {
968         case RADEON_DOMAIN_VRAM:
969             heap |= 0 * 4;
970             break;
971         case RADEON_DOMAIN_VRAM_GTT:
972             heap |= 1 * 4;
973             break;
974         case RADEON_DOMAIN_GTT:
975             heap |= 2 * 4;
976             break;
977         default:
978             goto no_slab;
979         }
980 
981         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
982         if (!entry) {
983             /* Clear the cache and try again. */
984             pb_cache_release_all_buffers(&ws->bo_cache);
985 
986             entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
987         }
988         if (!entry)
989             return NULL;
990 
991         bo = NULL;
992         bo = container_of(entry, bo, u.slab.entry);
993 
994         pipe_reference_init(&bo->base.reference, 1);
995 
996         return &bo->base;
997     }
998 no_slab:
999 
1000     /* This flag is irrelevant for the cache. */
1001     flags &= ~RADEON_FLAG_HANDLE;
1002 
1003     /* Align size to page size. This is the minimum alignment for normal
1004      * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
1005      * like constant/uniform buffers, can benefit from better and more reuse.
1006      */
1007     size = align(size, ws->info.gart_page_size);
1008     alignment = align(alignment, ws->info.gart_page_size);
1009 
1010     /* Only set one usage bit each for domains and flags, or the cache manager
1011      * might consider different sets of domains / flags compatible
1012      */
1013     if (domain == RADEON_DOMAIN_VRAM_GTT)
1014         usage = 1 << 2;
1015     else
1016         usage = (unsigned)domain >> 1;
1017     assert(flags < sizeof(usage) * 8 - 3);
1018     usage |= 1 << (flags + 3);
1019 
1020     /* Determine the pb_cache bucket for minimizing pb_cache misses. */
1021     pb_cache_bucket = 0;
1022     if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
1023        pb_cache_bucket += 1;
1024     if (flags == RADEON_FLAG_GTT_WC) /* WC */
1025        pb_cache_bucket += 2;
1026     assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
1027 
1028     bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
1029                                            usage, pb_cache_bucket));
1030     if (bo)
1031         return &bo->base;
1032 
1033     bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
1034                           pb_cache_bucket);
1035     if (!bo) {
1036         /* Clear the cache and try again. */
1037         pb_slabs_reclaim(&ws->bo_slabs);
1038         pb_cache_release_all_buffers(&ws->bo_cache);
1039         bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
1040                               pb_cache_bucket);
1041         if (!bo)
1042             return NULL;
1043     }
1044 
1045     bo->u.real.use_reusable_pool = true;
1046 
1047     pipe_mutex_lock(ws->bo_handles_mutex);
1048     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1049     pipe_mutex_unlock(ws->bo_handles_mutex);
1050 
1051     return &bo->base;
1052 }
1053 
1054 static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
1055                                                    void *pointer, uint64_t size)
1056 {
1057     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1058     struct drm_radeon_gem_userptr args;
1059     struct radeon_bo *bo;
1060     int r;
1061 
1062     bo = CALLOC_STRUCT(radeon_bo);
1063     if (!bo)
1064         return NULL;
1065 
1066     memset(&args, 0, sizeof(args));
1067     args.addr = (uintptr_t)pointer;
1068     args.size = align(size, ws->info.gart_page_size);
1069     args.flags = RADEON_GEM_USERPTR_ANONONLY |
1070         RADEON_GEM_USERPTR_VALIDATE |
1071         RADEON_GEM_USERPTR_REGISTER;
1072     if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
1073                             &args, sizeof(args))) {
1074         FREE(bo);
1075         return NULL;
1076     }
1077 
1078     assert(args.handle != 0);
1079 
1080     pipe_mutex_lock(ws->bo_handles_mutex);
1081 
1082     /* Initialize it. */
1083     pipe_reference_init(&bo->base.reference, 1);
1084     bo->handle = args.handle;
1085     bo->base.alignment = 0;
1086     bo->base.size = size;
1087     bo->base.vtbl = &radeon_bo_vtbl;
1088     bo->rws = ws;
1089     bo->user_ptr = pointer;
1090     bo->va = 0;
1091     bo->initial_domain = RADEON_DOMAIN_GTT;
1092     bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1093     pipe_mutex_init(bo->u.real.map_mutex);
1094 
1095     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1096 
1097     pipe_mutex_unlock(ws->bo_handles_mutex);
1098 
1099     if (ws->info.has_virtual_memory) {
1100         struct drm_radeon_gem_va va;
1101 
1102         bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
1103 
1104         va.handle = bo->handle;
1105         va.operation = RADEON_VA_MAP;
1106         va.vm_id = 0;
1107         va.offset = bo->va;
1108         va.flags = RADEON_VM_PAGE_READABLE |
1109                    RADEON_VM_PAGE_WRITEABLE |
1110                    RADEON_VM_PAGE_SNOOPED;
1111         va.offset = bo->va;
1112         r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1113         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1114             fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1115             radeon_bo_destroy(&bo->base);
1116             return NULL;
1117         }
1118         pipe_mutex_lock(ws->bo_handles_mutex);
1119         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1120             struct pb_buffer *b = &bo->base;
1121             struct radeon_bo *old_bo =
1122                 util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
1123 
1124             pipe_mutex_unlock(ws->bo_handles_mutex);
1125             pb_reference(&b, &old_bo->base);
1126             return b;
1127         }
1128 
1129         util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
1130         pipe_mutex_unlock(ws->bo_handles_mutex);
1131     }
1132 
1133     ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1134 
1135     return (struct pb_buffer*)bo;
1136 }
1137 
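/* Import a buffer from a flink name or dma-buf fd. An existing radeon_bo is
 * reused when the handle is already known, so that a single GEM handle never
 * ends up with two independent winsys buffers.
 */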
1138 static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
1139                                                       struct winsys_handle *whandle,
1140                                                       unsigned *stride,
1141                                                       unsigned *offset)
1142 {
1143     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1144     struct radeon_bo *bo;
1145     int r;
1146     unsigned handle;
1147     uint64_t size = 0;
1148 
1149     if (!offset && whandle->offset != 0) {
1150         fprintf(stderr, "attempt to import unsupported winsys offset %u\n",
1151                 whandle->offset);
1152         return NULL;
1153     }
1154 
1155     /* We must maintain a list of pairs <handle, bo>, so that we always return
1156      * the same BO for one particular handle. If we didn't do that and created
1157      * more than one BO for the same handle and then relocated them in a CS,
1158      * we would hit a deadlock in the kernel.
1159      *
1160      * The list of pairs is guarded by a mutex, of course. */
1161     pipe_mutex_lock(ws->bo_handles_mutex);
1162 
1163     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1164         /* First check if there already is an existing bo for the handle. */
1165         bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
1166     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1167         /* We must first get the GEM handle, as fds are unreliable keys */
1168         r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
1169         if (r)
1170             goto fail;
1171         bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
1172     } else {
1173         /* Unknown handle type */
1174         goto fail;
1175     }
1176 
1177     if (bo) {
1178         /* Increase the refcount. */
1179         struct pb_buffer *b = NULL;
1180         pb_reference(&b, &bo->base);
1181         goto done;
1182     }
1183 
1184     /* There isn't, create a new one. */
1185     bo = CALLOC_STRUCT(radeon_bo);
1186     if (!bo) {
1187         goto fail;
1188     }
1189 
1190     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1191         struct drm_gem_open open_arg = {};
1192         memset(&open_arg, 0, sizeof(open_arg));
1193         /* Open the BO. */
1194         open_arg.name = whandle->handle;
1195         if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
1196             FREE(bo);
1197             goto fail;
1198         }
1199         handle = open_arg.handle;
1200         size = open_arg.size;
1201         bo->flink_name = whandle->handle;
1202     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1203         size = lseek(whandle->handle, 0, SEEK_END);
1204         /*
1205          * Could check errno to determine whether the kernel is new enough, but
1206          * it doesn't really matter why this failed, just that it failed.
1207          */
1208         if (size == (off_t)-1) {
1209             FREE(bo);
1210             goto fail;
1211         }
1212         lseek(whandle->handle, 0, SEEK_SET);
1213     }
1214 
1215     assert(handle != 0);
1216 
1217     bo->handle = handle;
1218 
1219     /* Initialize it. */
1220     pipe_reference_init(&bo->base.reference, 1);
1221     bo->base.alignment = 0;
1222     bo->base.size = (unsigned) size;
1223     bo->base.vtbl = &radeon_bo_vtbl;
1224     bo->rws = ws;
1225     bo->va = 0;
1226     bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1227     pipe_mutex_init(bo->u.real.map_mutex);
1228 
1229     if (bo->flink_name)
1230         util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1231 
1232     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1233 
1234 done:
1235     pipe_mutex_unlock(ws->bo_handles_mutex);
1236 
1237     if (stride)
1238         *stride = whandle->stride;
1239     if (offset)
1240         *offset = whandle->offset;
1241 
1242     if (ws->info.has_virtual_memory && !bo->va) {
1243         struct drm_radeon_gem_va va;
1244 
1245         bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
1246 
1247         va.handle = bo->handle;
1248         va.operation = RADEON_VA_MAP;
1249         va.vm_id = 0;
1250         va.offset = bo->va;
1251         va.flags = RADEON_VM_PAGE_READABLE |
1252                    RADEON_VM_PAGE_WRITEABLE |
1253                    RADEON_VM_PAGE_SNOOPED;
1254         va.offset = bo->va;
1255         r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1256         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1257             fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1258             radeon_bo_destroy(&bo->base);
1259             return NULL;
1260         }
1261         pipe_mutex_lock(ws->bo_handles_mutex);
1262         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1263             struct pb_buffer *b = &bo->base;
1264             struct radeon_bo *old_bo =
1265                 util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
1266 
1267             pipe_mutex_unlock(ws->bo_handles_mutex);
1268             pb_reference(&b, &old_bo->base);
1269             return b;
1270         }
1271 
1272         util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
1273         pipe_mutex_unlock(ws->bo_handles_mutex);
1274     }
1275 
1276     bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);
1277 
1278     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
1279         ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
1280     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
1281         ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1282 
1283     return (struct pb_buffer*)bo;
1284 
1285 fail:
1286     pipe_mutex_unlock(ws->bo_handles_mutex);
1287     return NULL;
1288 }
1289 
1290 static bool radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
1291                                         unsigned stride, unsigned offset,
1292                                         unsigned slice_size,
1293                                         struct winsys_handle *whandle)
1294 {
1295     struct drm_gem_flink flink;
1296     struct radeon_bo *bo = radeon_bo(buffer);
1297     struct radeon_drm_winsys *ws = bo->rws;
1298 
1299     if (!bo->handle) {
1300         offset += bo->va - bo->u.slab.real->va;
1301         bo = bo->u.slab.real;
1302     }
1303 
1304     memset(&flink, 0, sizeof(flink));
1305 
1306     bo->u.real.use_reusable_pool = false;
1307 
1308     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1309         if (!bo->flink_name) {
1310             flink.handle = bo->handle;
1311 
1312             if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
1313                 return false;
1314             }
1315 
1316             bo->flink_name = flink.name;
1317 
1318             pipe_mutex_lock(ws->bo_handles_mutex);
1319             util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1320             pipe_mutex_unlock(ws->bo_handles_mutex);
1321         }
1322         whandle->handle = bo->flink_name;
1323     } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
1324         whandle->handle = bo->handle;
1325     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1326         if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
1327             return false;
1328     }
1329 
1330     whandle->stride = stride;
1331     whandle->offset = offset;
1332     whandle->offset += slice_size * whandle->layer;
1333 
1334     return true;
1335 }
1336 
1337 static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
1338 {
1339    return ((struct radeon_bo*)buf)->user_ptr != NULL;
1340 }
1341 
1342 static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
1343 {
1344     return ((struct radeon_bo*)buf)->va;
1345 }
1346 
1347 static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
1348 {
1349     struct radeon_bo *bo = radeon_bo(buf);
1350 
1351     if (bo->handle)
1352         return 0;
1353 
1354     return bo->va - bo->u.slab.real->va;
1355 }
1356 
1357 void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
1358 {
1359     ws->base.buffer_set_metadata = radeon_bo_set_metadata;
1360     ws->base.buffer_get_metadata = radeon_bo_get_metadata;
1361     ws->base.buffer_map = radeon_bo_map;
1362     ws->base.buffer_unmap = radeon_bo_unmap;
1363     ws->base.buffer_wait = radeon_bo_wait;
1364     ws->base.buffer_create = radeon_winsys_bo_create;
1365     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
1366     ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
1367     ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
1368     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
1369     ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
1370     ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
1371     ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
1372 }
1373