1 /*
2  * Copyright © 2007 Red Hat Inc.
3  * Copyright © 2007-2017 Intel Corporation
4  * Copyright © 2006 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24  * IN THE SOFTWARE.
25  */
26 
27 /*
28  * Authors: Thomas Hellström <thellstrom@vmware.com>
29  *          Keith Whitwell <keithw@vmware.com>
30  *          Eric Anholt <eric@anholt.net>
31  *          Dave Airlie <airlied@linux.ie>
32  */
33 
34 #include <xf86drm.h>
35 #include <util/u_atomic.h>
36 #include <fcntl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <assert.h>
42 #include <sys/ioctl.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <stdbool.h>
46 
47 #include "errno.h"
48 #include "common/intel_clflush.h"
49 #include "dev/intel_debug.h"
50 #include "common/intel_gem.h"
51 #include "dev/intel_device_info.h"
52 #include "libdrm_macros.h"
53 #include "main/macros.h"
54 #include "util/macros.h"
55 #include "util/hash_table.h"
56 #include "util/list.h"
57 #include "util/os_file.h"
58 #include "util/u_dynarray.h"
59 #include "util/vma.h"
60 #include "brw_bufmgr.h"
61 #include "brw_context.h"
62 #include "string.h"
63 
64 #include "drm-uapi/i915_drm.h"
65 
66 #ifdef HAVE_VALGRIND
67 #include <valgrind.h>
68 #include <memcheck.h>
69 #define VG(x) x
70 #else
71 #define VG(x)
72 #endif
73 
74 /* Bufmgr is not aware of brw_context. */
75 #undef WARN_ONCE
76 #define WARN_ONCE(cond, fmt...) do {                            \
77    if (unlikely(cond)) {                                        \
78       static bool _warned = false;                              \
79       if (!_warned) {                                           \
80          fprintf(stderr, "WARNING: ");                          \
81          fprintf(stderr, fmt);                                  \
82          _warned = true;                                        \
83       }                                                         \
84    }                                                            \
85 } while (0)
86 
87 
88 /* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
89  * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
90  * leaked. All because it does not call VG(cli_free) from its
91  * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
92  * an allocation, we mark it available for use upon mmapping and remove
93  * it upon unmapping.
94  */
95 #define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
96 #define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
97 
98 /* On FreeBSD PAGE_SIZE is already defined in
99  * /usr/include/machine/param.h that is indirectly
100  * included here.
101  */
102 #ifndef PAGE_SIZE
103 #define PAGE_SIZE 4096
104 #endif
105 
106 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
107 
108 static inline int
109 atomic_add_unless(int *v, int add, int unless)
110 {
111    int c, old;
112    c = p_atomic_read(v);
113    while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
114       c = old;
115    return c == unless;
116 }
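/* Semantics note (illustrative): atomic_add_unless() applies the delta only
 * while *v differs from 'unless' and returns true when *v already equals it.
 * brw_bo_unreference() below relies on this:
 *
 *    if (atomic_add_unless(&bo->refcount, -1, 1)) {
 *       // refcount was exactly 1: take the bufmgr lock and do the final
 *       // decrement there, so the slow path runs only for the last ref.
 *    }
 */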
117 
118 /**
119  * i965 fixed-size bucketing VMA allocator.
120  *
121  * The BO cache maintains "cache buckets" for buffers of various sizes.
122  * All buffers in a given bucket are identically sized - when allocating,
123  * we always round up to the bucket size.  This means that virtually all
124  * allocations are fixed-size; only buffers which are too large to fit in
125  * a bucket can be variably-sized.
126  *
127  * We create an allocator for each bucket.  Each contains a free-list, where
128  * each node contains a <starting address, 64-bit bitmap> pair.  Each bit
129  * represents a bucket-sized block of memory.  (At the first level, each
130  * bit corresponds to a page.  For the second bucket, bits correspond to
131  * two pages, and so on.)  1 means a block is free, and 0 means it's in-use.
132  * The lowest bit in the bitmap is for the first block.
133  *
134  * This makes allocations cheap - any bit of any node will do.  We can pick
135  * the head of the list and use ffs() to find a free block.  If there are
136  * none, we allocate 64 blocks from a larger allocator - either a bigger
137  * bucketing allocator, or a fallback top-level allocator for large objects.
138  */
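/* A worked example of the scheme above (illustrative, assuming the default
 * 4096-byte PAGE_SIZE): for the smallest bucket (size == 4 KB), each
 * vma_bucket_node covers 64 * 4 KB = 256 KB of virtual address space.  An
 * address A inside that node maps to bit (A - start_address) / 4096, so a
 * bitmap of ~1ull means "block 0 is in use, blocks 1..63 are free".
 */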
139 struct vma_bucket_node {
140    uint64_t start_address;
141    uint64_t bitmap;
142 };
143 
144 struct bo_cache_bucket {
145    /** List of cached BOs. */
146    struct list_head head;
147 
148    /** Size of this bucket, in bytes. */
149    uint64_t size;
150 
151    /** List of vma_bucket_nodes. */
152    struct util_dynarray vma_list[BRW_MEMZONE_COUNT];
153 };
154 
155 struct bo_export {
156    /** File descriptor associated with a handle export. */
157    int drm_fd;
158 
159    /** GEM handle in drm_fd */
160    uint32_t gem_handle;
161 
162    struct list_head link;
163 };
164 
165 struct brw_bufmgr {
166    uint32_t refcount;
167 
168    struct list_head link;
169 
170    int fd;
171 
172    mtx_t lock;
173 
174    /** Array of lists of cached gem objects of power-of-two sizes */
175    struct bo_cache_bucket cache_bucket[14 * 4];
176    int num_buckets;
177    time_t time;
178 
179    struct hash_table *name_table;
180    struct hash_table *handle_table;
181 
182    struct util_vma_heap vma_allocator[BRW_MEMZONE_COUNT];
183 
184    bool has_llc:1;
185    bool has_mmap_wc:1;
186    bool has_mmap_offset:1;
187    bool bo_reuse:1;
188 
189    uint64_t initial_kflags;
190 };
191 
192 static mtx_t global_bufmgr_list_mutex = _MTX_INITIALIZER_NP;
193 static struct list_head global_bufmgr_list = {
194    .next = &global_bufmgr_list,
195    .prev = &global_bufmgr_list,
196 };
197 
198 static int bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
199                                   uint32_t stride);
200 
201 static void bo_free(struct brw_bo *bo);
202 
203 static uint64_t vma_alloc(struct brw_bufmgr *bufmgr,
204                           enum brw_memory_zone memzone,
205                           uint64_t size, uint64_t alignment);
206 
207 static struct brw_bo *
208 hash_find_bo(struct hash_table *ht, unsigned int key)
209 {
210    struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
211    return entry ? (struct brw_bo *) entry->data : NULL;
212 }
213 
214 static uint64_t
215 bo_tile_size(struct brw_bufmgr *bufmgr, uint64_t size, uint32_t tiling)
216 {
217    if (tiling == I915_TILING_NONE)
218       return size;
219 
220    /* 965+ just need multiples of page size for tiling */
221    return ALIGN(size, PAGE_SIZE);
222 }
223 
224 /*
225  * Round a given pitch up to the minimum required for X tiling on a
226  * given chip.  We use 512 as the minimum to allow for a later tiling
227  * change.
228  */
229 static uint32_t
230 bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling)
231 {
232    unsigned long tile_width;
233 
234    /* If untiled, then just align it so that we can do rendering
235     * to it with the 3D engine.
236     */
237    if (tiling == I915_TILING_NONE)
238       return ALIGN(pitch, 64);
239 
240    if (tiling == I915_TILING_X)
241       tile_width = 512;
242    else
243       tile_width = 128;
244 
245    /* 965 is flexible */
246    return ALIGN(pitch, tile_width);
247 }
248 
249 /**
250  * This function finds the correct bucket fit for the input size.
251  * It runs in O(1) time, computing the bucket index directly from the
252  * requested size instead of iterating through all the buckets.
253  */
254 static struct bo_cache_bucket *
255 bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
256 {
257    /* Round the size up to a whole number of pages. */
258    const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
259 
260    /* Row  Bucket sizes    clz((x-1) | 3)   Row    Column
261     *        in pages                      stride   size
262     *   0:   1  2  3  4 -> 30 30 30 30        4       1
263     *   1:   5  6  7  8 -> 29 29 29 29        4       1
264     *   2:  10 12 14 16 -> 28 28 28 28        8       2
265     *   3:  20 24 28 32 -> 27 27 27 27       16       4
266     */
267    const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
268    const unsigned row_max_pages = 4 << row;
269 
270    /* The '& ~2' is the special case for row 1. In row 1, max pages /
271     * 2 is 2, but the previous row maximum is zero (because there is
272     * no previous row). All row maximum sizes are power of 2, so that
273     * is the only case where that bit will be set.
274     */
275    const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
276    int col_size_log2 = row - 1;
277    col_size_log2 += (col_size_log2 < 0);
278 
279    const unsigned col = (pages - prev_row_max_pages +
280                         ((1 << col_size_log2) - 1)) >> col_size_log2;
281 
282    /* Calculating the index based on the row and column. */
283    const unsigned index = (row * 4) + (col - 1);
284 
285    return (index < bufmgr->num_buckets) ?
286           &bufmgr->cache_bucket[index] : NULL;
287 }
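/* Worked example of the index math above (illustrative): a 24-page request
 * gives clz((24-1) | 3) = clz(23) = 27, so row = 3 and row_max_pages = 32.
 * prev_row_max_pages = 16, col_size_log2 = 2, and
 * col = (24 - 16 + 3) >> 2 = 2, so index = 3 * 4 + (2 - 1) = 13 -- the
 * "24 pages" bucket in row 3 of the table above.
 */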
288 
289 static enum brw_memory_zone
290 memzone_for_address(uint64_t address)
291 {
292    const uint64_t _4GB = 1ull << 32;
293 
294    if (address >= _4GB)
295       return BRW_MEMZONE_OTHER;
296 
297    return BRW_MEMZONE_LOW_4G;
298 }
299 
300 static uint64_t
301 bucket_vma_alloc(struct brw_bufmgr *bufmgr,
302                  struct bo_cache_bucket *bucket,
303                  enum brw_memory_zone memzone)
304 {
305    struct util_dynarray *vma_list = &bucket->vma_list[memzone];
306    struct vma_bucket_node *node;
307 
308    if (vma_list->size == 0) {
309       /* This bucket allocator is out of space - allocate a new block of
310        * memory for 64 blocks from a larger allocator (either a larger
311        * bucket or util_vma).
312        *
313        * We align the address to the node size (64 blocks) so that
314        * bucket_vma_free can easily compute the starting address of this
315        * block by rounding any address we return down to the node size.
316        *
317        * Set the first bit used, and return the start address.
318        */
319       uint64_t node_size = 64ull * bucket->size;
320       node = util_dynarray_grow(vma_list, struct vma_bucket_node, 1);
321 
322       if (unlikely(!node))
323          return 0ull;
324 
325       uint64_t addr = vma_alloc(bufmgr, memzone, node_size, node_size);
326       node->start_address = intel_48b_address(addr);
327       node->bitmap = ~1ull;
328       return node->start_address;
329    }
330 
331    /* Pick any bit from any node - they're all the right size and free. */
332    node = util_dynarray_top_ptr(vma_list, struct vma_bucket_node);
333    int bit = ffsll(node->bitmap) - 1;
334    assert(bit >= 0 && bit <= 63);
335 
336    /* Reserve the memory by clearing the bit. */
337    assert((node->bitmap & (1ull << bit)) != 0ull);
338    node->bitmap &= ~(1ull << bit);
339 
340    uint64_t addr = node->start_address + bit * bucket->size;
341 
342    /* If this node is now completely full, remove it from the free list. */
343    if (node->bitmap == 0ull) {
344       (void) util_dynarray_pop(vma_list, struct vma_bucket_node);
345    }
346 
347    return addr;
348 }
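/* Example of the free-list bookkeeping above (illustrative): a freshly grown
 * node starts with bitmap = ~1ull (bit 0 handed out by this call).  The next
 * allocation finds ffsll(~1ull) - 1 = 1, clears bit 1 and returns
 * start_address + 1 * bucket->size; once all 64 bits are cleared the node is
 * popped off the list.
 */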
349 
350 static void
351 bucket_vma_free(struct bo_cache_bucket *bucket, uint64_t address)
352 {
353    enum brw_memory_zone memzone = memzone_for_address(address);
354    struct util_dynarray *vma_list = &bucket->vma_list[memzone];
355    const uint64_t node_bytes = 64ull * bucket->size;
356    struct vma_bucket_node *node = NULL;
357 
358    /* bucket_vma_alloc allocates 64 blocks at a time, and aligns it to
359     * that 64 block size.  So, we can round down to get the starting address.
360     */
361    uint64_t start = (address / node_bytes) * node_bytes;
362 
363    /* Dividing the offset from start by bucket size gives us the bit index. */
364    int bit = (address - start) / bucket->size;
365 
366    assert(start + bit * bucket->size == address);
367 
368    util_dynarray_foreach(vma_list, struct vma_bucket_node, cur) {
369       if (cur->start_address == start) {
370          node = cur;
371          break;
372       }
373    }
374 
375    if (!node) {
376       /* No node - the whole group of 64 blocks must have been in-use. */
377       node = util_dynarray_grow(vma_list, struct vma_bucket_node, 1);
378 
379       if (unlikely(!node))
380          return; /* bogus, leaks some GPU VMA, but nothing we can do... */
381 
382       node->start_address = start;
383       node->bitmap = 0ull;
384    }
385 
386    /* Set the bit to return the memory. */
387    assert((node->bitmap & (1ull << bit)) == 0ull);
388    node->bitmap |= 1ull << bit;
389 
390    /* The block might be entirely free now, and if so, we could return it
391     * to the larger allocator.  But we may as well hang on to it, in case
392     * we get more allocations at this block size.
393     */
394 }
395 
396 static struct bo_cache_bucket *
397 get_bucket_allocator(struct brw_bufmgr *bufmgr, uint64_t size)
398 {
399    /* Skip using the bucket allocator for very large sizes, as it allocates
400     * 64 of them and this can balloon rather quickly.
401     */
402    if (size > 1024 * PAGE_SIZE)
403       return NULL;
404 
405    struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size);
406 
407    if (bucket && bucket->size == size)
408       return bucket;
409 
410    return NULL;
411 }
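/* Sizing note (illustrative, assuming 4 KB pages): the 1024 * PAGE_SIZE cutoff
 * is 4 MB, so the largest bucketed allocation would reserve 64 * 4 MB = 256 MB
 * of VMA per node; anything bigger falls through to util_vma_heap_alloc()
 * instead.
 */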
412 
413 /**
414  * Allocate a section of virtual memory for a buffer, assigning an address.
415  *
416  * This uses either the bucket allocator for the given size, or the large
417  * object allocator (util_vma).
418  */
419 static uint64_t
420 vma_alloc(struct brw_bufmgr *bufmgr,
421           enum brw_memory_zone memzone,
422           uint64_t size,
423           uint64_t alignment)
424 {
425    /* Without softpin support, we let the kernel assign addresses. */
426    assert(brw_using_softpin(bufmgr));
427 
428    alignment = ALIGN(alignment, PAGE_SIZE);
429 
430    struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
431    uint64_t addr;
432 
433    if (bucket) {
434       addr = bucket_vma_alloc(bufmgr, bucket, memzone);
435    } else {
436       addr = util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size,
437                                  alignment);
438    }
439 
440    assert((addr >> 48ull) == 0);
441    assert((addr % alignment) == 0);
442 
443    return intel_canonical_address(addr);
444 }
445 
446 /**
447  * Free a virtual memory area, allowing the address to be reused.
448  */
449 static void
450 vma_free(struct brw_bufmgr *bufmgr,
451          uint64_t address,
452          uint64_t size)
453 {
454    assert(brw_using_softpin(bufmgr));
455 
456    /* Un-canonicalize the address. */
457    address = intel_48b_address(address);
458 
459    if (address == 0ull)
460       return;
461 
462    struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
463 
464    if (bucket) {
465       bucket_vma_free(bucket, address);
466    } else {
467       enum brw_memory_zone memzone = memzone_for_address(address);
468       util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
469    }
470 }
471 
472 int
473 brw_bo_busy(struct brw_bo *bo)
474 {
475    struct brw_bufmgr *bufmgr = bo->bufmgr;
476    struct drm_i915_gem_busy busy = { .handle = bo->gem_handle };
477 
478    int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
479    if (ret == 0) {
480       bo->idle = !busy.busy;
481       return busy.busy;
482    }
483    return false;
484 }
485 
486 int
487 brw_bo_madvise(struct brw_bo *bo, int state)
488 {
489    struct drm_i915_gem_madvise madv = {
490       .handle = bo->gem_handle,
491       .madv = state,
492       .retained = 1,
493    };
494 
495    drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
496 
497    return madv.retained;
498 }
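/* Usage note (illustrative): the BO cache marks idle buffers purgeable with
 * brw_bo_madvise(bo, I915_MADV_DONTNEED) in bo_unreference_final() and
 * reclaims them with I915_MADV_WILLNEED in bo_alloc_internal(); a return
 * value of 0 means the kernel already purged the pages, so the buffer must
 * be freed rather than reused.
 */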
499 
500 /* drop the oldest entries that have been purged by the kernel */
501 static void
502 brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
503                           struct bo_cache_bucket *bucket)
504 {
505    list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
506       if (brw_bo_madvise(bo, I915_MADV_DONTNEED))
507          break;
508 
509       list_del(&bo->head);
510       bo_free(bo);
511    }
512 }
513 
514 static struct brw_bo *
515 bo_calloc(void)
516 {
517    struct brw_bo *bo = calloc(1, sizeof(*bo));
518    if (!bo)
519       return NULL;
520 
521    list_inithead(&bo->exports);
522 
523    return bo;
524 }
525 
526 static struct brw_bo *
527 bo_alloc_internal(struct brw_bufmgr *bufmgr,
528                   const char *name,
529                   uint64_t size,
530                   enum brw_memory_zone memzone,
531                   unsigned flags,
532                   uint32_t tiling_mode,
533                   uint32_t stride)
534 {
535    struct brw_bo *bo;
536    int ret;
537    struct bo_cache_bucket *bucket;
538    bool alloc_from_cache;
539    uint64_t bo_size;
540    bool busy = false;
541    bool zeroed = false;
542 
543    if (flags & BO_ALLOC_BUSY)
544       busy = true;
545 
546    if (flags & BO_ALLOC_ZEROED)
547       zeroed = true;
548 
549    /* BUSY doesn't really jibe with ZEROED as we have to wait for it to
550     * be idle before we can memset.  Just disallow that combination.
551     */
552    assert(!(busy && zeroed));
553 
554    /* Round the allocated size up to a power of two number of pages. */
555    bucket = bucket_for_size(bufmgr, size);
556 
557    /* If we don't have caching at this size, don't actually round the
558     * allocation up.
559     */
560    if (bucket == NULL) {
561       unsigned int page_size = getpagesize();
562       bo_size = size == 0 ? page_size : ALIGN(size, page_size);
563    } else {
564       bo_size = bucket->size;
565    }
566    assert(bo_size);
567 
568    mtx_lock(&bufmgr->lock);
569    /* Get a buffer out of the cache if available */
570 retry:
571    alloc_from_cache = false;
572    if (bucket != NULL && !list_is_empty(&bucket->head)) {
573       if (busy && !zeroed) {
574          /* Allocate new render-target BOs from the tail (MRU)
575           * of the list, as it will likely be hot in the GPU
576           * cache and in the aperture for us.  If the caller
577           * asked us to zero the buffer, we don't want this
578           * because we are going to mmap it.
579           */
580          bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
581          list_del(&bo->head);
582          alloc_from_cache = true;
583       } else {
584          /* For non-render-target BOs (where we're probably
585           * going to map it first thing in order to fill it
586           * with data), check if the last BO in the cache is
587           * unbusy, and only reuse in that case. Otherwise,
588           * allocating a new buffer is probably faster than
589           * waiting for the GPU to finish.
590           */
591          bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
592          if (!brw_bo_busy(bo)) {
593             alloc_from_cache = true;
594             list_del(&bo->head);
595          }
596       }
597 
598       if (alloc_from_cache) {
599          assert(list_is_empty(&bo->exports));
600          if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
601             bo_free(bo);
602             brw_bo_cache_purge_bucket(bufmgr, bucket);
603             goto retry;
604          }
605 
606          if (bo_set_tiling_internal(bo, tiling_mode, stride)) {
607             bo_free(bo);
608             goto retry;
609          }
610 
611          if (zeroed) {
612             void *map = brw_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
613             if (!map) {
614                bo_free(bo);
615                goto retry;
616             }
617             memset(map, 0, bo_size);
618          }
619       }
620    }
621 
622    if (alloc_from_cache) {
623       /* If the cache BO isn't in the right memory zone, free the old
624        * memory and assign it a new address.
625        */
626       if ((bo->kflags & EXEC_OBJECT_PINNED) &&
627           memzone != memzone_for_address(bo->gtt_offset)) {
628          vma_free(bufmgr, bo->gtt_offset, bo->size);
629          bo->gtt_offset = 0ull;
630       }
631    } else {
632       bo = bo_calloc();
633       if (!bo)
634          goto err;
635 
636       bo->size = bo_size;
637       bo->idle = true;
638 
639       struct drm_i915_gem_create create = { .size = bo_size };
640 
641       /* All new BOs we get from the kernel are zeroed, so we don't need to
642        * worry about that here.
643        */
644       ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
645       if (ret != 0) {
646          free(bo);
647          goto err;
648       }
649 
650       bo->gem_handle = create.handle;
651 
652       bo->bufmgr = bufmgr;
653 
654       bo->tiling_mode = I915_TILING_NONE;
655       bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
656       bo->stride = 0;
657 
658       if (bo_set_tiling_internal(bo, tiling_mode, stride))
659          goto err_free;
660 
661       /* Calling set_domain() will allocate pages for the BO outside of the
662        * struct mutex lock in the kernel, which is more efficient than waiting
663        * to create them during the first execbuf that uses the BO.
664        */
665       struct drm_i915_gem_set_domain sd = {
666          .handle = bo->gem_handle,
667          .read_domains = I915_GEM_DOMAIN_CPU,
668          .write_domain = 0,
669       };
670 
671       if (drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0)
672          goto err_free;
673    }
674 
675    bo->name = name;
676    p_atomic_set(&bo->refcount, 1);
677    bo->reusable = true;
678    bo->cache_coherent = bufmgr->has_llc;
679    bo->index = -1;
680    bo->kflags = bufmgr->initial_kflags;
681 
682    if ((bo->kflags & EXEC_OBJECT_PINNED) && bo->gtt_offset == 0ull) {
683       bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 1);
684 
685       if (bo->gtt_offset == 0ull)
686          goto err_free;
687    }
688 
689    mtx_unlock(&bufmgr->lock);
690 
691    DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name,
692        (unsigned long long) size);
693 
694    return bo;
695 
696 err_free:
697    bo_free(bo);
698 err:
699    mtx_unlock(&bufmgr->lock);
700    return NULL;
701 }
702 
703 struct brw_bo *
704 brw_bo_alloc(struct brw_bufmgr *bufmgr,
705              const char *name, uint64_t size,
706              enum brw_memory_zone memzone)
707 {
708    return bo_alloc_internal(bufmgr, name, size, memzone,
709                             0, I915_TILING_NONE, 0);
710 }
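/* A minimal usage sketch of the allocation API above (illustrative only; the
 * "upload" name and 4096-byte size are invented for the example, and a NULL
 * context is passed just as bo_alloc_internal() does for its internal map):
 *
 *    struct brw_bo *bo = brw_bo_alloc(bufmgr, "upload", 4096,
 *                                     BRW_MEMZONE_OTHER);
 *    if (bo) {
 *       void *ptr = brw_bo_map(NULL, bo, MAP_WRITE);
 *       if (ptr)
 *          memset(ptr, 0, 4096);
 *       brw_bo_unreference(bo);
 *    }
 */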
711 
712 struct brw_bo *
713 brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, const char *name,
714                    uint64_t size, enum brw_memory_zone memzone,
715                    uint32_t tiling_mode, uint32_t pitch,
716                    unsigned flags)
717 {
718    return bo_alloc_internal(bufmgr, name, size, memzone,
719                             flags, tiling_mode, pitch);
720 }
721 
722 struct brw_bo *
723 brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name,
724                       int x, int y, int cpp, enum brw_memory_zone memzone,
725                       uint32_t tiling, uint32_t *pitch, unsigned flags)
726 {
727    uint64_t size;
728    uint32_t stride;
729    unsigned long aligned_y, height_alignment;
730 
731    /* If we're tiled, our allocations are in 8 or 32-row blocks,
732     * so failure to align our height means that we won't allocate
733     * enough pages.
734     *
735     * If we're untiled, we still have to align to 2 rows high
736     * because the data port accesses 2x2 blocks even if the
737     * bottom row isn't to be rendered, so failure to align means
738     * we could walk off the end of the GTT and fault.  This is
739     * documented on 965, and may be the case on older chipsets
740     * too so we try to be careful.
741     */
742    aligned_y = y;
743    height_alignment = 2;
744 
745    if (tiling == I915_TILING_X)
746       height_alignment = 8;
747    else if (tiling == I915_TILING_Y)
748       height_alignment = 32;
749    aligned_y = ALIGN(y, height_alignment);
750 
751    stride = x * cpp;
752    stride = bo_tile_pitch(bufmgr, stride, tiling);
753    size = stride * aligned_y;
754    size = bo_tile_size(bufmgr, size, tiling);
755    *pitch = stride;
756 
757    if (tiling == I915_TILING_NONE)
758       stride = 0;
759 
760    return bo_alloc_internal(bufmgr, name, size, memzone,
761                             flags, tiling, stride);
762 }
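/* Worked example of the pitch/size math above (illustrative): an X-tiled
 * 100x100 surface with cpp = 4 starts from stride = 400, which
 * bo_tile_pitch() rounds up to 512; aligned_y = ALIGN(100, 8) = 104, so
 * size = 512 * 104 = 53248 bytes, already a multiple of PAGE_SIZE, and
 * *pitch is returned as 512.
 */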
763 
764 /**
765  * Returns a brw_bo wrapping the given buffer object handle.
766  *
767  * This can be used when one application needs to pass a buffer object
768  * to another.
769  */
770 struct brw_bo *
771 brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
772                             const char *name, unsigned int handle)
773 {
774    struct brw_bo *bo;
775 
776    /* At the moment most applications only have a few named BOs.
777     * For instance, in a DRI client only the render buffers passed
778     * between X and the client are named. And since X returns the
779     * alternating names for the front/back buffer a linear search
780     * provides a sufficiently fast match.
781     */
782    mtx_lock(&bufmgr->lock);
783    bo = hash_find_bo(bufmgr->name_table, handle);
784    if (bo) {
785       brw_bo_reference(bo);
786       goto out;
787    }
788 
789    struct drm_gem_open open_arg = { .name = handle };
790    int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
791    if (ret != 0) {
792       DBG("Couldn't reference %s handle 0x%08x: %s\n",
793           name, handle, strerror(errno));
794       bo = NULL;
795       goto out;
796    }
797    /* Now see if someone has used a prime handle to get this
798     * object from the kernel before by looking through the list
799     * again for a matching gem_handle
800     */
801    bo = hash_find_bo(bufmgr->handle_table, open_arg.handle);
802    if (bo) {
803       brw_bo_reference(bo);
804       goto out;
805    }
806 
807    bo = bo_calloc();
808    if (!bo)
809       goto out;
810 
811    p_atomic_set(&bo->refcount, 1);
812 
813    bo->size = open_arg.size;
814    bo->gtt_offset = 0;
815    bo->bufmgr = bufmgr;
816    bo->gem_handle = open_arg.handle;
817    bo->name = name;
818    bo->global_name = handle;
819    bo->reusable = false;
820    bo->external = true;
821    bo->kflags = bufmgr->initial_kflags;
822 
823    if (bo->kflags & EXEC_OBJECT_PINNED)
824       bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 1);
825 
826    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
827    _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
828 
829    struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
830    ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
831    if (ret != 0)
832       goto err_unref;
833 
834    bo->tiling_mode = get_tiling.tiling_mode;
835    bo->swizzle_mode = get_tiling.swizzle_mode;
836    /* XXX stride is unknown */
837    DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
838 
839 out:
840    mtx_unlock(&bufmgr->lock);
841    return bo;
842 
843 err_unref:
844    bo_free(bo);
845    mtx_unlock(&bufmgr->lock);
846    return NULL;
847 }
848 
849 static void
850 bo_free(struct brw_bo *bo)
851 {
852    struct brw_bufmgr *bufmgr = bo->bufmgr;
853 
854    if (bo->map_cpu) {
855       VG_NOACCESS(bo->map_cpu, bo->size);
856       drm_munmap(bo->map_cpu, bo->size);
857    }
858    if (bo->map_wc) {
859       VG_NOACCESS(bo->map_wc, bo->size);
860       drm_munmap(bo->map_wc, bo->size);
861    }
862    if (bo->map_gtt) {
863       VG_NOACCESS(bo->map_gtt, bo->size);
864       drm_munmap(bo->map_gtt, bo->size);
865    }
866 
867    if (bo->external) {
868       struct hash_entry *entry;
869 
870       if (bo->global_name) {
871          entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
872          _mesa_hash_table_remove(bufmgr->name_table, entry);
873       }
874 
875       entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
876       _mesa_hash_table_remove(bufmgr->handle_table, entry);
877    } else {
878       assert(list_is_empty(&bo->exports));
879    }
880 
881    /* Close this object */
882    struct drm_gem_close close = { .handle = bo->gem_handle };
883    int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
884    if (ret != 0) {
885       DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
886           bo->gem_handle, bo->name, strerror(errno));
887    }
888 
889    if (bo->kflags & EXEC_OBJECT_PINNED)
890       vma_free(bo->bufmgr, bo->gtt_offset, bo->size);
891 
892    free(bo);
893 }
894 
895 /** Frees all cached buffers significantly older than @time. */
896 static void
897 cleanup_bo_cache(struct brw_bufmgr *bufmgr, time_t time)
898 {
899    int i;
900 
901    if (bufmgr->time == time)
902       return;
903 
904    for (i = 0; i < bufmgr->num_buckets; i++) {
905       struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
906 
907       list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
908          if (time - bo->free_time <= 1)
909             break;
910 
911          list_del(&bo->head);
912 
913          bo_free(bo);
914       }
915    }
916 
917    bufmgr->time = time;
918 }
919 
920 static void
921 bo_unreference_final(struct brw_bo *bo, time_t time)
922 {
923    struct brw_bufmgr *bufmgr = bo->bufmgr;
924    struct bo_cache_bucket *bucket;
925 
926    DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
927 
928    list_for_each_entry_safe(struct bo_export, export, &bo->exports, link) {
929       struct drm_gem_close close = { .handle = export->gem_handle };
930       intel_ioctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &close);
931 
932       list_del(&export->link);
933       free(export);
934    }
935 
936    bucket = bucket_for_size(bufmgr, bo->size);
937    /* Put the buffer into our internal cache for reuse if we can. */
938    if (bufmgr->bo_reuse && bo->reusable && bucket != NULL &&
939        brw_bo_madvise(bo, I915_MADV_DONTNEED)) {
940       bo->free_time = time;
941 
942       bo->name = NULL;
943 
944       list_addtail(&bo->head, &bucket->head);
945    } else {
946       bo_free(bo);
947    }
948 }
949 
950 void
951 brw_bo_unreference(struct brw_bo *bo)
952 {
953    if (bo == NULL)
954       return;
955 
956    assert(p_atomic_read(&bo->refcount) > 0);
957 
958    if (atomic_add_unless(&bo->refcount, -1, 1)) {
959       struct brw_bufmgr *bufmgr = bo->bufmgr;
960       struct timespec time;
961 
962       clock_gettime(CLOCK_MONOTONIC, &time);
963 
964       mtx_lock(&bufmgr->lock);
965 
966       if (p_atomic_dec_zero(&bo->refcount)) {
967          bo_unreference_final(bo, time.tv_sec);
968          cleanup_bo_cache(bufmgr, time.tv_sec);
969       }
970 
971       mtx_unlock(&bufmgr->lock);
972    }
973 }
974 
975 static void
976 bo_wait_with_stall_warning(struct brw_context *brw,
977                            struct brw_bo *bo,
978                            const char *action)
979 {
980    bool busy = brw && brw->perf_debug && !bo->idle;
981    double elapsed = unlikely(busy) ? -get_time() : 0.0;
982 
983    brw_bo_wait_rendering(bo);
984 
985    if (unlikely(busy)) {
986       elapsed += get_time();
987       if (elapsed > 1e-5) /* 0.01ms */
988          perf_debug("%s a busy \"%s\" BO stalled and took %.03f ms.\n",
989                     action, bo->name, elapsed * 1000);
990    }
991 }
992 
993 static void
994 print_flags(unsigned flags)
995 {
996    if (flags & MAP_READ)
997       DBG("READ ");
998    if (flags & MAP_WRITE)
999       DBG("WRITE ");
1000    if (flags & MAP_ASYNC)
1001       DBG("ASYNC ");
1002    if (flags & MAP_PERSISTENT)
1003       DBG("PERSISTENT ");
1004    if (flags & MAP_COHERENT)
1005       DBG("COHERENT ");
1006    if (flags & MAP_RAW)
1007       DBG("RAW ");
1008    DBG("\n");
1009 }
1010 
1011 static void *
1012 brw_bo_gem_mmap_legacy(struct brw_context *brw, struct brw_bo *bo, bool wc)
1013 {
1014    struct brw_bufmgr *bufmgr = bo->bufmgr;
1015 
1016    struct drm_i915_gem_mmap mmap_arg = {
1017       .handle = bo->gem_handle,
1018       .size = bo->size,
1019       .flags = wc ? I915_MMAP_WC : 0,
1020    };
1021 
1022    int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
1023    if (ret != 0) {
1024       DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1025           __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
1026       return NULL;
1027    }
1028    void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
1029 
1030    return map;
1031 }
1032 
1033 static void *
1034 brw_bo_gem_mmap_offset(struct brw_context *brw, struct brw_bo *bo, bool wc)
1035 {
1036    struct brw_bufmgr *bufmgr = bo->bufmgr;
1037 
1038    struct drm_i915_gem_mmap_offset mmap_arg = {
1039       .handle = bo->gem_handle,
1040       .flags = wc ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB,
1041    };
1042 
1043    /* Get the fake offset back */
1044    int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmap_arg);
1045    if (ret != 0) {
1046       DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
1047           __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
1048       return NULL;
1049    }
1050 
1051    /* And map it */
1052    void *map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
1053                         bufmgr->fd, mmap_arg.offset);
1054    if (map == MAP_FAILED) {
1055       DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1056           __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
1057       return NULL;
1058    }
1059 
1060    return map;
1061 }
1062 
1063 static void *
1064 brw_bo_gem_mmap(struct brw_context *brw, struct brw_bo *bo, bool wc)
1065 {
1066    struct brw_bufmgr *bufmgr = bo->bufmgr;
1067 
1068    if (bufmgr->has_mmap_offset)
1069       return brw_bo_gem_mmap_offset(brw, bo, wc);
1070    else
1071       return brw_bo_gem_mmap_legacy(brw, bo, wc);
1072 }
1073 
1074 static void *
1075 brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
1076 {
1077    /* We disallow CPU maps for writing to non-coherent buffers, as the
1078     * CPU map can become invalidated when a batch is flushed out, which
1079     * can happen at unpredictable times.  You should use WC maps instead.
1080     */
1081    assert(bo->cache_coherent || !(flags & MAP_WRITE));
1082 
1083    if (!bo->map_cpu) {
1084       DBG("brw_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name);
1085 
1086       void *map = brw_bo_gem_mmap(brw, bo, false);
1087       VG_DEFINED(map, bo->size);
1088 
1089       if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) {
1090          VG_NOACCESS(map, bo->size);
1091          drm_munmap(map, bo->size);
1092       }
1093    }
1094    assert(bo->map_cpu);
1095 
1096    DBG("brw_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name,
1097        bo->map_cpu);
1098    print_flags(flags);
1099 
1100    if (!(flags & MAP_ASYNC)) {
1101       bo_wait_with_stall_warning(brw, bo, "CPU mapping");
1102    }
1103 
1104    if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
1105       /* If we're reusing an existing CPU mapping, the CPU caches may
1106        * contain stale data from the last time we read from that mapping.
1107        * (With the BO cache, it might even be data from a previous buffer!)
1108        * Even if it's a brand new mapping, the kernel may have zeroed the
1109        * buffer via CPU writes.
1110        *
1111        * We need to invalidate those cachelines so that we see the latest
1112        * contents, and so long as we only read from the CPU mmap we do not
1113        * need to write those cachelines back afterwards.
1114        *
1115        * On LLC, the empirical evidence suggests that writes from the GPU
1116        * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
1117        * cachelines. (Other reads, such as the display engine, bypass the
1118        * LLC entirely requiring us to keep dirty pixels for the scanout
1119        * out of any cache.)
1120        */
1121       intel_invalidate_range(bo->map_cpu, bo->size);
1122    }
1123 
1124    return bo->map_cpu;
1125 }
1126 
1127 static void *
1128 brw_bo_map_wc(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
1129 {
1130    struct brw_bufmgr *bufmgr = bo->bufmgr;
1131 
1132    if (!bufmgr->has_mmap_wc)
1133       return NULL;
1134 
1135    if (!bo->map_wc) {
1136       DBG("brw_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name);
1137       void *map = brw_bo_gem_mmap(brw, bo, true);
1138       VG_DEFINED(map, bo->size);
1139 
1140       if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) {
1141          VG_NOACCESS(map, bo->size);
1142          drm_munmap(map, bo->size);
1143       }
1144    }
1145    assert(bo->map_wc);
1146 
1147    DBG("brw_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc);
1148    print_flags(flags);
1149 
1150    if (!(flags & MAP_ASYNC)) {
1151       bo_wait_with_stall_warning(brw, bo, "WC mapping");
1152    }
1153 
1154    return bo->map_wc;
1155 }
1156 
1157 /**
1158  * Perform an uncached mapping via the GTT.
1159  *
1160  * Write access through the GTT is not quite fully coherent. On low power
1161  * systems especially, like modern Atoms, we can observe reads from RAM before
1162  * the write via GTT has landed. A write memory barrier that flushes the Write
1163  * Combining Buffer (i.e. sfence/mfence) is not sufficient to order the later
1164  * read after the write as the GTT write suffers a small delay through the GTT
1165  * indirection. The kernel uses an uncached mmio read to ensure the GTT write
1166  * is ordered with reads (either by the GPU, WB or WC) and unconditionally
1167  * flushes prior to execbuf submission. However, if we are not informing the
1168  * kernel about our GTT writes, it will not flush before earlier access, such
1169  * as when using the cmdparser. Similarly, we need to be careful if we should
1170  * ever issue a CPU read immediately following a GTT write.
1171  *
1172  * Telling the kernel about write access also has one more important
1173  * side-effect. Upon receiving notification about the write, it cancels any
1174  * scanout buffering for FBC/PSR and friends. Later FBC/PSR is then flushed by
1175  * either SW_FINISH or DIRTYFB. The presumption is that we never write to the
1176  * actual scanout via a mmaping, only to a backbuffer and so all the FBC/PSR
1177  * tracking is handled on the buffer exchange instead.
1178  */
1179 static void *
1180 brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
1181 {
1182    struct brw_bufmgr *bufmgr = bo->bufmgr;
1183 
1184    /* Get a mapping of the buffer if we haven't before. */
1185    if (bo->map_gtt == NULL) {
1186       DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name);
1187 
1188       struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle };
1189 
1190       /* Get the fake offset back... */
1191       int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
1192       if (ret != 0) {
1193          DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1194              __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
1195          return NULL;
1196       }
1197 
1198       /* and mmap it. */
1199       void *map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1200                            MAP_SHARED, bufmgr->fd, mmap_arg.offset);
1201       if (map == MAP_FAILED) {
1202          DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1203              __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
1204          return NULL;
1205       }
1206 
1207       /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will
1208        * already intercept this mmap call. However, for consistency between
1209        * all the mmap paths, we mark the pointer as defined now and mark it
1210        * as inaccessible afterwards.
1211        */
1212       VG_DEFINED(map, bo->size);
1213 
1214       if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) {
1215          VG_NOACCESS(map, bo->size);
1216          drm_munmap(map, bo->size);
1217       }
1218    }
1219    assert(bo->map_gtt);
1220 
1221    DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt);
1222    print_flags(flags);
1223 
1224    if (!(flags & MAP_ASYNC)) {
1225       bo_wait_with_stall_warning(brw, bo, "GTT mapping");
1226    }
1227 
1228    return bo->map_gtt;
1229 }
1230 
1231 static bool
1232 can_map_cpu(struct brw_bo *bo, unsigned flags)
1233 {
1234    if (bo->cache_coherent)
1235       return true;
1236 
1237    /* Even if the buffer itself is not cache-coherent (such as a scanout), on
1238     * an LLC platform reads always are coherent (as they are performed via the
1239     * central system agent). It is just the writes that we need to take special
1240     * care to ensure that land in main memory and not stick in the CPU cache.
1241     */
1242    if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
1243       return true;
1244 
1245    /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
1246     * across batch flushes where the kernel will change cache domains of the
1247     * bo, invalidating continued access to the CPU mmap on non-LLC device.
1248     *
1249     * Similarly, ASYNC typically means that the buffer will be accessed via
1250     * both the CPU and the GPU simultaneously.  Batches may be executed that
1251     * use the BO even while it is mapped.  While OpenGL technically disallows
1252     * most drawing while non-persistent mappings are active, we may still use
1253     * the GPU for blits or other operations, causing batches to happen at
1254     * inconvenient times.
1255     */
1256    if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC))
1257       return false;
1258 
1259    return !(flags & MAP_WRITE);
1260 }
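/* Concrete example of the policy above (illustrative): a scanout buffer
 * (cache_coherent == false) on an LLC machine mapped with just MAP_READ still
 * takes the CPU path, while adding MAP_WRITE makes brw_bo_map() fall through
 * to the WC (or GTT) mapping instead.
 */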
1261 
1262 void *
1263 brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
1264 {
1265    if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
1266       return brw_bo_map_gtt(brw, bo, flags);
1267 
1268    void *map;
1269 
1270    if (can_map_cpu(bo, flags))
1271       map = brw_bo_map_cpu(brw, bo, flags);
1272    else
1273       map = brw_bo_map_wc(brw, bo, flags);
1274 
1275    /* Allow the attempt to fail by falling back to the GTT where necessary.
1276     *
1277     * Not every buffer can be mmaped directly using the CPU (or WC), for
1278     * example buffers that wrap stolen memory or are imported from other
1279     * devices. For those, we have little choice but to use a GTT mmapping.
1280     * However, if we use a slow GTT mmapping for reads where we expected fast
1281     * access, that order of magnitude difference in throughput will be clearly
1282     * expressed by angry users.
1283     *
1284     * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
1285     */
1286    if (!map && !(flags & MAP_RAW)) {
1287       if (brw) {
1288          perf_debug("Fallback GTT mapping for %s with access flags %x\n",
1289                     bo->name, flags);
1290       }
1291       map = brw_bo_map_gtt(brw, bo, flags);
1292    }
1293 
1294    return map;
1295 }
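/* Mapping usage sketch (illustrative; "dst" is a caller-provided buffer made
 * up for this example).  Callers never unmap explicitly -- the CPU/WC/GTT
 * pointers are cached on the BO and released in bo_free():
 *
 *    void *ptr = brw_bo_map(brw, bo, MAP_READ);   // stalls until idle
 *    if (ptr)
 *       memcpy(dst, ptr, bo->size);
 *
 * Passing MAP_ASYNC as well skips the bo_wait_with_stall_warning() stall and
 * leaves synchronization to the caller.
 */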
1296 
1297 int
1298 brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
1299                uint64_t size, const void *data)
1300 {
1301    struct brw_bufmgr *bufmgr = bo->bufmgr;
1302 
1303    struct drm_i915_gem_pwrite pwrite = {
1304       .handle = bo->gem_handle,
1305       .offset = offset,
1306       .size = size,
1307       .data_ptr = (uint64_t) (uintptr_t) data,
1308    };
1309 
1310    int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
1311    if (ret != 0) {
1312       ret = -errno;
1313       DBG("%s:%d: Error writing data to buffer %d: "
1314           "(%"PRIu64" %"PRIu64") %s .\n",
1315           __FILE__, __LINE__, bo->gem_handle, offset, size, strerror(errno));
1316    }
1317 
1318    return ret;
1319 }
1320 
1321 /** Waits for all GPU rendering with the object to have completed. */
1322 void
1323 brw_bo_wait_rendering(struct brw_bo *bo)
1324 {
1325    /* We require a kernel recent enough for WAIT_IOCTL support.
1326     * See brw_init_bufmgr()
1327     */
1328    brw_bo_wait(bo, -1);
1329 }
1330 
1331 /**
1332  * Waits on a BO for the given amount of time.
1333  *
1334  * @bo: buffer object to wait for
1335  * @timeout_ns: amount of time to wait in nanoseconds.
1336  *   If value is less than 0, an infinite wait will occur.
1337  *
1338  * Returns 0 if the wait was successful, i.e. the last batch referencing the
1339  * object has completed within the allotted time. Otherwise some negative return
1340  * value describes the error. Of particular interest is -ETIME when the wait has
1341  * failed to yield the desired result.
1342  *
1343  * Similar to brw_bo_wait_rendering except a timeout parameter allows
1344  * the operation to give up after a certain amount of time. Another subtle
1345  * difference is the internal locking semantics are different (this variant does
1346  * not hold the lock for the duration of the wait). This makes the wait subject
1347  * to a larger userspace race window.
1348  *
1349  * The implementation shall wait until the object is no longer actively
1350  * referenced within a batch buffer at the time of the call. The wait will
1351  * not guard against the buffer being re-issued via another thread or a flinked
1352  * handle. Userspace must make sure this race does not occur if such precision
1353  * is important.
1354  *
1355  * Note that some kernels have broken the infinite wait for negative values
1356  * promise; upgrade to the latest stable kernel if this is the case.
1357  */
1358 int
1359 brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns)
1360 {
1361    struct brw_bufmgr *bufmgr = bo->bufmgr;
1362 
1363    /* If we know it's idle, don't bother with the kernel round trip */
1364    if (bo->idle && !bo->external)
1365       return 0;
1366 
1367    struct drm_i915_gem_wait wait = {
1368       .bo_handle = bo->gem_handle,
1369       .timeout_ns = timeout_ns,
1370    };
1371    int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1372    if (ret != 0)
1373       return -errno;
1374 
1375    bo->idle = true;
1376 
1377    return ret;
1378 }
1379 
1380 void
1381 brw_bufmgr_unref(struct brw_bufmgr *bufmgr)
1382 {
1383    mtx_lock(&global_bufmgr_list_mutex);
1384    if (p_atomic_dec_zero(&bufmgr->refcount)) {
1385       list_del(&bufmgr->link);
1386    } else {
1387       bufmgr = NULL;
1388    }
1389    mtx_unlock(&global_bufmgr_list_mutex);
1390 
1391    if (!bufmgr)
1392       return;
1393 
1394    mtx_destroy(&bufmgr->lock);
1395 
1396    /* Free any cached buffer objects we were going to reuse */
1397    for (int i = 0; i < bufmgr->num_buckets; i++) {
1398       struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
1399 
1400       list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
1401          list_del(&bo->head);
1402 
1403          bo_free(bo);
1404       }
1405 
1406       if (brw_using_softpin(bufmgr)) {
1407          for (int z = 0; z < BRW_MEMZONE_COUNT; z++) {
1408             util_dynarray_fini(&bucket->vma_list[z]);
1409          }
1410       }
1411    }
1412 
1413    _mesa_hash_table_destroy(bufmgr->name_table, NULL);
1414    _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
1415 
1416    if (brw_using_softpin(bufmgr)) {
1417       for (int z = 0; z < BRW_MEMZONE_COUNT; z++) {
1418          util_vma_heap_finish(&bufmgr->vma_allocator[z]);
1419       }
1420    }
1421 
1422    close(bufmgr->fd);
1423    bufmgr->fd = -1;
1424 
1425    free(bufmgr);
1426 }
1427 
1428 static int
1429 bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
1430                        uint32_t stride)
1431 {
1432    struct brw_bufmgr *bufmgr = bo->bufmgr;
1433    struct drm_i915_gem_set_tiling set_tiling;
1434    int ret;
1435 
1436    if (bo->global_name == 0 &&
1437        tiling_mode == bo->tiling_mode && stride == bo->stride)
1438       return 0;
1439 
1440    memset(&set_tiling, 0, sizeof(set_tiling));
1441    do {
1442       /* set_tiling is slightly broken and overwrites the
1443        * input on the error path, so we have to open code
1444        * drmIoctl.
1445        */
1446       set_tiling.handle = bo->gem_handle;
1447       set_tiling.tiling_mode = tiling_mode;
1448       set_tiling.stride = stride;
1449 
1450       ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
1451    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
1452    if (ret == -1)
1453       return -errno;
1454 
1455    bo->tiling_mode = set_tiling.tiling_mode;
1456    bo->swizzle_mode = set_tiling.swizzle_mode;
1457    bo->stride = set_tiling.stride;
1458    return 0;
1459 }
1460 
1461 int
1462 brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
1463                   uint32_t *swizzle_mode)
1464 {
1465    *tiling_mode = bo->tiling_mode;
1466    *swizzle_mode = bo->swizzle_mode;
1467    return 0;
1468 }
1469 
1470 static struct brw_bo *
1471 brw_bo_gem_create_from_prime_internal(struct brw_bufmgr *bufmgr, int prime_fd,
1472                                       int tiling_mode, uint32_t stride)
1473 {
1474    uint32_t handle;
1475    struct brw_bo *bo;
1476 
1477    mtx_lock(&bufmgr->lock);
1478    int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1479    if (ret) {
1480       DBG("create_from_prime: failed to obtain handle from fd: %s\n",
1481           strerror(errno));
1482       mtx_unlock(&bufmgr->lock);
1483       return NULL;
1484    }
1485 
1486    /*
1487     * See if the kernel has already returned this buffer to us. Just as
1488     * for named buffers, we must not create two bo's pointing at the same
1489     * kernel object
1490     */
1491    bo = hash_find_bo(bufmgr->handle_table, handle);
1492    if (bo) {
1493       brw_bo_reference(bo);
1494       goto out;
1495    }
1496 
1497    bo = bo_calloc();
1498    if (!bo)
1499       goto out;
1500 
1501    p_atomic_set(&bo->refcount, 1);
1502 
1503    /* Determine size of bo.  The fd-to-handle ioctl really should
1504     * return the size, but it doesn't.  If we have kernel 3.12 or
1505     * later, we can lseek on the prime fd to get the size.  Older
1506     * kernels will just fail, in which case we fall back to the
1507     * provided (estimated or guessed) size. */
1508    ret = lseek(prime_fd, 0, SEEK_END);
1509    if (ret != -1)
1510       bo->size = ret;
1511 
1512    bo->bufmgr = bufmgr;
1513 
1514    bo->gem_handle = handle;
1515    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1516 
1517    bo->name = "prime";
1518    bo->reusable = false;
1519    bo->external = true;
1520    bo->kflags = bufmgr->initial_kflags;
1521 
1522    if (bo->kflags & EXEC_OBJECT_PINNED) {
1523       assert(bo->size > 0);
1524       bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 1);
1525    }
1526 
1527    if (tiling_mode < 0) {
1528       struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
1529       if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
1530          goto err;
1531 
1532       bo->tiling_mode = get_tiling.tiling_mode;
1533       bo->swizzle_mode = get_tiling.swizzle_mode;
1534       /* XXX stride is unknown */
1535    } else {
1536       bo_set_tiling_internal(bo, tiling_mode, stride);
1537    }
1538 
1539 out:
1540    mtx_unlock(&bufmgr->lock);
1541    return bo;
1542 
1543 err:
1544    bo_free(bo);
1545    mtx_unlock(&bufmgr->lock);
1546    return NULL;
1547 }
1548 
1549 struct brw_bo *
1550 brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, int prime_fd)
1551 {
1552    return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd, -1, 0);
1553 }
1554 
1555 struct brw_bo *
1556 brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr, int prime_fd,
1557                                    uint32_t tiling_mode, uint32_t stride)
1558 {
1559    assert(tiling_mode == I915_TILING_NONE ||
1560           tiling_mode == I915_TILING_X ||
1561           tiling_mode == I915_TILING_Y);
1562 
1563    return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd,
1564                                                 tiling_mode, stride);
1565 }
1566 
1567 static void
1568 brw_bo_make_external(struct brw_bo *bo)
1569 {
1570    struct brw_bufmgr *bufmgr = bo->bufmgr;
1571 
1572    if (!bo->external) {
1573       mtx_lock(&bufmgr->lock);
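      /* Re-check under the lock: another thread may have made this bo
       * external between the unlocked test above and acquiring the lock.
       */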
1574       if (!bo->external) {
1575          _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1576          bo->external = true;
1577       }
1578       mtx_unlock(&bufmgr->lock);
1579    }
1580 }
1581 
1582 int
1583 brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
1584 {
1585    struct brw_bufmgr *bufmgr = bo->bufmgr;
1586 
1587    brw_bo_make_external(bo);
1588 
1589    if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1590                           DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
1591       return -errno;
1592 
1593    bo->reusable = false;
1594 
1595    return 0;
1596 }
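
/* Illustrative usage sketch (not part of this file): exporting a bo as a
 * dma-buf.  The fd is created with DRM_CLOEXEC | DRM_RDWR above, and the bo
 * is marked non-reusable so it never goes back into the cache while an
 * external user may still reference it.  send_fd_to_consumer() is a
 * hypothetical transport (e.g. SCM_RIGHTS over a Unix socket).
 *
 *    int fd = -1;
 *    if (brw_bo_gem_export_to_prime(bo, &fd) == 0) {
 *       send_fd_to_consumer(fd);
 *       close(fd);   // safe once the consumer has received the fd
 *    }
 */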
1597 
1598 uint32_t
1599 brw_bo_export_gem_handle(struct brw_bo *bo)
1600 {
1601    brw_bo_make_external(bo);
1602 
1603    return bo->gem_handle;
1604 }
1605 
1606 int
1607 brw_bo_flink(struct brw_bo *bo, uint32_t *name)
1608 {
1609    struct brw_bufmgr *bufmgr = bo->bufmgr;
1610 
1611    if (!bo->global_name) {
1612       struct drm_gem_flink flink = { .handle = bo->gem_handle };
1613 
1614       if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
1615          return -errno;
1616 
1617       brw_bo_make_external(bo);
1618       mtx_lock(&bufmgr->lock);
1619       if (!bo->global_name) {
1620          bo->global_name = flink.name;
1621          _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
1622       }
1623       mtx_unlock(&bufmgr->lock);
1624 
1625       bo->reusable = false;
1626    }
1627 
1628    *name = bo->global_name;
1629    return 0;
1630 }
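
/* Illustrative usage sketch (not part of this file): flink names are the
 * legacy, globally visible way of sharing buffers (used by older DRI2-style
 * winsys code); prime fds are preferred where available.  publish_name() is
 * a hypothetical transport.
 *
 *    uint32_t name;
 *    if (brw_bo_flink(bo, &name) == 0)
 *       publish_name(name);
 */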
1631 
1632 int
1633 brw_bo_export_gem_handle_for_device(struct brw_bo *bo, int drm_fd,
1634                                     uint32_t *out_handle)
1635 {
1636    struct brw_bufmgr *bufmgr = bo->bufmgr;
1637 
1638    /* Only add the new GEM handle to the list of exports if it belongs to a
1639     * different GEM device. Otherwise we might close the same buffer multiple
1640     * times.
1641     */
1642    int ret = os_same_file_description(drm_fd, bufmgr->fd);
1643    WARN_ONCE(ret < 0,
1644              "Kernel has no file descriptor comparison support: %s\n",
1645              strerror(errno));
1646    if (ret == 0) {
1647       *out_handle = brw_bo_export_gem_handle(bo);
1648       return 0;
1649    }
1650 
1651    struct bo_export *export = calloc(1, sizeof(*export));
1652    if (!export)
1653       return -ENOMEM;
1654 
1655    export->drm_fd = drm_fd;
1656 
1657    int dmabuf_fd = -1;
1658    int err = brw_bo_gem_export_to_prime(bo, &dmabuf_fd);
1659    if (err) {
1660       free(export);
1661       return err;
1662    }
1663 
1664    mtx_lock(&bufmgr->lock);
1665    err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
1666    close(dmabuf_fd);
1667    if (err) {
1668       mtx_unlock(&bufmgr->lock);
1669       free(export);
1670       return err;
1671    }
1672 
1673    bool found = false;
1674    list_for_each_entry(struct bo_export, iter, &bo->exports, link) {
1675       if (iter->drm_fd != drm_fd)
1676          continue;
1677       /* Here we assume that for a given DRM fd, we'll always get back the
1678        * same GEM handle for a given buffer.
1679        */
1680       assert(iter->gem_handle == export->gem_handle);
1681       free(export);
1682       export = iter;
1683       found = true;
1684       break;
1685    }
1686    if (!found)
1687       list_addtail(&export->link, &bo->exports);
1688 
1689    mtx_unlock(&bufmgr->lock);
1690 
1691    *out_handle = export->gem_handle;
1692 
1693    return 0;
1694 }
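
/* Illustrative usage sketch (not part of this file): GEM handles are only
 * meaningful within the fd that created them, so handing a buffer to a
 * different DRM fd has to go through the prime fd round-trip implemented
 * above.  other_drm_fd is a hypothetical second device fd.
 *
 *    uint32_t handle;
 *    if (brw_bo_export_gem_handle_for_device(bo, other_drm_fd, &handle) == 0) {
 *       // handle is valid on other_drm_fd and is tracked in bo->exports
 *    }
 */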
1695 
1696 static void
1697 add_bucket(struct brw_bufmgr *bufmgr, int size)
1698 {
1699    unsigned int i = bufmgr->num_buckets;
1700 
1701    assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
1702 
1703    list_inithead(&bufmgr->cache_bucket[i].head);
1704    if (brw_using_softpin(bufmgr)) {
1705       for (int z = 0; z < BRW_MEMZONE_COUNT; z++)
1706          util_dynarray_init(&bufmgr->cache_bucket[i].vma_list[z], NULL);
1707    }
1708    bufmgr->cache_bucket[i].size = size;
1709    bufmgr->num_buckets++;
1710 
1711    assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
1712    assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
1713    assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
1714 }
1715 
1716 static void
1717 init_cache_buckets(struct brw_bufmgr *bufmgr)
1718 {
1719    uint64_t size, cache_max_size = 64 * 1024 * 1024;
1720 
1721    /* OK, so power of two buckets was too wasteful of memory.
1722     * Give 3 other sizes between each power of two, to hopefully
1723     * cover things accurately enough.  (The alternative is
1724     * probably to just go for exact matching of sizes, and assume
1725     * that for things like composited window resize the tiled
1726     * width/height alignment and rounding of sizes to pages will
1727     * get us useful cache hit rates anyway)
1728     */
1729    add_bucket(bufmgr, PAGE_SIZE);
1730    add_bucket(bufmgr, PAGE_SIZE * 2);
1731    add_bucket(bufmgr, PAGE_SIZE * 3);
1732 
1733    /* Initialize the linked lists for BO reuse cache. */
1734    for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
1735       add_bucket(bufmgr, size);
1736 
1737       add_bucket(bufmgr, size + size * 1 / 4);
1738       add_bucket(bufmgr, size + size * 2 / 4);
1739       add_bucket(bufmgr, size + size * 3 / 4);
1740    }
1741 }
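
/* With the scheme above, the bucket sizes work out to (in pages):
 *    1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, ...
 * i.e. every power of two from 4 pages up to cache_max_size, each followed
 * by three evenly spaced intermediate sizes, on top of the initial
 * 1/2/3-page buckets.
 */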
1742 
1743 uint32_t
1744 brw_create_hw_context(struct brw_bufmgr *bufmgr)
1745 {
1746    struct drm_i915_gem_context_create create = { };
1747    int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
1748    if (ret != 0) {
1749       DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
1750       return 0;
1751    }
1752 
1753    return create.ctx_id;
1754 }
1755 
1756 int
1757 brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
1758                             uint32_t ctx_id,
1759                             int priority)
1760 {
1761    struct drm_i915_gem_context_param p = {
1762       .ctx_id = ctx_id,
1763       .param = I915_CONTEXT_PARAM_PRIORITY,
1764       .value = priority,
1765    };
1766    int err;
1767 
1768    err = 0;
1769    if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
1770       err = -errno;
1771 
1772    return err;
1773 }
1774 
1775 void
1776 brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id)
1777 {
1778    struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id };
1779 
1780    if (ctx_id != 0 &&
1781        drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
1782       fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
1783               strerror(errno));
1784    }
1785 }
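
/* Illustrative usage sketch (not part of this file): typical lifetime of a
 * hardware context using the three helpers above.  A return of 0 from
 * brw_create_hw_context() means creation failed; the kernel never hands out
 * id 0 for a newly created context.
 *
 *    uint32_t ctx_id = brw_create_hw_context(bufmgr);
 *    if (ctx_id != 0)
 *       brw_hw_context_set_priority(bufmgr, ctx_id, 0);  // 0 == default
 *    ...
 *    brw_destroy_hw_context(bufmgr, ctx_id);  // tolerates ctx_id == 0
 */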
1786 
1787 int
1788 brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
1789 {
1790    struct drm_i915_reg_read reg_read = { .offset = offset };
1791    int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
1792 
1793    *result = reg_read.val;
1794    return ret;
1795 }
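
/* Illustrative usage sketch (not part of this file): the REG_READ ioctl only
 * permits a small whitelist of registers (e.g. the command streamer
 * TIMESTAMP).  REG_OFFSET is a hypothetical placeholder for the MMIO offset
 * the caller cares about.
 *
 *    uint64_t value;
 *    if (brw_reg_read(bufmgr, REG_OFFSET, &value) == 0) {
 *       // value now holds the register contents
 *    }
 */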
1796 
1797 static int
1798 gem_param(int fd, int name)
1799 {
1800    int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */
1801 
1802    struct drm_i915_getparam gp = { .param = name, .value = &v };
1803    if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
1804       return -1;
1805 
1806    return v;
1807 }
1808 
1809 static int
1810 gem_context_getparam(int fd, uint32_t context, uint64_t param, uint64_t *value)
1811 {
1812    struct drm_i915_gem_context_param gp = {
1813       .ctx_id = context,
1814       .param = param,
1815    };
1816 
1817    if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &gp))
1818       return -1;
1819 
1820    *value = gp.value;
1821 
1822    return 0;
1823 }
1824 
1825 bool
1826 brw_using_softpin(struct brw_bufmgr *bufmgr)
1827 {
1828    return bufmgr->initial_kflags & EXEC_OBJECT_PINNED;
1829 }
1830 
1831 static struct brw_bufmgr *
1832 brw_bufmgr_ref(struct brw_bufmgr *bufmgr)
1833 {
1834    p_atomic_inc(&bufmgr->refcount);
1835    return bufmgr;
1836 }
1837 
1838 /**
1839  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
1840  * and manage buffer objects.
1841  *
1842  * \param fd File descriptor of the opened DRM device.
1843  */
1844 static struct brw_bufmgr *
1845 brw_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
1846 {
1847    struct brw_bufmgr *bufmgr;
1848 
1849    bufmgr = calloc(1, sizeof(*bufmgr));
1850    if (bufmgr == NULL)
1851       return NULL;
1852 
1853    /* Handles to buffer objects belong to the device fd and are not
1854     * reference counted by the kernel.  If the same fd is used by
1855     * multiple parties (threads sharing the same screen bufmgr, or
1856     * even worse the same device fd passed to multiple libraries)
1857     * ownership of those handles is shared by those independent parties.
1858     *
1859     * Don't do this! Ensure that each library/bufmgr has its own device
1860     * fd so that its namespace does not clash with another.
1861     */
1862    bufmgr->fd = os_dupfd_cloexec(fd);
1863    if (bufmgr->fd < 0) {
1864       free(bufmgr);
1865       return NULL;
1866    }
1867 
1868    p_atomic_set(&bufmgr->refcount, 1);
1869 
1870    if (mtx_init(&bufmgr->lock, mtx_plain) != 0) {
1871       close(bufmgr->fd);
1872       free(bufmgr);
1873       return NULL;
1874    }
1875 
1876    uint64_t gtt_size;
1877    if (gem_context_getparam(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE, &gtt_size))
1878       gtt_size = 0;
1879 
1880    bufmgr->has_llc = devinfo->has_llc;
1881    bufmgr->has_mmap_wc = gem_param(fd, I915_PARAM_MMAP_VERSION) > 0;
1882    bufmgr->bo_reuse = bo_reuse;
1883    bufmgr->has_mmap_offset = gem_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;
1884 
1885    const uint64_t _4GB = 4ull << 30;
1886 
1887    /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
1888    const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
1889 
1890    if (devinfo->ver >= 8 && gtt_size > _4GB) {
1891       bufmgr->initial_kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
1892 
1893       /* Allocate VMA in userspace if we have softpin and full PPGTT. */
1894       if (gem_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) > 0 &&
1895           gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) > 1) {
1896          bufmgr->initial_kflags |= EXEC_OBJECT_PINNED;
1897 
1898          util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_LOW_4G],
1899                             PAGE_SIZE, _4GB_minus_1);
1900 
1901          /* Leave the last 4GB out of the high vma range, so that no state
1902           * base address + size can overflow 48 bits.
1903           */
1904          util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_OTHER],
1905                             1 * _4GB, gtt_size - 2 * _4GB);
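         /* As a concrete example (assuming a full 48-bit PPGTT, i.e.
          * gtt_size == 256 TiB; the real value comes from
          * I915_CONTEXT_PARAM_GTT_SIZE above):
          *   BRW_MEMZONE_LOW_4G: [4 KiB, 4 GiB)
          *   BRW_MEMZONE_OTHER:  [4 GiB, 256 TiB - 4 GiB)
          */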
1906       } else if (devinfo->ver >= 10) {
1907          /* Softpin landed in 4.5, but GVT used an aliasing PPGTT until
1908           * kernel commit 6b3816d69628becb7ff35978aa0751798b4a940a in
1909           * 4.14.  Gfx10+ GVT hasn't landed yet, so it's not actually a
1910           * problem - but extending this requirement back to earlier gens
1911           * might actually mean requiring 4.14.
1912           */
1913          fprintf(stderr, "i965 requires softpin (Kernel 4.5) on Gfx10+.\n");
1914          close(bufmgr->fd);
1915          free(bufmgr);
1916          return NULL;
1917       }
1918    }
1919 
1920    init_cache_buckets(bufmgr);
1921 
1922    bufmgr->name_table =
1923       _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
1924    bufmgr->handle_table =
1925       _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
1926 
1927    return bufmgr;
1928 }
1929 
1930 struct brw_bufmgr *
1931 brw_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd, bool bo_reuse)
1932 {
1933    struct stat st;
1934 
1935    if (fstat(fd, &st))
1936       return NULL;
1937 
1938    struct brw_bufmgr *bufmgr = NULL;
1939 
1940    mtx_lock(&global_bufmgr_list_mutex);
1941    list_for_each_entry(struct brw_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
1942       struct stat iter_st;
1943       if (fstat(iter_bufmgr->fd, &iter_st))
1944          continue;
1945 
1946       if (st.st_rdev == iter_st.st_rdev) {
1947          assert(iter_bufmgr->bo_reuse == bo_reuse);
1948          bufmgr = brw_bufmgr_ref(iter_bufmgr);
1949          goto unlock;
1950       }
1951    }
1952 
1953    bufmgr = brw_bufmgr_create(devinfo, fd, bo_reuse);
1954    if (bufmgr)
1955       list_addtail(&bufmgr->link, &global_bufmgr_list);
1956 
1957  unlock:
1958    mtx_unlock(&global_bufmgr_list_mutex);
1959 
1960    return bufmgr;
1961 }
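
/* Illustrative usage sketch (not part of this file): screen creation is
 * meant to go through brw_bufmgr_get_for_fd() so that two screens opened on
 * the same device node (matched by st_rdev above) share a single bufmgr and
 * therefore a single GEM handle namespace.  devinfo, screen_fd and
 * bo_reuse_enabled are hypothetical caller state.
 *
 *    struct brw_bufmgr *bufmgr =
 *       brw_bufmgr_get_for_fd(devinfo, screen_fd, bo_reuse_enabled);
 *    if (!bufmgr)
 *       return false;
 *    int fd = brw_bufmgr_get_fd(bufmgr);   // the bufmgr's own dup'ed fd
 */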
1962 
1963 int
1964 brw_bufmgr_get_fd(struct brw_bufmgr *bufmgr)
1965 {
1966    return bufmgr->fd;
1967 }
1968