1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file iris_bufmgr.c
25  *
26  * The Iris buffer manager.
27  *
28  * XXX: write better comments
29  * - BOs
30  * - Explain BO cache
31  * - main interface to GEM in the kernel
32  */
33 
34 #include <util/u_atomic.h>
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <assert.h>
41 #include <sys/ioctl.h>
42 #include <sys/mman.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <stdbool.h>
46 #include <time.h>
47 #include <unistd.h>
48 
49 #include "errno.h"
50 #include "common/intel_aux_map.h"
51 #include "common/intel_mem.h"
52 #include "c99_alloca.h"
53 #include "dev/intel_debug.h"
54 #include "common/intel_gem.h"
55 #include "dev/intel_device_info.h"
56 #include "drm-uapi/dma-buf.h"
57 #include "isl/isl.h"
58 #include "util/os_mman.h"
59 #include "util/u_debug.h"
60 #include "util/macros.h"
61 #include "util/hash_table.h"
62 #include "util/list.h"
63 #include "util/os_file.h"
64 #include "util/u_dynarray.h"
65 #include "util/vma.h"
66 #include "iris_bufmgr.h"
67 #include "iris_context.h"
68 #include "string.h"
69 #include "iris_kmd_backend.h"
70 #include "i915/iris_bufmgr.h"
71 #include "xe/iris_bufmgr.h"
72 
73 #include <xf86drm.h>
74 
75 #ifdef HAVE_VALGRIND
76 #include <valgrind.h>
77 #include <memcheck.h>
78 #define VG(x) x
79 #else
80 #define VG(x)
81 #endif
82 
83 /* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
84  * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
85  * leaked. All because it does not call VG(cli_free) from its
86  * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
87  * an allocation, we mark it available for use upon mmapping and remove
88  * it upon unmapping.
89  */
90 #define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
91 #define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
92 
93 /* On FreeBSD PAGE_SIZE is already defined in
94  * /usr/include/machine/param.h that is indirectly
95  * included here.
96  */
97 #ifndef PAGE_SIZE
98 #define PAGE_SIZE 4096
99 #endif
100 
101 #define WARN_ONCE(cond, fmt...) do {                            \
102    if (unlikely(cond)) {                                        \
103       static bool _warned = false;                              \
104       if (!_warned) {                                           \
105          fprintf(stderr, "WARNING: ");                          \
106          fprintf(stderr, fmt);                                  \
107          _warned = true;                                        \
108       }                                                         \
109    }                                                            \
110 } while (0)
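/* Editor's note (illustrative, not part of the original file): WARN_ONCE is
 * meant for conditions that could otherwise spam stderr.  A hypothetical use,
 * where MAX_EXPECTED_SIZE is an invented constant:
 *
 *    WARN_ONCE(size > MAX_EXPECTED_SIZE,
 *              "unexpectedly large allocation of %llu bytes\n",
 *              (unsigned long long) size);
 *
 * Because the 'static bool _warned' lives inside each macro expansion, the
 * message is printed at most once per call site, not once per process.
 */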
111 
112 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
113 
114 /**
115  * For debugging purposes, this returns a time in seconds.
116  */
117 static double
118 get_time(void)
119 {
120    struct timespec tp;
121 
122    clock_gettime(CLOCK_MONOTONIC, &tp);
123 
124    return tp.tv_sec + tp.tv_nsec / 1000000000.0;
125 }
126 
127 static inline int
128 atomic_add_unless(int *v, int add, int unless)
129 {
130    int c, old;
131    c = p_atomic_read(v);
132    while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
133       c = old;
134    return c == unless;
135 }
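/* Editor's note (illustrative): atomic_add_unless(v, add, unless) atomically
 * adds 'add' to *v only while *v != unless, and returns true exactly when *v
 * was equal to 'unless' (in which case no add is performed).  This is the
 * pattern iris_bo_unreference() relies on later in this file:
 *
 *    if (atomic_add_unless(&bo->refcount, -1, 1)) {
 *       // refcount was 1: take the locked slow path that drops the last
 *       // reference and either caches or frees the BO
 *    }
 */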
136 
137 static const char *
138 memzone_name(enum iris_memory_zone memzone)
139 {
140    const char *names[] = {
141       [IRIS_MEMZONE_SHADER]   = "shader",
142       [IRIS_MEMZONE_BINDER]   = "binder",
143       [IRIS_MEMZONE_SCRATCH]  = "scratchsurf",
144       [IRIS_MEMZONE_SURFACE]  = "surface",
145       [IRIS_MEMZONE_DYNAMIC]  = "dynamic",
146       [IRIS_MEMZONE_OTHER]    = "other",
147       [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor",
148    };
149    assert(memzone < ARRAY_SIZE(names));
150    return names[memzone];
151 }
152 
153 struct bo_cache_bucket {
154    /** List of cached BOs. */
155    struct list_head head;
156 
157    /** Size of this bucket, in bytes. */
158    uint64_t size;
159 };
160 
161 struct bo_export {
162    /** File descriptor associated with a handle export. */
163    int drm_fd;
164 
165    /** GEM handle in drm_fd */
166    uint32_t gem_handle;
167 
168    struct list_head link;
169 };
170 
171 struct iris_memregion {
172    struct intel_memory_class_instance *region;
173    uint64_t size;
174 };
175 
176 #define NUM_SLAB_ALLOCATORS 3
177 
178 struct iris_slab {
179    struct pb_slab base;
180 
181    /** The BO representing the entire slab */
182    struct iris_bo *bo;
183 
184    /** Array of iris_bo structs representing BOs allocated out of this slab */
185    struct iris_bo *entries;
186 };
187 
188 #define BUCKET_ARRAY_SIZE (14 * 4)
189 
190 struct iris_bucket_cache {
191    struct bo_cache_bucket bucket[BUCKET_ARRAY_SIZE];
192    int num_buckets;
193 };
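/* Editor's note: BUCKET_ARRAY_SIZE is (14 * 4) because the bucket cache is
 * laid out as rows of four columns, matching the row/column indexing scheme
 * used by bucket_for_size() below (index = row * 4 + column - 1), with up to
 * 14 rows of sizes per heap.
 */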
194 
195 struct iris_bufmgr {
196    /**
197     * List into the list of bufmgr.
198     */
199    struct list_head link;
200 
201    uint32_t refcount;
202 
203    int fd;
204 
205    simple_mtx_t lock;
206    simple_mtx_t bo_deps_lock;
207 
208    /** Array of lists of cached gem objects of power-of-two sizes */
209    struct iris_bucket_cache bucket_cache[IRIS_HEAP_MAX];
210 
211    time_t time;
212 
213    struct hash_table *name_table;
214    struct hash_table *handle_table;
215 
216    /**
217     * List of BOs which we've effectively freed, but are hanging on to
218     * until they're idle before closing and returning the VMA.
219     */
220    struct list_head zombie_list;
221 
222    struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT];
223 
224    struct iris_memregion vram, sys;
225 
226    /* Used only when use_global_vm is true. */
227    uint32_t global_vm_id;
228 
229    int next_screen_id;
230 
231    struct intel_device_info devinfo;
232    const struct iris_kmd_backend *kmd_backend;
233    struct intel_bind_timeline bind_timeline; /* Xe only */
234    bool bo_reuse:1;
235    bool use_global_vm:1;
236    bool compute_engine_supported:1;
237 
238    struct intel_aux_map_context *aux_map_ctx;
239 
240    struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
241 
242    struct iris_border_color_pool border_color_pool;
243 };
244 
245 static simple_mtx_t global_bufmgr_list_mutex = SIMPLE_MTX_INITIALIZER;
246 static struct list_head global_bufmgr_list = {
247    .next = &global_bufmgr_list,
248    .prev = &global_bufmgr_list,
249 };
250 
251 static void bo_free(struct iris_bo *bo);
252 
253 static struct iris_bo *
254 find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
255 {
256    struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
257    struct iris_bo *bo = entry ? entry->data : NULL;
258 
259    if (bo) {
260       assert(iris_bo_is_external(bo));
261       assert(iris_bo_is_real(bo));
262       assert(!bo->real.reusable);
263 
264       /* Being non-reusable, the BO cannot be in the cache lists, but it
265        * may be in the zombie list if it had reached zero references, but
266        * we hadn't yet closed it...and then reimported the same BO.  If it
267        * is, then remove it since it's now been resurrected.
268        */
269       if (list_is_linked(&bo->head))
270          list_del(&bo->head);
271 
272       iris_bo_reference(bo);
273    }
274 
275    return bo;
276 }
277 
278 /**
279  * This function finds the bucket that fits the given size.
280  * It computes the bucket index directly from the requested size in O(1),
281  * instead of iterating the size through all the buckets.
282  */
283 static struct bo_cache_bucket *
284 bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size,
285                 enum iris_heap heap, unsigned flags)
286 {
287    if (flags & BO_ALLOC_PROTECTED)
288       return NULL;
289 
290    const struct intel_device_info *devinfo = &bufmgr->devinfo;
291    struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
292 
293    if (devinfo->kmd_type == INTEL_KMD_TYPE_XE &&
294        (flags & (BO_ALLOC_SHARED | BO_ALLOC_SCANOUT)))
295       return NULL;
296 
297    /* Calculating the pages and rounding up to the page size. */
298    const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
299 
300    /* Row  Bucket sizes    clz((x-1) | 3)   Row    Column
301     *        in pages                      stride   size
302     *   0:   1  2  3  4 -> 30 30 30 30        4       1
303     *   1:   5  6  7  8 -> 29 29 29 29        4       1
304     *   2:  10 12 14 16 -> 28 28 28 28        8       2
305     *   3:  20 24 28 32 -> 27 27 27 27       16       4
306     */
307    const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
308    const unsigned row_max_pages = 4 << row;
309 
310    /* The '& ~2' is the special case for row 1. In row 1, max pages /
311     * 2 is 2, but the previous row maximum is zero (because there is
312     * no previous row). All row maximum sizes are power of 2, so that
313     * is the only case where that bit will be set.
314     */
315    const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
316    int col_size_log2 = row - 1;
317    col_size_log2 += (col_size_log2 < 0);
318 
319    const unsigned col = (pages - prev_row_max_pages +
320                         ((1 << col_size_log2) - 1)) >> col_size_log2;
321 
322    /* Calculating the index based on the row and column. */
323    const unsigned index = (row * 4) + (col - 1);
324 
325    return (index < cache->num_buckets) ? &cache->bucket[index] : NULL;
326 }
327 
328 enum iris_memory_zone
329 iris_memzone_for_address(uint64_t address)
330 {
331    STATIC_ASSERT(IRIS_MEMZONE_OTHER_START    > IRIS_MEMZONE_DYNAMIC_START);
332    STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START  > IRIS_MEMZONE_SCRATCH_START);
333    STATIC_ASSERT(IRIS_MEMZONE_SCRATCH_START == IRIS_MEMZONE_BINDER_START);
334    STATIC_ASSERT(IRIS_MEMZONE_BINDER_START   > IRIS_MEMZONE_SHADER_START);
335    STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START  > IRIS_MEMZONE_SURFACE_START);
336    STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);
337 
338    if (address >= IRIS_MEMZONE_OTHER_START)
339       return IRIS_MEMZONE_OTHER;
340 
341    if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
342       return IRIS_MEMZONE_BORDER_COLOR_POOL;
343 
344    if (address > IRIS_MEMZONE_DYNAMIC_START)
345       return IRIS_MEMZONE_DYNAMIC;
346 
347    if (address >= IRIS_MEMZONE_SURFACE_START)
348       return IRIS_MEMZONE_SURFACE;
349 
350    if (address >= (IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE))
351       return IRIS_MEMZONE_BINDER;
352 
353    if (address >= IRIS_MEMZONE_SCRATCH_START)
354       return IRIS_MEMZONE_SCRATCH;
355 
356    return IRIS_MEMZONE_SHADER;
357 }
358 
359 /**
360  * Allocate a section of virtual memory for a buffer, assigning an address.
361  *
362  * This uses either the bucket allocator for the given size, or the large
363  * object allocator (util_vma).
364  */
365 static uint64_t
366 vma_alloc(struct iris_bufmgr *bufmgr,
367           enum iris_memory_zone memzone,
368           uint64_t size,
369           uint64_t alignment)
370 {
371    simple_mtx_assert_locked(&bufmgr->lock);
372 
373    const unsigned _2mb = 2 * 1024 * 1024;
374 
375    /* Force minimum alignment based on device requirements */
376    assert((alignment & (alignment - 1)) == 0);
377    alignment = MAX2(alignment, bufmgr->devinfo.mem_alignment);
378 
379    /* If the allocation is a multiple of 2MB, ensure the virtual address is
380     * aligned to 2MB, so that it's possible for the kernel to use 64K pages.
381     */
382    if (size % _2mb == 0)
383       alignment = MAX2(alignment, _2mb);
384 
385    if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
386       return IRIS_BORDER_COLOR_POOL_ADDRESS;
387 
388    uint64_t addr =
389       util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment);
390 
391    assert((addr >> 48ull) == 0);
392    assert((addr % alignment) == 0);
393 
394    return intel_canonical_address(addr);
395 }
396 
397 static void
398 vma_free(struct iris_bufmgr *bufmgr,
399          uint64_t address,
400          uint64_t size)
401 {
402    simple_mtx_assert_locked(&bufmgr->lock);
403 
404    if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
405       return;
406 
407    /* Un-canonicalize the address. */
408    address = intel_48b_address(address);
409 
410    if (address == 0ull)
411       return;
412 
413    enum iris_memory_zone memzone = iris_memzone_for_address(address);
414 
415    assert(memzone < ARRAY_SIZE(bufmgr->vma_allocator));
416 
417    util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
418 }
419 
420 /* Exports a BO's implicit synchronization state to a drm_syncobj, returning
421  * its wrapping iris_syncobj. The drm_syncobj is created new and has to be
422  * destroyed by the caller after the execbuf ioctl.
423  */
424 struct iris_syncobj *
425 iris_bo_export_sync_state(struct iris_bo *bo)
426 {
427    struct iris_bufmgr *bufmgr = bo->bufmgr;
428    int drm_fd = iris_bufmgr_get_fd(bufmgr);
429 
430    struct iris_syncobj *iris_syncobj = iris_create_syncobj(bufmgr);
431 
432    struct dma_buf_export_sync_file export_sync_file_ioctl = {
433       .flags = DMA_BUF_SYNC_RW, /* TODO */
434       .fd = -1,
435    };
436    if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
437                    &export_sync_file_ioctl)) {
438       fprintf(stderr, "DMA_BUF_IOCTL_EXPORT_SYNC_FILE ioctl failed (%d)\n",
439               errno);
440       goto error_export;
441    }
442 
443    int sync_file_fd = export_sync_file_ioctl.fd;
444    assert(sync_file_fd >= 0);
445 
446    struct drm_syncobj_handle syncobj_import_ioctl = {
447       .handle = iris_syncobj->handle,
448       .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
449       .fd = sync_file_fd,
450    };
451    if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
452                    &syncobj_import_ioctl)) {
453       fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE ioctl failed (%d)\n",
454               errno);
455    }
456 
457    close(sync_file_fd);
458 
459    return iris_syncobj;
460 error_export:
461    iris_syncobj_destroy(bufmgr, iris_syncobj);
462    return NULL;
463 }
464 
465 /* Import the state of a sync_file_fd (which we should have gotten from
466  * batch_syncobj_to_sync_file_fd) into a BO as its implicit synchronization
467  * state.
468  */
469 void
470 iris_bo_import_sync_state(struct iris_bo *bo, int sync_file_fd)
471 {
472    struct dma_buf_import_sync_file import_sync_file_ioctl = {
473       .flags = DMA_BUF_SYNC_WRITE,
474       .fd = sync_file_fd,
475    };
476    if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
477                    &import_sync_file_ioctl))
478       fprintf(stderr, "DMA_BUF_IOCTL_IMPORT_SYNC_FILE ioctl failed (%d)\n",
479               errno);
480 }
481 
482 /* A timeout of 0 just checks for busyness. */
483 static int
484 iris_bo_wait_syncobj(struct iris_bo *bo, int64_t timeout_ns)
485 {
486    int ret = 0;
487    struct iris_bufmgr *bufmgr = bo->bufmgr;
488    const bool is_external = iris_bo_is_real(bo) && bo->real.prime_fd != -1;
489    struct iris_syncobj *external_implicit_syncobj = NULL;
490 
491    /* If we know it's idle, don't bother with the kernel round trip.
492     * Can't do that for Xe KMD with external BOs since we have to check the
493     * implicit synchronization information.
494     */
495    if (!is_external && bo->idle)
496       return 0;
497 
498    simple_mtx_lock(&bufmgr->bo_deps_lock);
499 
500    const int handles_len = bo->deps_size * IRIS_BATCH_COUNT * 2 + is_external;
501    uint32_t *handles = handles_len <= 32 ?
502                         (uint32_t *)alloca(handles_len * sizeof(*handles)) :
503                         (uint32_t *)malloc(handles_len * sizeof(*handles));
504    int handle_count = 0;
505 
506    if (is_external) {
507       external_implicit_syncobj = iris_bo_export_sync_state(bo);
508       if (external_implicit_syncobj)
509          handles[handle_count++] = external_implicit_syncobj->handle;
510    }
511 
512    for (int d = 0; d < bo->deps_size; d++) {
513       for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
514          struct iris_syncobj *r = bo->deps[d].read_syncobjs[b];
515          struct iris_syncobj *w = bo->deps[d].write_syncobjs[b];
516          if (r)
517             handles[handle_count++] = r->handle;
518          if (w)
519             handles[handle_count++] = w->handle;
520       }
521    }
522 
523    if (handle_count == 0)
524       goto out;
525 
526    /* Unlike the gem wait, negative values are not infinite here. */
527    int64_t timeout_abs = os_time_get_absolute_timeout(timeout_ns);
528    if (timeout_abs < 0)
529       timeout_abs = INT64_MAX;
530 
531    struct drm_syncobj_wait args = {
532       .handles = (uintptr_t) handles,
533       .timeout_nsec = timeout_abs,
534       .count_handles = handle_count,
535       .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
536    };
537 
538    ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
539    if (ret != 0) {
540       ret = -errno;
541       goto out;
542    }
543 
544    /* We just waited on everything, so clear all the deps. */
545    for (int d = 0; d < bo->deps_size; d++) {
546       for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
547          iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
548          iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
549       }
550    }
551 
552 out:
553    if (handles_len > 32)
554       free(handles);
555    if (external_implicit_syncobj)
556       iris_syncobj_reference(bufmgr, &external_implicit_syncobj, NULL);
557 
558    simple_mtx_unlock(&bufmgr->bo_deps_lock);
559    return ret;
560 }
561 
562 static bool
563 iris_bo_busy_syncobj(struct iris_bo *bo)
564 {
565    return iris_bo_wait_syncobj(bo, 0) == -ETIME;
566 }
567 
568 bool
569 iris_bo_busy(struct iris_bo *bo)
570 {
571    bool busy;
572 
573    switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
574    case INTEL_KMD_TYPE_I915:
575       if (iris_bo_is_external(bo))
576          busy = iris_i915_bo_busy_gem(bo);
577       else
578          busy = iris_bo_busy_syncobj(bo);
579       break;
580    case INTEL_KMD_TYPE_XE:
581       busy = iris_bo_busy_syncobj(bo);
582       break;
583    default:
584       unreachable("missing");
585       busy = true;
586    }
587 
588    bo->idle = !busy;
589 
590    return busy;
591 }
592 
593 /**
594  * Specify the volatility of the buffer.
595  * \param bo Buffer whose purgeable state is being changed
596  * \param state The purgeable status
597  *
598  * Use IRIS_MADVICE_DONT_NEED to mark the buffer as purgeable, and it will be
599  * reclaimed under memory pressure. If you subsequently require the buffer,
600  * then you must pass IRIS_MADVICE_WILL_NEED to mark the buffer as required.
601  *
602  * Returns true if the buffer was retained, or false if it was discarded
603  * whilst marked as IRIS_MADVICE_DONT_NEED.
604  */
605 static inline bool
606 iris_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
607 {
608    /* We can't madvise suballocated BOs. */
609    assert(iris_bo_is_real(bo));
610 
611    return bo->bufmgr->kmd_backend->bo_madvise(bo, state);
612 }
613 
614 static struct iris_bo *
615 bo_calloc(void)
616 {
617    struct iris_bo *bo = calloc(1, sizeof(*bo));
618    if (!bo)
619       return NULL;
620 
621    list_inithead(&bo->real.exports);
622 
623    bo->hash = _mesa_hash_pointer(bo);
624 
625    return bo;
626 }
627 
628 static void
629 bo_unmap(struct iris_bo *bo)
630 {
631    assert(iris_bo_is_real(bo));
632 
633    VG_NOACCESS(bo->real.map, bo->size);
634    os_munmap(bo->real.map, bo->size);
635    bo->real.map = NULL;
636 }
637 
638 static struct pb_slabs *
639 get_slabs(struct iris_bufmgr *bufmgr, uint64_t size)
640 {
641    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
642       struct pb_slabs *slabs = &bufmgr->bo_slabs[i];
643 
644       if (size <= 1ull << (slabs->min_order + slabs->num_orders - 1))
645          return slabs;
646    }
647 
648    unreachable("should have found a valid slab for this size");
649 }
650 
651 /* Return the power of two size of a slab entry matching the input size. */
652 static unsigned
653 get_slab_pot_entry_size(struct iris_bufmgr *bufmgr, unsigned size)
654 {
655    unsigned entry_size = util_next_power_of_two(size);
656    unsigned min_entry_size = 1 << bufmgr->bo_slabs[0].min_order;
657 
658    return MAX2(entry_size, min_entry_size);
659 }
660 
661 /* Return the slab entry alignment. */
662 static unsigned
663 get_slab_entry_alignment(struct iris_bufmgr *bufmgr, unsigned size)
664 {
665    unsigned entry_size = get_slab_pot_entry_size(bufmgr, size);
666 
667    if (size <= entry_size * 3 / 4)
668       return entry_size / 4;
669 
670    return entry_size;
671 }
672 
673 static bool
674 iris_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
675 {
676    struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
677 
678    return !iris_bo_busy(bo);
679 }
680 
681 static void
682 iris_slab_free(void *priv, struct pb_slab *pslab)
683 {
684    struct iris_bufmgr *bufmgr = priv;
685    struct iris_slab *slab = (void *) pslab;
686    struct intel_aux_map_context *aux_map_ctx = bufmgr->aux_map_ctx;
687 
688    assert(!slab->bo->aux_map_address);
689 
690    /* Since we're freeing the whole slab, all buffers allocated out of it
691     * must be reclaimable.  We require buffers to be idle to be reclaimed
692     * (see iris_can_reclaim_slab()), so we know all entries must be idle.
693     * Therefore, we can safely unmap their aux table entries.
694     */
695    for (unsigned i = 0; i < pslab->num_entries; i++) {
696       struct iris_bo *bo = &slab->entries[i];
697       if (aux_map_ctx && bo->aux_map_address) {
698          intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size);
699          bo->aux_map_address = 0;
700       }
701 
702       /* Unref read/write dependency syncobjs and free the array. */
703       for (int d = 0; d < bo->deps_size; d++) {
704          for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
705             iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
706             iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
707          }
708       }
709       free(bo->deps);
710    }
711 
712    iris_bo_unreference(slab->bo);
713 
714    free(slab->entries);
715    free(slab);
716 }
717 
718 static struct pb_slab *
719 iris_slab_alloc(void *priv,
720                 unsigned heap,
721                 unsigned entry_size,
722                 unsigned group_index)
723 {
724    struct iris_bufmgr *bufmgr = priv;
725    struct iris_slab *slab = calloc(1, sizeof(struct iris_slab));
726    uint32_t flags;
727    unsigned slab_size = 0;
728    /* We only support slab allocation for IRIS_MEMZONE_OTHER */
729    enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
730 
731    if (!slab)
732       return NULL;
733 
734    struct pb_slabs *slabs = bufmgr->bo_slabs;
735 
736    /* Determine the slab buffer size. */
737    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
738       unsigned max_entry_size =
739          1 << (slabs[i].min_order + slabs[i].num_orders - 1);
740 
741       if (entry_size <= max_entry_size) {
742          /* The slab size is twice the size of the largest possible entry. */
743          slab_size = max_entry_size * 2;
744 
745          if (!util_is_power_of_two_nonzero(entry_size)) {
746             assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));
747 
748             /* If the entry size is 3/4 of a power of two, we would waste
749              * space and not gain anything if we allocated only twice the
750              * power of two for the backing buffer:
751              *
752              *    2 * 3/4 = 1.5 usable with buffer size 2
753              *
754              * Allocating 5 times the entry size leads us to the next power
755              * of two and results in a much better memory utilization:
756              *
757              *    5 * 3/4 = 3.75 usable with buffer size 4
758              */
759             if (entry_size * 5 > slab_size)
760                slab_size = util_next_power_of_two(entry_size * 5);
761          }
762 
763          /* The largest slab should have the same size as the PTE fragment
764           * size to get faster address translation.
765           *
766           * TODO: move this to intel_device_info?
767           */
768          const unsigned pte_size = 2 * 1024 * 1024;
769 
770          if (i == NUM_SLAB_ALLOCATORS - 1 && slab_size < pte_size)
771             slab_size = pte_size;
772 
773          break;
774       }
775    }
776    assert(slab_size != 0);
777 
778    if (heap == IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT ||
779        heap == IRIS_HEAP_SYSTEM_MEMORY_UNCACHED)
780       flags = BO_ALLOC_SMEM;
781    else if (heap == IRIS_HEAP_DEVICE_LOCAL)
782       flags = BO_ALLOC_LMEM;
783    else
784       flags = BO_ALLOC_PLAIN;
785 
786    slab->bo =
787       iris_bo_alloc(bufmgr, "slab", slab_size, slab_size, memzone, flags);
788    if (!slab->bo)
789       goto fail;
790 
791    slab_size = slab->bo->size;
792 
793    slab->base.num_entries = slab_size / entry_size;
794    slab->base.num_free = slab->base.num_entries;
795    slab->base.group_index = group_index;
796    slab->base.entry_size = entry_size;
797    slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
798    if (!slab->entries)
799       goto fail_bo;
800 
801    list_inithead(&slab->base.free);
802 
803    for (unsigned i = 0; i < slab->base.num_entries; i++) {
804       struct iris_bo *bo = &slab->entries[i];
805 
806       bo->size = entry_size;
807       bo->bufmgr = bufmgr;
808       bo->hash = _mesa_hash_pointer(bo);
809       bo->gem_handle = 0;
810       bo->address = intel_canonical_address(slab->bo->address + i * entry_size);
811       bo->aux_map_address = 0;
812       bo->index = -1;
813       bo->refcount = 0;
814       bo->idle = true;
815       bo->zeroed = slab->bo->zeroed;
816 
817       bo->slab.entry.slab = &slab->base;
818 
819       bo->slab.real = iris_get_backing_bo(slab->bo);
820 
821       list_addtail(&bo->slab.entry.head, &slab->base.free);
822    }
823 
824    return &slab->base;
825 
826 fail_bo:
827    iris_bo_unreference(slab->bo);
828 fail:
829    free(slab);
830    return NULL;
831 }
832 
833 /**
834  * Selects a heap for the given buffer allocation flags.
835  *
836  * This determines the cacheability, coherency, and mmap mode settings.
837  */
838 static enum iris_heap
839 flags_to_heap(struct iris_bufmgr *bufmgr, unsigned flags)
840 {
841    const struct intel_device_info *devinfo = &bufmgr->devinfo;
842 
843    if (bufmgr->vram.size > 0) {
844       /* Discrete GPUs currently always snoop CPU caches. */
845       if ((flags & BO_ALLOC_SMEM) || (flags & BO_ALLOC_COHERENT))
846          return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
847 
848       if ((flags & BO_ALLOC_LMEM) ||
849           ((flags & BO_ALLOC_SCANOUT) && !(flags & BO_ALLOC_SHARED)))
850          return IRIS_HEAP_DEVICE_LOCAL;
851 
852       return IRIS_HEAP_DEVICE_LOCAL_PREFERRED;
853    } else if (devinfo->has_llc) {
854       assert(!(flags & BO_ALLOC_LMEM));
855 
856       if (flags & (BO_ALLOC_SCANOUT | BO_ALLOC_SHARED))
857          return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
858 
859       return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
860    } else {
861       assert(!devinfo->has_llc);
862       assert(!(flags & BO_ALLOC_LMEM));
863 
864       if (flags & BO_ALLOC_COHERENT)
865          return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
866 
867       return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
868    }
869 }
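/* Editor's summary of the heap selection above (illustrative):
 *
 *   discrete GPU (vram.size > 0):
 *      SMEM or COHERENT             -> SYSTEM_MEMORY_CACHED_COHERENT
 *      LMEM, or scanout w/o SHARED  -> DEVICE_LOCAL
 *      otherwise                    -> DEVICE_LOCAL_PREFERRED
 *   integrated GPU with LLC:
 *      SCANOUT or SHARED            -> SYSTEM_MEMORY_UNCACHED
 *      otherwise                    -> SYSTEM_MEMORY_CACHED_COHERENT
 *   integrated GPU without LLC:
 *      COHERENT                     -> SYSTEM_MEMORY_CACHED_COHERENT
 *      otherwise                    -> SYSTEM_MEMORY_UNCACHED
 */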
870 
871 static bool
872 zero_bo(struct iris_bufmgr *bufmgr,
873         unsigned flags,
874         struct iris_bo *bo)
875 {
876    assert(flags & BO_ALLOC_ZEROED);
877 
878    if (bo->zeroed)
879       return true;
880 
881    if (bufmgr->devinfo.has_flat_ccs && (flags & BO_ALLOC_LMEM)) {
882       /* With flat CCS, all allocations in LMEM have memory ranges with
883        * corresponding CCS elements. These elements are only accessible
884        * through GPU commands, but we don't issue GPU commands here.
885        */
886       return false;
887    }
888 
889    void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
890    if (!map)
891       return false;
892 
893    memset(map, 0, bo->size);
894    bo->zeroed = true;
895    return true;
896 }
897 
898 static struct iris_bo *
899 alloc_bo_from_slabs(struct iris_bufmgr *bufmgr,
900                     const char *name,
901                     uint64_t size,
902                     uint32_t alignment,
903                     unsigned flags)
904 {
905    if (flags & BO_ALLOC_NO_SUBALLOC)
906       return NULL;
907 
908    struct pb_slabs *last_slab = &bufmgr->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
909    unsigned max_slab_entry_size =
910       1 << (last_slab->min_order + last_slab->num_orders - 1);
911 
912    if (size > max_slab_entry_size)
913       return NULL;
914 
915    struct pb_slab_entry *entry;
916 
917    enum iris_heap heap = flags_to_heap(bufmgr, flags);
918 
919    unsigned alloc_size = size;
920 
921    /* Always use slabs for sizes less than 4 KB because the kernel aligns
922     * everything to 4 KB.
923     */
924    if (size < alignment && alignment <= 4 * 1024)
925       alloc_size = alignment;
926 
927    if (alignment > get_slab_entry_alignment(bufmgr, alloc_size)) {
928        /* 3/4-of-power-of-two allocations can return too small an alignment.
929        * Try again with a power of two allocation size.
930        */
931       unsigned pot_size = get_slab_pot_entry_size(bufmgr, alloc_size);
932 
933       if (alignment <= pot_size) {
934          /* This size works but wastes some memory to fulfill the alignment. */
935          alloc_size = pot_size;
936       } else {
937          /* can't fulfill alignment requirements */
938          return NULL;
939       }
940    }
941 
942    struct pb_slabs *slabs = get_slabs(bufmgr, alloc_size);
943    entry = pb_slab_alloc(slabs, alloc_size, heap);
944    if (!entry) {
945       /* Clean up and try again... */
946       pb_slabs_reclaim(slabs);
947 
948       entry = pb_slab_alloc(slabs, alloc_size, heap);
949    }
950    if (!entry)
951       return NULL;
952 
953    struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
954 
955    if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
956       /* This buffer was associated with an aux-buffer range.  We only allow
957        * slab allocated buffers to be reclaimed when idle (not in use by an
958        * executing batch).  (See iris_can_reclaim_slab().)  So we know that
959        * our previous aux mapping is no longer in use, and we can safely
960        * remove it.
961        */
962       intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
963                                 bo->size);
964       bo->aux_map_address = 0;
965    }
966 
967    p_atomic_set(&bo->refcount, 1);
968    bo->name = name;
969    bo->size = size;
970 
971    /* Zero the contents if necessary.  If this fails, fall back to
972     * allocating a fresh BO, which will always be zeroed by the kernel.
973     */
974    if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
975       pb_slab_free(slabs, &bo->slab.entry);
976       return NULL;
977    }
978 
979    return bo;
980 }
981 
982 static struct iris_bo *
983 alloc_bo_from_cache(struct iris_bufmgr *bufmgr,
984                     struct bo_cache_bucket *bucket,
985                     uint32_t alignment,
986                     enum iris_memory_zone memzone,
987                     enum iris_mmap_mode mmap_mode,
988                     unsigned flags,
989                     bool match_zone)
990 {
991    if (!bucket)
992       return NULL;
993 
994    struct iris_bo *bo = NULL;
995 
996    simple_mtx_assert_locked(&bufmgr->lock);
997 
998    list_for_each_entry_safe(struct iris_bo, cur, &bucket->head, head) {
999       assert(iris_bo_is_real(cur));
1000 
1001       /* Find one that's got the right mapping type.  We used to swap maps
1002        * around but the kernel doesn't allow this on discrete GPUs.
1003        */
1004       if (mmap_mode != cur->real.mmap_mode)
1005          continue;
1006 
1007       /* Try a little harder to find one that's already in the right memzone */
1008       if (match_zone && memzone != iris_memzone_for_address(cur->address))
1009          continue;
1010 
1011       if (cur->real.capture != !!(flags & BO_ALLOC_CAPTURE))
1012          continue;
1013 
1014       /* If the last BO in the cache is busy, there are no idle BOs.  Bail,
1015        * either falling back to a non-matching memzone, or if that fails,
1016        * allocating a fresh buffer.
1017        */
1018       if (iris_bo_busy(cur))
1019          return NULL;
1020 
1021       list_del(&cur->head);
1022 
1023       /* Tell the kernel we need this BO, and check whether it still exists. */
1024       if (!iris_bo_madvise(cur, IRIS_MADVICE_WILL_NEED)) {
1025          /* This BO was purged, throw it out and keep looking. */
1026          bo_free(cur);
1027          continue;
1028       }
1029 
1030       if (cur->aux_map_address) {
1031          /* This buffer was associated with an aux-buffer range. We make sure
1032           * that buffers are not reused from the cache while the buffer is (busy)
1033           * being used by an executing batch. Since we are here, the buffer is no
1034           * longer being used by a batch and the buffer was deleted (in order to
1035           * end up in the cache). Therefore its old aux-buffer range can be
1036           * removed from the aux-map.
1037           */
1038          if (cur->bufmgr->aux_map_ctx)
1039             intel_aux_map_unmap_range(cur->bufmgr->aux_map_ctx, cur->address,
1040                                       cur->size);
1041          cur->aux_map_address = 0;
1042       }
1043 
1044       /* If the cached BO isn't in the right memory zone, or the alignment
1045        * isn't sufficient, free the old memory and assign it a new address.
1046        */
1047       if (memzone != iris_memzone_for_address(cur->address) ||
1048           cur->address % alignment != 0) {
1049          if (!bufmgr->kmd_backend->gem_vm_unbind(cur)) {
1050             DBG("Unable to unbind vm of buf %u\n", cur->gem_handle);
1051             bo_free(cur);
1052             continue;
1053          }
1054 
1055          vma_free(bufmgr, cur->address, cur->size);
1056          cur->address = 0ull;
1057       }
1058 
1059       bo = cur;
1060       break;
1061    }
1062 
1063    if (!bo)
1064       return NULL;
1065 
1066    /* Zero the contents if necessary.  If this fails, fall back to
1067     * allocating a fresh BO, which will always be zeroed by the kernel.
1068     */
1069    assert(bo->zeroed == false);
1070    if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
1071       bo_free(bo);
1072       return NULL;
1073    }
1074 
1075    return bo;
1076 }
1077 
1078 static struct iris_bo *
1079 alloc_fresh_bo(struct iris_bufmgr *bufmgr, uint64_t bo_size, unsigned flags)
1080 {
1081    struct iris_bo *bo = bo_calloc();
1082    if (!bo)
1083       return NULL;
1084 
1085    /* Try to allocate memory in multiples of 2MB, as this allows us to use
1086     * 64K pages rather than the less-efficient 4K pages.  Most BOs smaller
1087     * than 64MB should hit the BO cache or slab allocations anyway, so this
1088     * shouldn't waste too much memory.  We do exclude small (< 1MB) sizes to
1089     * be defensive in case any of those bypass the caches and end up here.
1090     */
1091    if (bo_size >= 1024 * 1024)
1092       bo_size = align64(bo_size, 2 * 1024 * 1024);
1093 
1094    bo->real.heap = flags_to_heap(bufmgr, flags);
1095 
1096    const struct intel_memory_class_instance *regions[2];
1097    uint16_t num_regions = 0;
1098 
1099    if (bufmgr->vram.size > 0) {
1100       switch (bo->real.heap) {
1101       case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1102          /* For vram allocations, still use system memory as a fallback. */
1103          regions[num_regions++] = bufmgr->vram.region;
1104          regions[num_regions++] = bufmgr->sys.region;
1105          break;
1106       case IRIS_HEAP_DEVICE_LOCAL:
1107          regions[num_regions++] = bufmgr->vram.region;
1108          break;
1109       case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1110          regions[num_regions++] = bufmgr->sys.region;
1111          break;
1112       case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1113          /* not valid; discrete cards always enable snooping */
1114       case IRIS_HEAP_MAX:
1115          unreachable("invalid heap for BO");
1116       }
1117    } else {
1118       regions[num_regions++] = bufmgr->sys.region;
1119    }
1120 
1121    bo->gem_handle = bufmgr->kmd_backend->gem_create(bufmgr, regions,
1122                                                     num_regions, bo_size,
1123                                                     bo->real.heap, flags);
1124    if (bo->gem_handle == 0) {
1125       free(bo);
1126       return NULL;
1127    }
1128    bo->bufmgr = bufmgr;
1129    bo->size = bo_size;
1130    bo->idle = true;
1131    bo->zeroed = true;
1132    bo->real.capture = (flags & BO_ALLOC_CAPTURE) != 0;
1133 
1134    return bo;
1135 }
1136 
1137 const char *
1138 iris_heap_to_string[IRIS_HEAP_MAX] = {
1139    [IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT] = "system-cached-coherent",
1140    [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED] = "system-uncached",
1141    [IRIS_HEAP_DEVICE_LOCAL] = "local",
1142    [IRIS_HEAP_DEVICE_LOCAL_PREFERRED] = "local-preferred",
1143 };
1144 
1145 static enum iris_mmap_mode
1146 heap_to_mmap_mode(struct iris_bufmgr *bufmgr, enum iris_heap heap)
1147 {
1148    const struct intel_device_info *devinfo = &bufmgr->devinfo;
1149 
1150    switch (heap) {
1151    case IRIS_HEAP_DEVICE_LOCAL:
1152       return intel_vram_all_mappable(devinfo) ? IRIS_MMAP_WC : IRIS_MMAP_NONE;
1153    case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1154       return IRIS_MMAP_WC;
1155    case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1156       return IRIS_MMAP_WB;
1157    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1158       return IRIS_MMAP_WC;
1159    default:
1160       unreachable("invalid heap");
1161    }
1162 }
1163 
1164 struct iris_bo *
1165 iris_bo_alloc(struct iris_bufmgr *bufmgr,
1166               const char *name,
1167               uint64_t size,
1168               uint32_t alignment,
1169               enum iris_memory_zone memzone,
1170               unsigned flags)
1171 {
1172    struct iris_bo *bo;
1173    unsigned int page_size = getpagesize();
1174    enum iris_heap heap = flags_to_heap(bufmgr, flags);
1175    struct bo_cache_bucket *bucket =
1176       bucket_for_size(bufmgr, size, heap, flags);
1177 
1178    if (memzone != IRIS_MEMZONE_OTHER || (flags & BO_ALLOC_COHERENT))
1179       flags |= BO_ALLOC_NO_SUBALLOC;
1180 
1181    /* By default, capture all driver-internal buffers like shader kernels,
1182     * surface states, dynamic states, border colors, and so on.
1183     */
1184    if (memzone < IRIS_MEMZONE_OTHER || INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1185       flags |= BO_ALLOC_CAPTURE;
1186 
1187    bo = alloc_bo_from_slabs(bufmgr, name, size, alignment, flags);
1188 
1189    if (bo)
1190       return bo;
1191 
1192    /* Round the size up to the bucket size, or if we don't have caching
1193     * at this size, a multiple of the page size.
1194     */
1195    uint64_t bo_size =
1196       bucket ? bucket->size : MAX2(align64(size, page_size), page_size);
1197    enum iris_mmap_mode mmap_mode = heap_to_mmap_mode(bufmgr, heap);
1198 
1199    simple_mtx_lock(&bufmgr->lock);
1200 
1201    /* Get a buffer out of the cache if available.  First, we try to find
1202     * one with a matching memory zone so we can avoid reallocating VMA.
1203     */
1204    bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1205                             flags, true);
1206 
1207    /* If that fails, we try for any cached BO, without matching memzone. */
1208    if (!bo) {
1209       bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1210                                flags, false);
1211    }
1212 
1213    simple_mtx_unlock(&bufmgr->lock);
1214 
1215    if (!bo) {
1216       bo = alloc_fresh_bo(bufmgr, bo_size, flags);
1217       if (!bo)
1218          return NULL;
1219    }
1220 
1221    if (bo->address == 0ull) {
1222       simple_mtx_lock(&bufmgr->lock);
1223       bo->address = vma_alloc(bufmgr, memzone, bo->size, alignment);
1224       simple_mtx_unlock(&bufmgr->lock);
1225 
1226       if (bo->address == 0ull)
1227          goto err_free;
1228 
1229       if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1230          goto err_vm_alloc;
1231    }
1232 
1233    bo->name = name;
1234    p_atomic_set(&bo->refcount, 1);
1235    bo->real.reusable = bucket && bufmgr->bo_reuse;
1236    bo->real.protected = flags & BO_ALLOC_PROTECTED;
1237    bo->index = -1;
1238    bo->real.prime_fd = -1;
1239 
1240    assert(bo->real.map == NULL || bo->real.mmap_mode == mmap_mode);
1241    bo->real.mmap_mode = mmap_mode;
1242 
1243    /* On integrated GPUs, enable snooping to ensure coherency if needed.
1244     * For discrete, we instead use SMEM and avoid WB maps for coherency.
1245     */
1246    if ((flags & BO_ALLOC_COHERENT) &&
1247        !bufmgr->devinfo.has_llc && bufmgr->devinfo.has_caching_uapi) {
1248       if (bufmgr->kmd_backend->bo_set_caching(bo, true) != 0)
1249          goto err_free;
1250    }
1251 
1252    DBG("bo_create: buf %d (%s) (%s memzone) (%s) %llub\n", bo->gem_handle,
1253        bo->name, memzone_name(memzone), iris_heap_to_string[bo->real.heap],
1254        (unsigned long long) size);
1255 
1256    return bo;
1257 
1258 err_vm_alloc:
1259    simple_mtx_lock(&bufmgr->lock);
1260    vma_free(bufmgr, bo->address, bo->size);
1261    simple_mtx_unlock(&bufmgr->lock);
1262 err_free:
1263    simple_mtx_lock(&bufmgr->lock);
1264    bo_free(bo);
1265    simple_mtx_unlock(&bufmgr->lock);
1266    return NULL;
1267 }
1268 
1269 static int
1270 iris_bo_close(int fd, uint32_t gem_handle)
1271 {
1272    struct drm_gem_close close = {
1273       .handle = gem_handle,
1274    };
1275    return intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
1276 }
1277 
1278 struct iris_bo *
1279 iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
1280                        void *ptr, size_t size,
1281                        enum iris_memory_zone memzone)
1282 {
1283    struct iris_bo *bo;
1284 
1285    bo = bo_calloc();
1286    if (!bo)
1287       return NULL;
1288 
1289    bo->gem_handle = bufmgr->kmd_backend->gem_create_userptr(bufmgr, ptr, size);
1290    if (bo->gem_handle == 0)
1291       goto err_free;
1292 
1293    bo->name = name;
1294    bo->size = size;
1295    bo->real.map = ptr;
1296    bo->real.userptr = true;
1297 
1298    bo->bufmgr = bufmgr;
1299 
1300    if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1301       bo->real.capture = true;
1302 
1303    simple_mtx_lock(&bufmgr->lock);
1304    bo->address = vma_alloc(bufmgr, memzone, size, 1);
1305    simple_mtx_unlock(&bufmgr->lock);
1306 
1307    if (bo->address == 0ull)
1308       goto err_close;
1309 
1310    p_atomic_set(&bo->refcount, 1);
1311    bo->index = -1;
1312    bo->idle = true;
1313    bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1314    bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
1315    bo->real.prime_fd = -1;
1316 
1317    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1318       goto err_vma_free;
1319 
1320    return bo;
1321 
1322 err_vma_free:
1323    simple_mtx_lock(&bufmgr->lock);
1324    vma_free(bufmgr, bo->address, bo->size);
1325    simple_mtx_unlock(&bufmgr->lock);
1326 err_close:
1327    bufmgr->kmd_backend->gem_close(bufmgr, bo);
1328 err_free:
1329    free(bo);
1330    return NULL;
1331 }
1332 
1333 static bool
1334 needs_prime_fd(struct iris_bufmgr *bufmgr)
1335 {
1336    return bufmgr->devinfo.kmd_type == INTEL_KMD_TYPE_XE;
1337 }
1338 
1339 static bool
1340 iris_bo_set_prime_fd(struct iris_bo *bo)
1341 {
1342    struct iris_bufmgr *bufmgr = bo->bufmgr;
1343 
1344    if (needs_prime_fd(bufmgr) && bo->real.prime_fd == -1) {
1345       if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1346                              DRM_CLOEXEC | DRM_RDWR, &bo->real.prime_fd)) {
1347          fprintf(stderr, "Failed to get prime fd for bo %s/%u\n",
1348                  bo->name, bo->gem_handle);
1349          return false;
1350       }
1351    }
1352 
1353    return true;
1354 }
1355 
1356 /**
1357  * Returns a iris_bo wrapping the given buffer object handle.
1358  *
1359  * This can be used when one application needs to pass a buffer object
1360  * to another.
1361  */
1362 struct iris_bo *
1363 iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
1364                              const char *name, unsigned int handle)
1365 {
1366    struct iris_bo *bo;
1367 
1368    /* At the moment most applications only have a few named BOs.
1369     * For instance, in a DRI client only the render buffers passed
1370     * between X and the client are named. And since X returns the
1371     * alternating names for the front/back buffer a linear search
1372     * provides a sufficiently fast match.
1373     */
1374    simple_mtx_lock(&bufmgr->lock);
1375    bo = find_and_ref_external_bo(bufmgr->name_table, handle);
1376    if (bo)
1377       goto out;
1378 
1379    struct drm_gem_open open_arg = { .name = handle };
1380    int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
1381    if (ret != 0) {
1382       DBG("Couldn't reference %s handle 0x%08x: %s\n",
1383           name, handle, strerror(errno));
1384       bo = NULL;
1385       goto out;
1386    }
1387    /* Now see if someone has used a prime handle to get this
1388     * object from the kernel before by looking through the list
1389     * again for a matching gem_handle
1390     */
1391    bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle);
1392    if (bo)
1393       goto out;
1394 
1395    bo = bo_calloc();
1396    if (!bo) {
1397       struct iris_bo close_bo = {
1398             .gem_handle = open_arg.handle,
1399       };
1400       bufmgr->kmd_backend->gem_close(bufmgr, &close_bo);
1401       goto out;
1402    }
1403 
1404    p_atomic_set(&bo->refcount, 1);
1405 
1406    bo->size = open_arg.size;
1407    bo->bufmgr = bufmgr;
1408    bo->gem_handle = open_arg.handle;
1409    bo->name = name;
1410    bo->index = -1;
1411    bo->real.global_name = handle;
1412    bo->real.prime_fd = -1;
1413    bo->real.reusable = false;
1414    bo->real.imported = true;
1415    /* Xe KMD expects at least 1-way coherency for imports */
1416    bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1417    bo->real.mmap_mode = IRIS_MMAP_NONE;
1418    if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1419       bo->real.capture = true;
1420    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
1421    if (bo->address == 0ull)
1422       goto err_free;
1423 
1424    if (!iris_bo_set_prime_fd(bo))
1425       goto err_vm_alloc;
1426 
1427    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1428       goto err_vm_alloc;
1429 
1430    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1431    _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
1432 
1433    DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
1434 
1435 out:
1436    simple_mtx_unlock(&bufmgr->lock);
1437    return bo;
1438 
1439 err_vm_alloc:
1440    vma_free(bufmgr, bo->address, bo->size);
1441 err_free:
1442    bo_free(bo);
1443    simple_mtx_unlock(&bufmgr->lock);
1444    return NULL;
1445 }
1446 
1447 static void
1448 bo_close(struct iris_bo *bo)
1449 {
1450    struct iris_bufmgr *bufmgr = bo->bufmgr;
1451 
1452    simple_mtx_assert_locked(&bufmgr->lock);
1453    assert(iris_bo_is_real(bo));
1454 
1455    if (iris_bo_is_external(bo)) {
1456       struct hash_entry *entry;
1457 
1458       if (bo->real.global_name) {
1459          entry = _mesa_hash_table_search(bufmgr->name_table,
1460                                          &bo->real.global_name);
1461          _mesa_hash_table_remove(bufmgr->name_table, entry);
1462       }
1463 
1464       entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
1465       _mesa_hash_table_remove(bufmgr->handle_table, entry);
1466 
1467       list_for_each_entry_safe(struct bo_export, export, &bo->real.exports, link) {
1468          iris_bo_close(export->drm_fd, export->gem_handle);
1469 
1470          list_del(&export->link);
1471          free(export);
1472       }
1473    } else {
1474       assert(list_is_empty(&bo->real.exports));
1475    }
1476 
1477    /* Unbind and return the VMA for reuse */
1478    if (bufmgr->kmd_backend->gem_vm_unbind(bo))
1479       vma_free(bo->bufmgr, bo->address, bo->size);
1480    else
1481       DBG("Unable to unbind vm of buf %u\n", bo->gem_handle);
1482 
1483    if (bo->real.prime_fd != -1)
1484       close(bo->real.prime_fd);
1485 
1486    /* Close this object */
1487    if (bufmgr->kmd_backend->gem_close(bufmgr, bo) != 0) {
1488       DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1489           bo->gem_handle, bo->name, strerror(errno));
1490    }
1491 
1492    if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
1493       intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
1494                                 bo->size);
1495    }
1496 
1497    for (int d = 0; d < bo->deps_size; d++) {
1498       for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
1499          iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
1500          iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
1501       }
1502    }
1503    free(bo->deps);
1504 
1505    free(bo);
1506 }
1507 
1508 static void
1509 bo_free(struct iris_bo *bo)
1510 {
1511    struct iris_bufmgr *bufmgr = bo->bufmgr;
1512 
1513    simple_mtx_assert_locked(&bufmgr->lock);
1514    assert(iris_bo_is_real(bo));
1515 
1516    if (!bo->real.userptr && bo->real.map)
1517       bo_unmap(bo);
1518 
1519    if (bo->idle || !iris_bo_busy(bo)) {
1520       bo_close(bo);
1521    } else {
1522       /* Defer closing the GEM BO and returning the VMA for reuse until the
1523        * BO is idle.  Just move it to the dead list for now.
1524        */
1525       list_addtail(&bo->head, &bufmgr->zombie_list);
1526    }
1527 }
1528 
1529 /** Frees all cached buffers significantly older than @time. */
1530 static void
1531 cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time)
1532 {
1533    simple_mtx_assert_locked(&bufmgr->lock);
1534 
1535    if (bufmgr->time == time)
1536       return;
1537 
1538    for (int h = 0; h < IRIS_HEAP_MAX; h++) {
1539       struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1540 
1541       for (int i = 0; i < cache->num_buckets; i++) {
1542          struct bo_cache_bucket *bucket = &cache->bucket[i];
1543 
1544          list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1545             if (time - bo->real.free_time <= 1)
1546                break;
1547 
1548             list_del(&bo->head);
1549 
1550             bo_free(bo);
1551          }
1552       }
1553    }
1554 
1555    list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1556       /* Stop once we reach a busy BO - all others past this point were
1557        * freed more recently so are likely also busy.
1558        */
1559       if (!bo->idle && iris_bo_busy(bo))
1560          break;
1561 
1562       list_del(&bo->head);
1563       bo_close(bo);
1564    }
1565 
1566    bufmgr->time = time;
1567 }
1568 
1569 static void
1570 bo_unreference_final(struct iris_bo *bo, time_t time)
1571 {
1572    struct iris_bufmgr *bufmgr = bo->bufmgr;
1573 
1574    DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
1575 
1576    assert(iris_bo_is_real(bo));
1577 
1578    struct bo_cache_bucket *bucket = !bo->real.reusable ? NULL :
1579       bucket_for_size(bufmgr, bo->size, bo->real.heap, 0);
1580 
1581    /* Put the buffer into our internal cache for reuse if we can. */
1582    if (bucket && iris_bo_madvise(bo, IRIS_MADVICE_DONT_NEED)) {
1583       bo->real.free_time = time;
1584       bo->name = NULL;
1585 
1586       list_addtail(&bo->head, &bucket->head);
1587    } else {
1588       bo_free(bo);
1589    }
1590 }
1591 
1592 void
1593 iris_bo_unreference(struct iris_bo *bo)
1594 {
1595    if (bo == NULL)
1596       return;
1597 
1598    assert(p_atomic_read(&bo->refcount) > 0);
1599 
1600    if (atomic_add_unless(&bo->refcount, -1, 1)) {
1601       struct iris_bufmgr *bufmgr = bo->bufmgr;
1602       struct timespec time;
1603 
1604       clock_gettime(CLOCK_MONOTONIC, &time);
1605 
1606       bo->zeroed = false;
1607       if (bo->gem_handle == 0) {
1608          pb_slab_free(get_slabs(bufmgr, bo->size), &bo->slab.entry);
1609       } else {
1610          simple_mtx_lock(&bufmgr->lock);
1611 
1612          if (p_atomic_dec_zero(&bo->refcount)) {
1613             bo_unreference_final(bo, time.tv_sec);
1614             cleanup_bo_cache(bufmgr, time.tv_sec);
1615          }
1616 
1617          simple_mtx_unlock(&bufmgr->lock);
1618       }
1619    }
1620 }
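
/* Illustrative sketch (not part of the upstream file): the reference-counting
 * contract implemented above.  Only iris_bo_reference()/iris_bo_unreference()
 * from iris_bufmgr.h are used.  On the final unreference a GEM-backed BO is
 * either parked in a reuse bucket (if it is reusable and madvise succeeds) or
 * freed, while a slab-suballocated BO is returned to its slab:
 *
 *    iris_bo_reference(bo);      // take an extra reference
 *    ...
 *    iris_bo_unreference(bo);    // drop it; the BO stays alive
 *    iris_bo_unreference(bo);    // final unref: cached for reuse, or freed
 */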
1621 
1622 static void
1623 bo_wait_with_stall_warning(struct util_debug_callback *dbg,
1624                            struct iris_bo *bo,
1625                            const char *action)
1626 {
1627    bool busy = dbg && !bo->idle;
1628    double elapsed = unlikely(busy) ? -get_time() : 0.0;
1629 
1630    iris_bo_wait_rendering(bo);
1631 
1632    if (unlikely(busy)) {
1633       elapsed += get_time();
1634       if (elapsed > 1e-5) /* 0.01ms */ {
1635          perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
1636                     action, bo->name, elapsed * 1000);
1637       }
1638    }
1639 }
1640 
1641 static void
1642 print_flags(unsigned flags)
1643 {
1644    if (flags & MAP_READ)
1645       DBG("READ ");
1646    if (flags & MAP_WRITE)
1647       DBG("WRITE ");
1648    if (flags & MAP_ASYNC)
1649       DBG("ASYNC ");
1650    if (flags & MAP_PERSISTENT)
1651       DBG("PERSISTENT ");
1652    if (flags & MAP_COHERENT)
1653       DBG("COHERENT ");
1654    if (flags & MAP_RAW)
1655       DBG("RAW ");
1656    DBG("\n");
1657 }
1658 
1659 void *
1660 iris_bo_map(struct util_debug_callback *dbg,
1661             struct iris_bo *bo, unsigned flags)
1662 {
1663    struct iris_bufmgr *bufmgr = bo->bufmgr;
1664    void *map = NULL;
1665 
1666    if (bo->gem_handle == 0) {
1667       struct iris_bo *real = iris_get_backing_bo(bo);
1668       uint64_t offset = bo->address - real->address;
1669       map = iris_bo_map(dbg, real, flags | MAP_ASYNC) + offset;
1670    } else {
1671       assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
1672       if (bo->real.mmap_mode == IRIS_MMAP_NONE)
1673          return NULL;
1674 
1675       if (!bo->real.map) {
1676          DBG("iris_bo_map: %d (%s)\n", bo->gem_handle, bo->name);
1677          map = bufmgr->kmd_backend->gem_mmap(bufmgr, bo);
1678          if (!map) {
1679             return NULL;
1680          }
1681 
1682          VG_DEFINED(map, bo->size);
1683 
1684          if (p_atomic_cmpxchg(&bo->real.map, NULL, map)) {
1685             VG_NOACCESS(map, bo->size);
1686             os_munmap(map, bo->size);
1687          }
1688       }
1689       assert(bo->real.map);
1690       map = bo->real.map;
1691    }
1692 
1693    DBG("iris_bo_map: %d (%s) -> %p\n",
1694        bo->gem_handle, bo->name, bo->real.map);
1695    print_flags(flags);
1696 
1697    if (!(flags & MAP_ASYNC)) {
1698       bo_wait_with_stall_warning(dbg, bo, "memory mapping");
1699    }
1700 
1701    return map;
1702 }
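
/* Illustrative sketch (not part of the upstream file): writing to a BO through
 * its CPU mapping.  'dbg', 'bo', 'data' and 'data_size' are assumed to be the
 * caller's.  Mappings are cached on the BO and torn down in bo_unmap()/bo_free(),
 * so callers do not unmap explicitly.  Without MAP_ASYNC the call blocks until
 * the GPU is done with the buffer (with a perf_debug stall warning):
 *
 *    void *ptr = iris_bo_map(dbg, bo, MAP_WRITE);
 *    if (ptr)
 *       memcpy(ptr, data, data_size);
 */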
1703 
1704 /**
1705  * Waits on a BO for the given amount of time.
1706  *
1707  * @bo: buffer object to wait for
1708  * @timeout_ns: amount of time to wait in nanoseconds.
1709  *   If value is less than 0, an infinite wait will occur.
1710  *
1711  * Returns 0 if the wait was successful, i.e. the last batch referencing the
1712  * object completed within the allotted time.  Otherwise a negative return
1713  * value describes the error; of particular interest is -ETIME, returned when
1714  * the wait expired before the object became idle.
1715  *
1716  * Similar to iris_bo_wait_rendering, except that a timeout parameter allows
1717  * the operation to give up after a certain amount of time.  Another subtle
1718  * difference is that the locking semantics differ: this variant does not hold
1719  * the lock for the duration of the wait, which makes the wait subject to a
1720  * larger userspace race window.
1721  *
1722  * The implementation waits until the object is no longer actively referenced
1723  * within a batch buffer at the time of the call.  The wait does not guarantee
1724  * that the buffer will not be re-issued via another thread or a flinked
1725  * handle.  Userspace must make sure this race does not occur if such
1726  * precision is important.
1727  *
1728  * Note that some kernels have broken the promise of an infinite wait for
1729  * negative values; upgrade to the latest stable kernel if this is the case.
1730  */
1731 static inline int
1732 iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
1733 {
1734    int ret;
1735 
1736    switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
1737    case INTEL_KMD_TYPE_I915:
1738       if (iris_bo_is_external(bo))
1739          ret = iris_i915_bo_wait_gem(bo, timeout_ns);
1740       else
1741          ret = iris_bo_wait_syncobj(bo, timeout_ns);
1742       break;
1743    case INTEL_KMD_TYPE_XE:
1744       ret = iris_bo_wait_syncobj(bo, timeout_ns);
1745       break;
1746    default:
1747       unreachable("missing");
1748       ret = -1;
1749    }
1750 
1751    bo->idle = ret == 0;
1752 
1753    return ret;
1754 }
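
/* Illustrative sketch (not part of the upstream file): typical in-file use of
 * the timeout parameter.  A negative timeout waits forever (this is how
 * iris_bo_wait_rendering() below uses it); a finite timeout bounds the wait
 * and reports an error, typically -ETIME, on expiry:
 *
 *    if (iris_bo_wait(bo, 1000 * 1000) != 0) {
 *       // not idle within ~1 ms; fall back to an unbounded wait
 *       iris_bo_wait(bo, -1);
 *    }
 */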
1755 
1756 /** Waits for all GPU rendering with the object to have completed. */
1757 void
1758 iris_bo_wait_rendering(struct iris_bo *bo)
1759 {
1760    /* We require a kernel recent enough for WAIT_IOCTL support.
1761     * See intel_init_bufmgr()
1762     */
1763    iris_bo_wait(bo, -1);
1764 }
1765 
1766 static void
1767 iris_bufmgr_destroy_global_vm(struct iris_bufmgr *bufmgr)
1768 {
1769    switch (bufmgr->devinfo.kmd_type) {
1770    case INTEL_KMD_TYPE_I915:
1771       /* Nothing to do in i915 */
1772       break;
1773    case INTEL_KMD_TYPE_XE:
1774       intel_bind_timeline_finish(&bufmgr->bind_timeline, bufmgr->fd);
1775       iris_xe_destroy_global_vm(bufmgr);
1776       break;
1777    default:
1778       unreachable("missing");
1779    }
1780 }
1781 
1782 static void
1783 iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
1784 {
1785    iris_destroy_border_color_pool(&bufmgr->border_color_pool);
1786 
1787    /* Free aux-map buffers */
1788    intel_aux_map_finish(bufmgr->aux_map_ctx);
1789 
1790    /* bufmgr will no longer try to free VMA entries in the aux-map */
1791    bufmgr->aux_map_ctx = NULL;
1792 
1793    for (int i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
1794       if (bufmgr->bo_slabs[i].groups)
1795          pb_slabs_deinit(&bufmgr->bo_slabs[i]);
1796    }
1797 
1798    simple_mtx_lock(&bufmgr->lock);
1799 
1800    /* Free any cached buffer objects we were going to reuse */
1801    for (int h = 0; h < IRIS_HEAP_MAX; h++) {
1802       struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1803 
1804       for (int i = 0; i < cache->num_buckets; i++) {
1805          struct bo_cache_bucket *bucket = &cache->bucket[i];
1806 
1807          list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1808             list_del(&bo->head);
1809 
1810             bo_free(bo);
1811          }
1812       }
1813    }
1814 
1815    /* Close any buffer objects on the dead list. */
1816    list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1817       list_del(&bo->head);
1818       bo_close(bo);
1819    }
1820 
1821    _mesa_hash_table_destroy(bufmgr->name_table, NULL);
1822    _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
1823 
1824    for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
1825       util_vma_heap_finish(&bufmgr->vma_allocator[z]);
1826 
1827    iris_bufmgr_destroy_global_vm(bufmgr);
1828 
1829    close(bufmgr->fd);
1830 
1831    simple_mtx_unlock(&bufmgr->lock);
1832 
1833    simple_mtx_destroy(&bufmgr->lock);
1834    simple_mtx_destroy(&bufmgr->bo_deps_lock);
1835 
1836    free(bufmgr);
1837 }
1838 
1839 int
1840 iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling)
1841 {
1842    struct iris_bufmgr *bufmgr = bo->bufmgr;
1843 
1844    if (!bufmgr->devinfo.has_tiling_uapi) {
1845       *tiling = 0;
1846       return 0;
1847    }
1848 
1849    assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
1850    return iris_i915_bo_get_tiling(bo, tiling);
1851 }
1852 
1853 int
1854 iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf)
1855 {
1856    struct iris_bufmgr *bufmgr = bo->bufmgr;
1857 
1858    /* If we can't do map_gtt, the set/get_tiling API isn't useful. And it's
1859     * actually not supported by the kernel in those cases.
1860     */
1861    if (!bufmgr->devinfo.has_tiling_uapi)
1862       return 0;
1863 
1864    assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
1865    return iris_i915_bo_set_tiling(bo, surf);
1866 }
1867 
1868 struct iris_bo *
1869 iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
1870                       const uint64_t modifier)
1871 {
1872    uint32_t handle;
1873    struct iris_bo *bo;
1874 
1875    simple_mtx_lock(&bufmgr->lock);
1876    int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1877    if (ret) {
1878       DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
1879           strerror(errno));
1880       simple_mtx_unlock(&bufmgr->lock);
1881       return NULL;
1882    }
1883 
1884    /*
1885     * See if the kernel has already returned this buffer to us. Just as
1886     * for named buffers, we must not create two BOs pointing at the same
1887     * kernel object.
1888     */
1889    bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
1890    if (bo)
1891       goto out;
1892 
1893    bo = bo_calloc();
1894    if (!bo)
1895       goto out;
1896 
1897    p_atomic_set(&bo->refcount, 1);
1898 
1899    /* Determine size of bo.  The fd-to-handle ioctl really should
1900     * return the size, but it doesn't.  If we have kernel 3.12 or
1901     * later, we can lseek on the prime fd to get the size.  Older
1902     * kernels will just fail, in which case we fall back to the
1903     * provided (estimated or guessed) size. */
1904    ret = lseek(prime_fd, 0, SEEK_END);
1905    if (ret != -1)
1906       bo->size = ret;
1907 
1908    bo->bufmgr = bufmgr;
1909    bo->name = "prime";
1910    bo->index = -1;
1911    bo->real.reusable = false;
1912    bo->real.imported = true;
1913    /* Xe KMD expects at least 1-way coherency for imports */
1914    bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1915    bo->real.mmap_mode = IRIS_MMAP_NONE;
1916    if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1917       bo->real.capture = true;
1918    bo->gem_handle = handle;
1919    bo->real.prime_fd = needs_prime_fd(bufmgr) ? dup(prime_fd) : -1;
1920 
1921    uint64_t alignment = 1;
1922 
1923    /* When an aux map will be used, there is an alignment requirement on the
1924     * main surface from the mapping granularity. Some planes of the image may
1925     * have smaller alignment requirements, but this one should work for all.
1926     */
1927    if (bufmgr->devinfo.has_aux_map && isl_drm_modifier_has_aux(modifier))
1928       alignment = intel_aux_map_get_alignment(bufmgr->aux_map_ctx);
1929 
1930    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, alignment);
1931    if (bo->address == 0ull)
1932       goto err_free;
1933 
1934    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1935       goto err_vm_alloc;
1936 
1937    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1938 
1939 out:
1940    simple_mtx_unlock(&bufmgr->lock);
1941    return bo;
1942 
1943 err_vm_alloc:
1944    vma_free(bufmgr, bo->address, bo->size);
1945 err_free:
1946    bo_free(bo);
1947    simple_mtx_unlock(&bufmgr->lock);
1948    return NULL;
1949 }
1950 
1951 static void
1952 iris_bo_mark_exported_locked(struct iris_bo *bo)
1953 {
1954    struct iris_bufmgr *bufmgr = bo->bufmgr;
1955 
1956    /* We cannot export suballocated BOs. */
1957    assert(iris_bo_is_real(bo));
1958    simple_mtx_assert_locked(&bufmgr->lock);
1959 
1960    if (!iris_bo_is_external(bo))
1961       _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1962 
1963    if (!bo->real.exported) {
1964       /* If a BO is going to be used externally, it could be sent to the
1965        * display HW. So make sure our CPU mappings don't assume cache
1966        * coherency since display is outside that cache.
1967        */
1968       bo->real.exported = true;
1969       bo->real.reusable = false;
1970    }
1971 }
1972 
1973 void
1974 iris_bo_mark_exported(struct iris_bo *bo)
1975 {
1976    struct iris_bufmgr *bufmgr = bo->bufmgr;
1977 
1978    /* We cannot export suballocated BOs. */
1979    assert(iris_bo_is_real(bo));
1980 
1981    if (bo->real.exported) {
1982       assert(!bo->real.reusable);
1983       return;
1984    }
1985 
1986    simple_mtx_lock(&bufmgr->lock);
1987    iris_bo_mark_exported_locked(bo);
1988    simple_mtx_unlock(&bufmgr->lock);
1989 
1990    iris_bo_set_prime_fd(bo);
1991 }
1992 
1993 int
1994 iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
1995 {
1996    struct iris_bufmgr *bufmgr = bo->bufmgr;
1997 
1998    /* We cannot export suballocated BOs. */
1999    assert(iris_bo_is_real(bo));
2000 
2001    if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
2002                           DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
2003       return -errno;
2004 
2005    iris_bo_mark_exported(bo);
2006 
2007    return 0;
2008 }
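
/* Illustrative sketch (not part of the upstream file): a dma-buf round trip
 * using the helpers above.  'other_bufmgr' is a hypothetical second buffer
 * manager, and DRM_FORMAT_MOD_LINEAR (from drm_fourcc.h) is assumed as the
 * modifier; real callers derive the modifier from the image layout:
 *
 *    int fd = -1;
 *    if (iris_bo_export_dmabuf(bo, &fd) == 0) {
 *       struct iris_bo *imported =
 *          iris_bo_import_dmabuf(other_bufmgr, fd, DRM_FORMAT_MOD_LINEAR);
 *       close(fd);   // the import dup()s the fd only if it needs to keep it
 *       ...
 *    }
 */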
2009 
2010 static uint32_t
2011 iris_bo_export_gem_handle(struct iris_bo *bo)
2012 {
2013    /* We cannot export suballocated BOs. */
2014    assert(iris_bo_is_real(bo));
2015 
2016    iris_bo_mark_exported(bo);
2017 
2018    return bo->gem_handle;
2019 }
2020 
2021 int
2022 iris_bo_flink(struct iris_bo *bo, uint32_t *name)
2023 {
2024    struct iris_bufmgr *bufmgr = bo->bufmgr;
2025 
2026    /* We cannot export suballocated BOs. */
2027    assert(iris_bo_is_real(bo));
2028 
2029    if (!bo->real.global_name) {
2030       struct drm_gem_flink flink = { .handle = bo->gem_handle };
2031 
2032       if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
2033          return -errno;
2034 
2035       simple_mtx_lock(&bufmgr->lock);
2036       if (!bo->real.global_name) {
2037          iris_bo_mark_exported_locked(bo);
2038          bo->real.global_name = flink.name;
2039          _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
2040       }
2041       simple_mtx_unlock(&bufmgr->lock);
2042 
2043       iris_bo_set_prime_fd(bo);
2044    }
2045 
2046    *name = bo->real.global_name;
2047    return 0;
2048 }
2049 
2050 int
2051 iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
2052                                      uint32_t *out_handle)
2053 {
2054    /* We cannot export suballocated BOs. */
2055    assert(iris_bo_is_real(bo));
2056 
2057    /* Only add the new GEM handle to the list of exports if it belongs to a
2058     * different GEM device. Otherwise we might close the same buffer multiple
2059     * times.
2060     */
2061    struct iris_bufmgr *bufmgr = bo->bufmgr;
2062    int ret = os_same_file_description(drm_fd, bufmgr->fd);
2063    WARN_ONCE(ret < 0,
2064              "Kernel has no file descriptor comparison support: %s\n",
2065              strerror(errno));
2066    if (ret == 0) {
2067       *out_handle = iris_bo_export_gem_handle(bo);
2068       return 0;
2069    }
2070 
2071    struct bo_export *export = calloc(1, sizeof(*export));
2072    if (!export)
2073       return -ENOMEM;
2074 
2075    export->drm_fd = drm_fd;
2076 
2077    int dmabuf_fd = -1;
2078    int err = iris_bo_export_dmabuf(bo, &dmabuf_fd);
2079    if (err) {
2080       free(export);
2081       return err;
2082    }
2083 
2084    simple_mtx_lock(&bufmgr->lock);
2085    err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
2086    close(dmabuf_fd);
2087    if (err) {
2088       simple_mtx_unlock(&bufmgr->lock);
2089       free(export);
2090       return err;
2091    }
2092 
2093    bool found = false;
2094    list_for_each_entry(struct bo_export, iter, &bo->real.exports, link) {
2095       if (iter->drm_fd != drm_fd)
2096          continue;
2097       /* Here we assume that for a given DRM fd, we'll always get back the
2098        * same GEM handle for a given buffer.
2099        */
2100       assert(iter->gem_handle == export->gem_handle);
2101       free(export);
2102       export = iter;
2103       found = true;
2104       break;
2105    }
2106    if (!found)
2107       list_addtail(&export->link, &bo->real.exports);
2108 
2109    simple_mtx_unlock(&bufmgr->lock);
2110 
2111    *out_handle = export->gem_handle;
2112 
2113    return 0;
2114 }
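
/* Illustrative sketch (not part of the upstream file): obtaining a GEM handle
 * that is valid on another DRM fd (e.g. a display-only device).  'display_fd'
 * is a hypothetical second device fd owned by the caller:
 *
 *    uint32_t handle;
 *    if (iris_bo_export_gem_handle_for_device(bo, display_fd, &handle) == 0) {
 *       // 'handle' lives in display_fd's handle namespace, not bufmgr->fd's
 *    }
 */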
2115 
2116 static void
2117 add_bucket(struct iris_bufmgr *bufmgr, int size, enum iris_heap heap)
2118 {
2119    struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
2120    unsigned int i = cache->num_buckets++;
2121 
2122    list_inithead(&cache->bucket[i].head);
2123    cache->bucket[i].size = size;
2124 
2125    assert(bucket_for_size(bufmgr, size, heap, 0) == &cache->bucket[i]);
2126    assert(bucket_for_size(bufmgr, size - 2048, heap, 0) == &cache->bucket[i]);
2127    assert(bucket_for_size(bufmgr, size + 1, heap, 0) != &cache->bucket[i]);
2128 }
2129 
2130 static void
2131 init_cache_buckets(struct iris_bufmgr *bufmgr, enum iris_heap heap)
2132 {
2133    uint64_t size, cache_max_size = 64 * 1024 * 1024;
2134 
2135    /* OK, so power of two buckets was too wasteful of memory.
2136     * Give 3 other sizes between each power of two, to hopefully
2137     * cover things accurately enough.  (The alternative is
2138     * probably to just go for exact matching of sizes, and assume
2139     * that for things like composited window resize the tiled
2140     * width/height alignment and rounding of sizes to pages will
2141     * get us useful cache hit rates anyway)
2142     */
2143    add_bucket(bufmgr, PAGE_SIZE,     heap);
2144    add_bucket(bufmgr, PAGE_SIZE * 2, heap);
2145    add_bucket(bufmgr, PAGE_SIZE * 3, heap);
2146 
2147    /* Initialize the linked lists for BO reuse cache. */
2148    for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
2149       add_bucket(bufmgr, size, heap);
2150 
2151       add_bucket(bufmgr, size + size * 1 / 4, heap);
2152       add_bucket(bufmgr, size + size * 2 / 4, heap);
2153       add_bucket(bufmgr, size + size * 3 / 4, heap);
2154    }
2155 }
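
/* Illustrative note (not part of the upstream file): with PAGE_SIZE = 4096 the
 * code above produces the bucket-size ladder
 *
 *    4KB, 8KB, 12KB,                  (the three fixed buckets)
 *    16KB, 20KB, 24KB, 28KB,          (16KB plus 1/4, 2/4 and 3/4 steps)
 *    32KB, 40KB, 48KB, 56KB,
 *    64KB, 80KB, ...
 *
 * continuing until the power-of-two base size reaches the 64MB cache_max_size,
 * i.e. three intermediate sizes between consecutive powers of two, as the
 * comment above describes.
 */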
2156 
2157 static struct intel_buffer *
2158 intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
2159 {
2160    struct intel_buffer *buf = malloc(sizeof(struct intel_buffer));
2161    if (!buf)
2162       return NULL;
2163 
2164    struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;
2165 
2166    unsigned int page_size = getpagesize();
2167    size = MAX2(ALIGN(size, page_size), page_size);
2168 
2169    struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, BO_ALLOC_CAPTURE);
2170    if (!bo) {
2171       free(buf);
2172       return NULL;
2173    }
2174 
2175    simple_mtx_lock(&bufmgr->lock);
2176 
2177    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024);
2178    if (bo->address == 0ull)
2179       goto err_free;
2180 
2181    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
2182       goto err_vm_alloc;
2183 
2184    simple_mtx_unlock(&bufmgr->lock);
2185 
2186    bo->name = "aux-map";
2187    p_atomic_set(&bo->refcount, 1);
2188    bo->index = -1;
2189    bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
2190    bo->real.prime_fd = -1;
2191 
2192    buf->driver_bo = bo;
2193    buf->gpu = bo->address;
2194    buf->gpu_end = buf->gpu + bo->size;
2195    buf->map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
2196    return buf;
2197 
2198 err_vm_alloc:
2199    vma_free(bufmgr, bo->address, bo->size);
2200 err_free:
2201    free(buf);
2202    bo_free(bo);
2203    simple_mtx_unlock(&bufmgr->lock);
2204    return NULL;
2205 }
2206 
2207 static void
2208 intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
2209 {
2210    iris_bo_unreference((struct iris_bo*)buffer->driver_bo);
2211    free(buffer);
2212 }
2213 
2214 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
2215    .alloc = intel_aux_map_buffer_alloc,
2216    .free = intel_aux_map_buffer_free,
2217 };
2218 
2219 static bool
2220 iris_bufmgr_get_meminfo(struct iris_bufmgr *bufmgr,
2221                         struct intel_device_info *devinfo)
2222 {
2223    bufmgr->sys.region = &devinfo->mem.sram.mem;
2224    bufmgr->sys.size = devinfo->mem.sram.mappable.size;
2225 
2226    /* When the resizable BAR feature is disabled,
2227     * vram.mappable.size is only 256MB.
2228     * The remainder of the total size is reported in the
2229     * vram.unmappable.size variable.
2230     */
2231    bufmgr->vram.region = &devinfo->mem.vram.mem;
2232    bufmgr->vram.size = devinfo->mem.vram.mappable.size +
2233                        devinfo->mem.vram.unmappable.size;
2234 
2235    return true;
2236 }
2237 
2238 static bool
2239 iris_bufmgr_init_global_vm(struct iris_bufmgr *bufmgr)
2240 {
2241    switch (bufmgr->devinfo.kmd_type) {
2242    case INTEL_KMD_TYPE_I915:
2243       bufmgr->use_global_vm = iris_i915_init_global_vm(bufmgr, &bufmgr->global_vm_id);
2244       /* i915 doesn't require a VM, so return true even if use_global_vm is false */
2245       return true;
2246    case INTEL_KMD_TYPE_XE:
2247       if (!intel_bind_timeline_init(&bufmgr->bind_timeline, bufmgr->fd))
2248          return false;
2249 
2250       bufmgr->use_global_vm = iris_xe_init_global_vm(bufmgr, &bufmgr->global_vm_id);
2251       /* Xe requires VM */
2252       return bufmgr->use_global_vm;
2253    default:
2254       unreachable("missing");
2255       return false;
2256    }
2257 }
2258 
2259 /**
2260  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2261  * and manage buffer objects.
2262  *
2263  * \param fd File descriptor of the opened DRM device.
2264  */
2265 static struct iris_bufmgr *
2266 iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
2267 {
2268    if (devinfo->gtt_size <= IRIS_MEMZONE_OTHER_START)
2269       return NULL;
2270 
2271    struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
2272    if (bufmgr == NULL)
2273       return NULL;
2274 
2275    /* Handles to buffer objects belong to the device fd and are not
2276     * reference counted by the kernel.  If the same fd is used by
2277     * multiple parties (threads sharing the same screen bufmgr, or
2278     * even worse the same device fd passed to multiple libraries),
2279     * ownership of those handles is shared by those independent parties.
2280     *
2281     * Don't do this! Ensure that each library/bufmgr has its own device
2282     * fd so that its namespace does not clash with another.
2283     */
2284    bufmgr->fd = os_dupfd_cloexec(fd);
2285    if (bufmgr->fd == -1)
2286       goto error_dup;
2287 
2288    p_atomic_set(&bufmgr->refcount, 1);
2289 
2290    simple_mtx_init(&bufmgr->lock, mtx_plain);
2291    simple_mtx_init(&bufmgr->bo_deps_lock, mtx_plain);
2292 
2293    list_inithead(&bufmgr->zombie_list);
2294 
2295    bufmgr->devinfo = *devinfo;
2296    devinfo = &bufmgr->devinfo;
2297    bufmgr->bo_reuse = bo_reuse;
2298    iris_bufmgr_get_meminfo(bufmgr, devinfo);
2299    bufmgr->kmd_backend = iris_kmd_backend_get(devinfo->kmd_type);
2300 
2301    struct intel_query_engine_info *engine_info;
2302    engine_info = intel_engine_get_info(bufmgr->fd, bufmgr->devinfo.kmd_type);
2303    bufmgr->devinfo.has_compute_engine = engine_info &&
2304                                         intel_engines_count(engine_info,
2305                                                             INTEL_ENGINE_CLASS_COMPUTE);
2306    bufmgr->compute_engine_supported = bufmgr->devinfo.has_compute_engine &&
2307                                       intel_engines_supported_count(bufmgr->fd,
2308                                                                     &bufmgr->devinfo,
2309                                                                     engine_info,
2310                                                                     INTEL_ENGINE_CLASS_COMPUTE);
2311    free(engine_info);
2312 
2313    if (!iris_bufmgr_init_global_vm(bufmgr))
2314       goto error_init_vm;
2315 
2316    STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull);
2317    const uint64_t _4GB = 1ull << 32;
2318    const uint64_t _2GB = 1ul << 31;
2319 
2320    /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
2321    const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
2322 
2323    const struct {
2324       uint64_t start;
2325       uint64_t size;
2326    } vma[IRIS_MEMZONE_COUNT] = {
2327       [IRIS_MEMZONE_SHADER] = {
2328          .start = PAGE_SIZE,
2329          .size  = _4GB_minus_1 - PAGE_SIZE
2330       },
2331       [IRIS_MEMZONE_BINDER] = {
2332          .start = IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE,
2333          .size  = IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
2334       },
2335       [IRIS_MEMZONE_SCRATCH] = {
2336          .start = IRIS_MEMZONE_SCRATCH_START,
2337          .size  = IRIS_SCRATCH_ZONE_SIZE
2338       },
2339       [IRIS_MEMZONE_SURFACE] = {
2340          .start = IRIS_MEMZONE_SURFACE_START,
2341          .size = _4GB_minus_1 - IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
2342       },
2343       [IRIS_MEMZONE_DYNAMIC] = {
2344          .start = IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,
2345 
2346          /* Wa_2209859288: the Tigerlake PRM's workarounds volume says:
2347           *
2348           *    "PSDunit is dropping MSB of the blend state pointer from SD
2349           *     FIFO [...] Limit the Blend State Pointer to < 2G"
2350           *
2351           * We restrict the dynamic state pool to 2GB so that we don't ever
2352           * get a BLEND_STATE pointer with the MSB set.  We aren't likely to
2353           * need the full 4GB for dynamic state anyway.
2354           */
2355          .size  = (devinfo->ver >= 12 ? _2GB : _4GB_minus_1)
2356                   - IRIS_BORDER_COLOR_POOL_SIZE
2357       },
2358       [IRIS_MEMZONE_OTHER] = {
2359          .start = IRIS_MEMZONE_OTHER_START,
2360 
2361          /* Leave the last 4GB out of the high vma range, so that no state
2362           * base address + size can overflow 48 bits.
2363           */
2364          .size  = (devinfo->gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START,
2365       },
2366    };
2367 
2368    for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
2369       util_vma_heap_init(&bufmgr->vma_allocator[i],
2370                          vma[i].start, vma[i].size);
2371    }
2372 
2373    if (INTEL_DEBUG(DEBUG_HEAPS)) {
2374       for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
2375          fprintf(stderr, "%-11s | 0x%016" PRIx64 "-0x%016" PRIx64 "\n",
2376                  memzone_name(i), vma[i].start,
2377                  vma[i].start + vma[i].size - 1);
2378       }
2379    }
2380 
2381    for (int h = 0; h < IRIS_HEAP_MAX; h++)
2382       init_cache_buckets(bufmgr, h);
2383 
2384    unsigned min_slab_order = 8;  /* 256 bytes */
2385    unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
2386    unsigned num_slab_orders_per_allocator =
2387       (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS;
2388 
2389    /* Divide the size order range among slab managers. */
2390    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
2391       unsigned min_order = min_slab_order;
2392       unsigned max_order =
2393          MIN2(min_order + num_slab_orders_per_allocator, max_slab_order);
2394 
2395       if (!pb_slabs_init(&bufmgr->bo_slabs[i], min_order, max_order,
2396                          IRIS_HEAP_MAX, true, bufmgr,
2397                          iris_can_reclaim_slab,
2398                          iris_slab_alloc,
2399                          (void *) iris_slab_free)) {
2400          goto error_slabs_init;
2401       }
2402       min_slab_order = max_order + 1;
2403    }
2404 
2405    bufmgr->name_table =
2406       _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2407    bufmgr->handle_table =
2408       _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2409 
2410    if (devinfo->has_aux_map) {
2411       bufmgr->aux_map_ctx = intel_aux_map_init(bufmgr, &aux_map_allocator,
2412                                                devinfo);
2413       assert(bufmgr->aux_map_ctx);
2414    }
2415 
2416    iris_init_border_color_pool(bufmgr, &bufmgr->border_color_pool);
2417 
2418    return bufmgr;
2419 
2420 error_slabs_init:
2421    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
2422       if (!bufmgr->bo_slabs[i].groups)
2423          break;
2424 
2425       pb_slabs_deinit(&bufmgr->bo_slabs[i]);
2426    }
2427    iris_bufmgr_destroy_global_vm(bufmgr);
2428 error_init_vm:
2429    close(bufmgr->fd);
2430 error_dup:
2431    free(bufmgr);
2432    return NULL;
2433 }
2434 
2435 static struct iris_bufmgr *
2436 iris_bufmgr_ref(struct iris_bufmgr *bufmgr)
2437 {
2438    p_atomic_inc(&bufmgr->refcount);
2439    return bufmgr;
2440 }
2441 
2442 void
2443 iris_bufmgr_unref(struct iris_bufmgr *bufmgr)
2444 {
2445    simple_mtx_lock(&global_bufmgr_list_mutex);
2446    if (p_atomic_dec_zero(&bufmgr->refcount)) {
2447       list_del(&bufmgr->link);
2448       iris_bufmgr_destroy(bufmgr);
2449    }
2450    simple_mtx_unlock(&global_bufmgr_list_mutex);
2451 }
2452 
2453 /** Returns a new unique id, to be used by screens. */
2454 int
2455 iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr)
2456 {
2457    return p_atomic_inc_return(&bufmgr->next_screen_id) - 1;
2458 }
2459 
2460 /**
2461  * Gets an already existing GEM buffer manager or creates a new one.
2462  *
2463  * \param fd File descriptor of the opened DRM device.
2464  */
2465 struct iris_bufmgr *
2466 iris_bufmgr_get_for_fd(int fd, bool bo_reuse)
2467 {
2468    struct intel_device_info devinfo;
2469    struct stat st;
2470 
2471    if (fstat(fd, &st))
2472       return NULL;
2473 
2474    struct iris_bufmgr *bufmgr = NULL;
2475 
2476    simple_mtx_lock(&global_bufmgr_list_mutex);
2477    list_for_each_entry(struct iris_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
2478       struct stat iter_st;
2479       if (fstat(iter_bufmgr->fd, &iter_st))
2480          continue;
2481 
2482       if (st.st_rdev == iter_st.st_rdev) {
2483          assert(iter_bufmgr->bo_reuse == bo_reuse);
2484          bufmgr = iris_bufmgr_ref(iter_bufmgr);
2485          goto unlock;
2486       }
2487    }
2488 
2489    if (!intel_get_device_info_from_fd(fd, &devinfo, 8, -1))
2490       goto unlock;   /* avoid leaking global_bufmgr_list_mutex; bufmgr is still NULL */
2491 
2492    if (devinfo.ver < 8 || devinfo.platform == INTEL_PLATFORM_CHV)
2493       goto unlock;
2494 
2495    bufmgr = iris_bufmgr_create(&devinfo, fd, bo_reuse);
2496    if (bufmgr)
2497       list_addtail(&bufmgr->link, &global_bufmgr_list);
2498 
2499  unlock:
2500    simple_mtx_unlock(&global_bufmgr_list_mutex);
2501 
2502    return bufmgr;
2503 }
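
/* Illustrative sketch (not part of the upstream file): screens that open the
 * same DRM device (same st_rdev) share a single bufmgr and only bump its
 * refcount here; the matching release is iris_bufmgr_unref():
 *
 *    struct iris_bufmgr *bufmgr = iris_bufmgr_get_for_fd(fd, true);
 *    if (!bufmgr)
 *       return NULL;   // unsupported device or error
 *    ...
 *    iris_bufmgr_unref(bufmgr);
 */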
2504 
2505 int
2506 iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr)
2507 {
2508    return bufmgr->fd;
2509 }
2510 
2511 void*
2512 iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr)
2513 {
2514    return bufmgr->aux_map_ctx;
2515 }
2516 
2517 simple_mtx_t *
2518 iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr)
2519 {
2520    return &bufmgr->bo_deps_lock;
2521 }
2522 
2523 struct iris_border_color_pool *
2524 iris_bufmgr_get_border_color_pool(struct iris_bufmgr *bufmgr)
2525 {
2526    return &bufmgr->border_color_pool;
2527 }
2528 
2529 uint64_t
2530 iris_bufmgr_vram_size(struct iris_bufmgr *bufmgr)
2531 {
2532    return bufmgr->vram.size;
2533 }
2534 
2535 uint64_t
2536 iris_bufmgr_sram_size(struct iris_bufmgr *bufmgr)
2537 {
2538    return bufmgr->sys.size;
2539 }
2540 
2541 const struct intel_device_info *
2542 iris_bufmgr_get_device_info(struct iris_bufmgr *bufmgr)
2543 {
2544    return &bufmgr->devinfo;
2545 }
2546 
2547 const struct iris_kmd_backend *
2548 iris_bufmgr_get_kernel_driver_backend(struct iris_bufmgr *bufmgr)
2549 {
2550    return bufmgr->kmd_backend;
2551 }
2552 
2553 uint32_t
2554 iris_bufmgr_get_global_vm_id(struct iris_bufmgr *bufmgr)
2555 {
2556    return bufmgr->global_vm_id;
2557 }
2558 
2559 bool
2560 iris_bufmgr_use_global_vm_id(struct iris_bufmgr *bufmgr)
2561 {
2562    return bufmgr->use_global_vm;
2563 }
2564 
2565 bool
2566 iris_bufmgr_compute_engine_supported(struct iris_bufmgr *bufmgr)
2567 {
2568    return bufmgr->compute_engine_supported;
2569 }
2570 
2571 /**
2572  * Return the pat entry based on the bo heap and allocation flags.
2573  */
2574 const struct intel_device_info_pat_entry *
2575 iris_heap_to_pat_entry(const struct intel_device_info *devinfo,
2576                        enum iris_heap heap)
2577 {
2578    switch (heap) {
2579    case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
2580       return &devinfo->pat.cached_coherent;
2581    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
2582       return &devinfo->pat.writecombining;
2583    case IRIS_HEAP_DEVICE_LOCAL:
2584    case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
2585       return &devinfo->pat.writecombining;
2586    default:
2587       unreachable("invalid heap for platforms using PAT entries");
2588    }
2589 }
2590 
2591 struct intel_bind_timeline *
2592 iris_bufmgr_get_bind_timeline(struct iris_bufmgr *bufmgr)
2593 {
2594    return &bufmgr->bind_timeline;
2595 }
2596