1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file iris_bufmgr.c
25  *
26  * The Iris buffer manager.
27  *
28  * XXX: write better comments
29  * - BOs
30  * - Explain BO cache
31  * - main interface to GEM in the kernel
32  */
33 
34 #include <util/u_atomic.h>
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <assert.h>
41 #include <sys/ioctl.h>
42 #include <sys/mman.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <stdbool.h>
46 #include <time.h>
47 #include <unistd.h>
48 
49 #include "errno.h"
50 #include "common/intel_aux_map.h"
51 #include "common/intel_mem.h"
52 #include "c99_alloca.h"
53 #include "dev/intel_debug.h"
54 #include "common/intel_common.h"
55 #include "common/intel_gem.h"
56 #include "dev/intel_device_info.h"
57 #include "drm-uapi/dma-buf.h"
58 #include "isl/isl.h"
59 #include "util/os_mman.h"
60 #include "util/u_debug.h"
61 #include "util/macros.h"
62 #include "util/hash_table.h"
63 #include "util/list.h"
64 #include "util/os_file.h"
65 #include "util/u_dynarray.h"
66 #include "util/vma.h"
67 #include "iris_bufmgr.h"
68 #include "iris_context.h"
69 #include "string.h"
70 #include "iris_kmd_backend.h"
71 #include "i915/iris_bufmgr.h"
72 #include "xe/iris_bufmgr.h"
73 
74 #include <xf86drm.h>
75 
76 #ifdef HAVE_VALGRIND
77 #include <valgrind.h>
78 #include <memcheck.h>
79 #define VG(x) x
80 #else
81 #define VG(x)
82 #endif
83 
84 /* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
85  * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
86  * leaked. All because it does not call VG(cli_free) from its
87  * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
88  * and allocation, we mark it available for use upon mmapping and remove
89  * it upon unmapping.
90  */
91 #define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
92 #define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
93 
94 /* On FreeBSD PAGE_SIZE is already defined in
95  * /usr/include/machine/param.h, which is indirectly
96  * included here.
97  */
98 #ifndef PAGE_SIZE
99 #define PAGE_SIZE 4096
100 #endif
101 
102 #define WARN_ONCE(cond, fmt...) do {                            \
103    if (unlikely(cond)) {                                        \
104       static bool _warned = false;                              \
105       if (!_warned) {                                           \
106          fprintf(stderr, "WARNING: ");                          \
107          fprintf(stderr, fmt);                                  \
108          _warned = true;                                        \
109       }                                                         \
110    }                                                            \
111 } while (0)
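/* Illustrative usage of WARN_ONCE (the condition and message here are
 * hypothetical, not taken from this file):
 *
 *    WARN_ONCE(ret != 0, "vm_bind returned %d\n", ret);
 *
 * Each expansion carries its own static _warned flag, so a given call site
 * prints its warning at most once.
 */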
112 
113 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
114 
115 /**
116  * For debugging purposes, this returns a time in seconds.
117  */
118 static double
119 get_time(void)
120 {
121    struct timespec tp;
122 
123    clock_gettime(CLOCK_MONOTONIC, &tp);
124 
125    return tp.tv_sec + tp.tv_nsec / 1000000000.0;
126 }
127 
128 static inline int
129 atomic_add_unless(int *v, int add, int unless)
130 {
131    int c, old;
132    c = p_atomic_read(v);
133    while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
134       c = old;
135    return c == unless;
136 }
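/* Illustrative reading of atomic_add_unless(): it atomically adds 'add' to
 * '*v' unless '*v' already equals 'unless', and returns true only when the
 * add was skipped for that reason.  A sketch of how iris_bo_unreference()
 * below uses it:
 *
 *    if (atomic_add_unless(&bo->refcount, -1, 1)) {
 *       // refcount was exactly 1, so the decrement was not applied here;
 *       // take the bufmgr lock and drop the final reference under it.
 *    }
 */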
137 
138 static const char *
139 memzone_name(enum iris_memory_zone memzone)
140 {
141    const char *names[] = {
142       [IRIS_MEMZONE_SHADER]   = "shader",
143       [IRIS_MEMZONE_BINDER]   = "binder",
144       [IRIS_MEMZONE_SCRATCH]  = "scratchsurf",
145       [IRIS_MEMZONE_SURFACE]  = "surface",
146       [IRIS_MEMZONE_DYNAMIC]  = "dynamic",
147       [IRIS_MEMZONE_OTHER]    = "other",
148       [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor",
149    };
150    assert(memzone < ARRAY_SIZE(names));
151    return names[memzone];
152 }
153 
154 struct bo_cache_bucket {
155    /** List of cached BOs. */
156    struct list_head head;
157 
158    /** Size of this bucket, in bytes. */
159    uint64_t size;
160 };
161 
162 struct bo_export {
163    /** File descriptor associated with a handle export. */
164    int drm_fd;
165 
166    /** GEM handle in drm_fd */
167    uint32_t gem_handle;
168 
169    struct list_head link;
170 };
171 
172 struct iris_memregion {
173    struct intel_memory_class_instance *region;
174    uint64_t size;
175 };
176 
177 #define NUM_SLAB_ALLOCATORS 3
178 
179 struct iris_slab {
180    struct pb_slab base;
181 
182    /** The BO representing the entire slab */
183    struct iris_bo *bo;
184 
185    /** Array of iris_bo structs representing BOs allocated out of this slab */
186    struct iris_bo *entries;
187 };
188 
189 #define BUCKET_ARRAY_SIZE 25
190 
191 struct iris_bucket_cache {
192    struct bo_cache_bucket bucket[BUCKET_ARRAY_SIZE];
193    int num_buckets;
194 };
195 
196 struct iris_bufmgr {
197    /**
198     * List into the list of bufmgr.
199     */
200    struct list_head link;
201 
202    uint32_t refcount;
203 
204    int fd;
205 
206    simple_mtx_t lock;
207    simple_mtx_t bo_deps_lock;
208 
209    /** Array of lists of cached gem objects of power-of-two sizes */
210    struct iris_bucket_cache *bucket_cache;
211 
212    time_t time;
213 
214    struct hash_table *name_table;
215    struct hash_table *handle_table;
216 
217    /**
218     * List of BOs which we've effectively freed, but are hanging on to
219     * until they're idle before closing and returning the VMA.
220     */
221    struct list_head zombie_list;
222 
223    struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT];
224 
225    struct iris_memregion vram, sys;
226 
227    /* Used only when use_global_vm is true. */
228    uint32_t global_vm_id;
229 
230    int next_screen_id;
231 
232    struct intel_device_info devinfo;
233    const struct iris_kmd_backend *kmd_backend;
234    struct intel_bind_timeline bind_timeline; /* Xe only */
235    bool bo_reuse:1;
236    bool use_global_vm:1;
237 
238    struct intel_aux_map_context *aux_map_ctx;
239 
240    struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
241 
242    struct iris_border_color_pool border_color_pool;
243 
244    struct iris_bo *dummy_aux_bo;
245    struct iris_bo *mem_fence_bo;
246 };
247 
248 static simple_mtx_t global_bufmgr_list_mutex = SIMPLE_MTX_INITIALIZER;
249 static struct list_head global_bufmgr_list = {
250    .next = &global_bufmgr_list,
251    .prev = &global_bufmgr_list,
252 };
253 
254 static void bo_free(struct iris_bo *bo);
255 
256 static struct iris_bo *
257 find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
258 {
259    struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
260    struct iris_bo *bo = entry ? entry->data : NULL;
261 
262    if (bo) {
263       assert(iris_bo_is_external(bo));
264       assert(iris_bo_is_real(bo));
265       assert(!bo->real.reusable);
266 
267       /* Being non-reusable, the BO cannot be in the cache lists, but it
268        * may be in the zombie list if it had reached zero references, but
269        * we hadn't yet closed it...and then reimported the same BO.  If it
270        * is, then remove it since it's now been resurrected.
271        */
272       if (list_is_linked(&bo->head))
273          list_del(&bo->head);
274 
275       iris_bo_reference(bo);
276    }
277 
278    return bo;
279 }
280 
281 /**
282  * This function finds the correct bucket fit for the input size.
283  * It runs in O(1) time by computing the bucket index directly from the
284  * requested size rather than iterating over all the buckets.
285  */
286 static struct bo_cache_bucket *
287 bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size,
288                 enum iris_heap heap, unsigned flags)
289 {
290    if (flags & BO_ALLOC_PROTECTED)
291       return NULL;
292 
293    const struct intel_device_info *devinfo = &bufmgr->devinfo;
294    struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
295 
296    if (devinfo->kmd_type == INTEL_KMD_TYPE_XE &&
297        (flags & (BO_ALLOC_SHARED | BO_ALLOC_SCANOUT)))
298       return NULL;
299 
300    const unsigned _4MB = 4 * 1024 * 1024;
301    const unsigned _6MB = 6 * 1024 * 1024;
302    const unsigned _8MB = 8 * 1024 * 1024;
303    const unsigned _64MB = 64 * 1024 * 1024;
304    unsigned index;
305 
306    if (size <= 4096) {
307       index = 0;
308    } else if (size <= _4MB) {
309       index = util_logbase2_ceil(size) - 12;
310    } else if (size <= _6MB) {
311       index = 11;
312    } else if (size <= _8MB) {
313       index = 12;
314    } else if (size <= _64MB) {
315       const unsigned power = util_logbase2(size);
316       const unsigned base_size = 1u << power;
317       const unsigned quarter_size = base_size / 4;
318       const unsigned quarter = DIV_ROUND_UP(size - base_size, quarter_size);
319       index = 12 + (power - 23) * 4 + quarter;
320    } else {
321       return NULL;
322    }
323 
324    return (index < cache->num_buckets) ? &cache->bucket[index] : NULL;
325 }
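/* A few worked examples of the index computation above (illustrative only):
 *
 *    size = 4 KB    -> index 0
 *    size = 5000 B  -> util_logbase2_ceil(5000) - 12 = 13 - 12 = 1
 *    size = 5 MB    -> index 11
 *    size = 7 MB    -> index 12
 *    size = 10 MB   -> power = 23, base_size = 8 MB, quarter_size = 2 MB,
 *                      quarter = DIV_ROUND_UP(10 MB - 8 MB, 2 MB) = 1,
 *                      index = 12 + (23 - 23) * 4 + 1 = 13
 *
 * Sizes above 64 MB are never cached, and an index beyond num_buckets also
 * returns NULL.
 */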
326 
327 enum iris_memory_zone
328 iris_memzone_for_address(uint64_t address)
329 {
330    STATIC_ASSERT(IRIS_MEMZONE_OTHER_START    > IRIS_MEMZONE_DYNAMIC_START);
331    STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START  > IRIS_MEMZONE_SCRATCH_START);
332    STATIC_ASSERT(IRIS_MEMZONE_SCRATCH_START == IRIS_MEMZONE_BINDER_START);
333    STATIC_ASSERT(IRIS_MEMZONE_BINDER_START   > IRIS_MEMZONE_SHADER_START);
334    STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START  > IRIS_MEMZONE_SURFACE_START);
335    STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);
336 
337    if (address >= IRIS_MEMZONE_OTHER_START)
338       return IRIS_MEMZONE_OTHER;
339 
340    if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
341       return IRIS_MEMZONE_BORDER_COLOR_POOL;
342 
343    if (address > IRIS_MEMZONE_DYNAMIC_START)
344       return IRIS_MEMZONE_DYNAMIC;
345 
346    if (address >= IRIS_MEMZONE_SURFACE_START)
347       return IRIS_MEMZONE_SURFACE;
348 
349    if (address >= (IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE))
350       return IRIS_MEMZONE_BINDER;
351 
352    if (address >= IRIS_MEMZONE_SCRATCH_START)
353       return IRIS_MEMZONE_SCRATCH;
354 
355    return IRIS_MEMZONE_SHADER;
356 }
357 
358 /**
359  * Allocate a section of virtual memory for a buffer, assigning an address.
360  *
361  * This uses either the bucket allocator for the given size, or the large
362  * object allocator (util_vma).
363  */
364 static uint64_t
365 vma_alloc(struct iris_bufmgr *bufmgr,
366           enum iris_memory_zone memzone,
367           uint64_t size,
368           uint64_t alignment)
369 {
370    simple_mtx_assert_locked(&bufmgr->lock);
371 
372    const unsigned _2mb = 2 * 1024 * 1024;
373 
374    /* Force minimum alignment based on device requirements */
375    assert((alignment & (alignment - 1)) == 0);
376    alignment = MAX2(alignment, bufmgr->devinfo.mem_alignment);
377 
378    /* If the allocation is a multiple of 2MB, ensure the virtual address is
379     * aligned to 2MB, so that it's possible for the kernel to use 64K pages.
380     */
381    if (size % _2mb == 0)
382       alignment = MAX2(alignment, _2mb);
383 
384    if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
385       return IRIS_BORDER_COLOR_POOL_ADDRESS;
386 
387    uint64_t addr =
388       util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment);
389 
390    assert((addr >> 48ull) == 0);
391    assert((addr % alignment) == 0);
392 
393    return intel_canonical_address(addr);
394 }
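/* Example of the alignment bump above: a 4 MB allocation requested with 4 KB
 * alignment ends up 2 MB-aligned (since 4 MB % 2 MB == 0), letting the kernel
 * use 64 KB pages, while a 1 MB allocation keeps its requested alignment
 * (subject to devinfo.mem_alignment).
 */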
395 
396 static void
397 vma_free(struct iris_bufmgr *bufmgr,
398          uint64_t address,
399          uint64_t size)
400 {
401    simple_mtx_assert_locked(&bufmgr->lock);
402 
403    if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
404       return;
405 
406    /* Un-canonicalize the address. */
407    address = intel_48b_address(address);
408 
409    if (address == 0ull)
410       return;
411 
412    enum iris_memory_zone memzone = iris_memzone_for_address(address);
413 
414    assert(memzone < ARRAY_SIZE(bufmgr->vma_allocator));
415 
416    util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
417 }
418 
419 /* Exports a BO's implicit synchronization state to a drm_syncobj, returning
420  * its wrapping iris_syncobj. A new drm_syncobj is created and must be
421  * destroyed by the caller after the execbuf ioctl.
422  */
423 struct iris_syncobj *
424 iris_bo_export_sync_state(struct iris_bo *bo)
425 {
426    struct iris_bufmgr *bufmgr = bo->bufmgr;
427    int drm_fd = iris_bufmgr_get_fd(bufmgr);
428 
429    struct iris_syncobj *iris_syncobj = iris_create_syncobj(bufmgr);
430 
431    struct dma_buf_export_sync_file export_sync_file_ioctl = {
432       .flags = DMA_BUF_SYNC_RW, /* TODO */
433       .fd = -1,
434    };
435    if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
436                    &export_sync_file_ioctl)) {
437       fprintf(stderr, "DMA_BUF_IOCTL_EXPORT_SYNC_FILE ioctl failed (%d)\n",
438               errno);
439       goto error_export;
440    }
441 
442    int sync_file_fd = export_sync_file_ioctl.fd;
443    assert(sync_file_fd >= 0);
444 
445    struct drm_syncobj_handle syncobj_import_ioctl = {
446       .handle = iris_syncobj->handle,
447       .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
448       .fd = sync_file_fd,
449    };
450    if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
451                    &syncobj_import_ioctl)) {
452       fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE ioctl failed (%d)\n",
453               errno);
454    }
455 
456    close(sync_file_fd);
457 
458    return iris_syncobj;
459 error_export:
460    iris_syncobj_destroy(bufmgr, iris_syncobj);
461    return NULL;
462 }
463 
464 /* Import the state of a sync_file_fd (which we should have gotten from
465  * batch_syncobj_to_sync_file_fd) into a BO as its implicit synchronization
466  * state.
467  */
468 void
469 iris_bo_import_sync_state(struct iris_bo *bo, int sync_file_fd)
470 {
471    struct dma_buf_import_sync_file import_sync_file_ioctl = {
472       .flags = DMA_BUF_SYNC_WRITE,
473       .fd = sync_file_fd,
474    };
475    if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
476                    &import_sync_file_ioctl))
477       fprintf(stderr, "DMA_BUF_IOCTL_IMPORT_SYNC_FILE ioctl failed (%d)\n",
478               errno);
479 }
480 
481 /* A timeout of 0 just checks for busyness. */
482 static int
483 iris_bo_wait_syncobj(struct iris_bo *bo, int64_t timeout_ns)
484 {
485    int ret = 0;
486    struct iris_bufmgr *bufmgr = bo->bufmgr;
487    const bool is_external = iris_bo_is_real(bo) && bo->real.prime_fd != -1;
488    struct iris_syncobj *external_implicit_syncobj = NULL;
489 
490    /* If we know it's idle, don't bother with the kernel round trip.
491     * Can't do that for Xe KMD with external BOs since we have to check the
492     * implicit synchronization information.
493     */
494    if (!is_external && bo->idle)
495       return 0;
496 
497    simple_mtx_lock(&bufmgr->bo_deps_lock);
498 
499    const int handles_len = bo->deps_size * IRIS_BATCH_COUNT * 2 + is_external;
500    uint32_t *handles = handles_len <= 32 ?
501                         (uint32_t *)alloca(handles_len * sizeof(*handles)) :
502                         (uint32_t *)malloc(handles_len * sizeof(*handles));
503    int handle_count = 0;
504 
505    if (is_external) {
506       external_implicit_syncobj = iris_bo_export_sync_state(bo);
507       if (external_implicit_syncobj)
508          handles[handle_count++] = external_implicit_syncobj->handle;
509    }
510 
511    for (int d = 0; d < bo->deps_size; d++) {
512       for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
513          struct iris_syncobj *r = bo->deps[d].read_syncobjs[b];
514          struct iris_syncobj *w = bo->deps[d].write_syncobjs[b];
515          if (r)
516             handles[handle_count++] = r->handle;
517          if (w)
518             handles[handle_count++] = w->handle;
519       }
520    }
521 
522    if (handle_count == 0)
523       goto out;
524 
525    /* Unlike the gem wait, negative values are not infinite here. */
526    int64_t timeout_abs = os_time_get_absolute_timeout(timeout_ns);
527    if (timeout_abs < 0)
528       timeout_abs = INT64_MAX;
529 
530    struct drm_syncobj_wait args = {
531       .handles = (uintptr_t) handles,
532       .timeout_nsec = timeout_abs,
533       .count_handles = handle_count,
534       .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
535    };
536 
537    ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
538    if (ret != 0) {
539       ret = -errno;
540       goto out;
541    }
542 
543    /* We just waited on everything, so clear out all the deps. */
544    for (int d = 0; d < bo->deps_size; d++) {
545       for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
546          iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
547          iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
548       }
549    }
550 
551 out:
552    if (handles_len > 32)
553       free(handles);
554    if (external_implicit_syncobj)
555       iris_syncobj_reference(bufmgr, &external_implicit_syncobj, NULL);
556 
557    simple_mtx_unlock(&bufmgr->bo_deps_lock);
558    return ret;
559 }
560 
561 static bool
562 iris_bo_busy_syncobj(struct iris_bo *bo)
563 {
564    return iris_bo_wait_syncobj(bo, 0) == -ETIME;
565 }
566 
567 bool
568 iris_bo_busy(struct iris_bo *bo)
569 {
570    bool busy;
571 
572    switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
573    case INTEL_KMD_TYPE_I915:
574       if (iris_bo_is_external(bo))
575          busy = iris_i915_bo_busy_gem(bo);
576       else
577          busy = iris_bo_busy_syncobj(bo);
578       break;
579    case INTEL_KMD_TYPE_XE:
580       busy = iris_bo_busy_syncobj(bo);
581       break;
582    default:
583       unreachable("missing");
584       busy = true;
585    }
586 
587    bo->idle = !busy;
588 
589    return busy;
590 }
591 
592 /**
593  * Specify the volatility of the buffer.
594  * \param bo Buffer whose purgeable state is being changed
595  * \param state The purgeable status
596  *
597  * Use IRIS_MADVICE_DONT_NEED to mark the buffer as purgeable, and it will be
598  * reclaimed under memory pressure. If you subsequently require the buffer,
599  * then you must pass IRIS_MADVICE_WILL_NEED to mark the buffer as required.
600  *
601  * Returns true if the buffer was retained, or false if it was discarded
602  * whilst marked as IRIS_MADVICE_DONT_NEED.
603  */
604 static inline bool
605 iris_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
606 {
607    /* We can't madvise suballocated BOs. */
608    assert(iris_bo_is_real(bo));
609 
610    return bo->bufmgr->kmd_backend->bo_madvise(bo, state);
611 }
612 
613 static struct iris_bo *
614 bo_calloc(void)
615 {
616    struct iris_bo *bo = calloc(1, sizeof(*bo));
617    if (!bo)
618       return NULL;
619 
620    list_inithead(&bo->real.exports);
621 
622    bo->hash = _mesa_hash_pointer(bo);
623 
624    return bo;
625 }
626 
627 static void
628 bo_unmap(struct iris_bo *bo)
629 {
630    assert(iris_bo_is_real(bo));
631 
632    VG_NOACCESS(bo->real.map, bo->size);
633    os_munmap(bo->real.map, bo->size);
634    bo->real.map = NULL;
635 }
636 
637 static struct pb_slabs *
638 get_slabs(struct iris_bufmgr *bufmgr, uint64_t size)
639 {
640    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
641       struct pb_slabs *slabs = &bufmgr->bo_slabs[i];
642 
643       if (size <= 1ull << (slabs->min_order + slabs->num_orders - 1))
644          return slabs;
645    }
646 
647    unreachable("should have found a valid slab for this size");
648 }
649 
650 /* Return the power of two size of a slab entry matching the input size. */
651 static unsigned
652 get_slab_pot_entry_size(struct iris_bufmgr *bufmgr, unsigned size)
653 {
654    unsigned entry_size = util_next_power_of_two(size);
655    unsigned min_entry_size = 1 << bufmgr->bo_slabs[0].min_order;
656 
657    return MAX2(entry_size, min_entry_size);
658 }
659 
660 /* Return the slab entry alignment. */
661 static unsigned
662 get_slab_entry_alignment(struct iris_bufmgr *bufmgr, unsigned size)
663 {
664    unsigned entry_size = get_slab_pot_entry_size(bufmgr, size);
665 
666    if (size <= entry_size * 3 / 4)
667       return entry_size / 4;
668 
669    return entry_size;
670 }
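/* Worked example of the two helpers above (assuming the smallest slab order
 * is below 128 KB): for size = 96 KB, the power-of-two entry size is 128 KB,
 * and since 96 KB <= 128 KB * 3 / 4, the entry alignment is 128 KB / 4 =
 * 32 KB.  For size = 100 KB the 3/4 test fails, so the alignment is the full
 * 128 KB entry size.
 */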
671 
672 static bool
673 iris_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
674 {
675    struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
676 
677    return !iris_bo_busy(bo);
678 }
679 
680 static void
681 iris_slab_free(void *priv, struct pb_slab *pslab)
682 {
683    struct iris_bufmgr *bufmgr = priv;
684    struct iris_slab *slab = (void *) pslab;
685    struct intel_aux_map_context *aux_map_ctx = bufmgr->aux_map_ctx;
686 
687    assert(!slab->bo->aux_map_address);
688 
689    /* Since we're freeing the whole slab, all buffers allocated out of it
690     * must be reclaimable.  We require buffers to be idle to be reclaimed
691     * (see iris_can_reclaim_slab()), so we know all entries must be idle.
692     * Therefore, we can safely unmap their aux table entries.
693     */
694    for (unsigned i = 0; i < pslab->num_entries; i++) {
695       struct iris_bo *bo = &slab->entries[i];
696       if (aux_map_ctx && bo->aux_map_address) {
697          intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size);
698          bo->aux_map_address = 0;
699       }
700 
701       /* Unref read/write dependency syncobjs and free the array. */
702       for (int d = 0; d < bo->deps_size; d++) {
703          for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
704             iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
705             iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
706          }
707       }
708       free(bo->deps);
709    }
710 
711    iris_bo_unreference(slab->bo);
712 
713    free(slab->entries);
714    free(slab);
715 }
716 
717 static struct pb_slab *
718 iris_slab_alloc(void *priv,
719                 unsigned heap,
720                 unsigned entry_size,
721                 unsigned group_index)
722 {
723    struct iris_bufmgr *bufmgr = priv;
724    struct iris_slab *slab = calloc(1, sizeof(struct iris_slab));
725    uint32_t flags = BO_ALLOC_NO_SUBALLOC;
726    unsigned slab_size = 0;
727    /* We only support slab allocation for IRIS_MEMZONE_OTHER */
728    enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
729 
730    if (!slab)
731       return NULL;
732 
733    struct pb_slabs *slabs = bufmgr->bo_slabs;
734 
735    /* Determine the slab buffer size. */
736    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
737       unsigned max_entry_size =
738          1 << (slabs[i].min_order + slabs[i].num_orders - 1);
739 
740       if (entry_size <= max_entry_size) {
741          /* The slab size is twice the size of the largest possible entry. */
742          slab_size = max_entry_size * 2;
743 
744          if (!util_is_power_of_two_nonzero(entry_size)) {
745             assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));
746 
747             /* If the entry size is 3/4 of a power of two, we would waste
748              * space and not gain anything if we allocated only twice the
749              * power of two for the backing buffer:
750              *
751              *    2 * 3/4 = 1.5 usable with buffer size 2
752              *
753              * Allocating 5 times the entry size leads us to the next power
754              * of two and results in a much better memory utilization:
755              *
756              *    5 * 3/4 = 3.75 usable with buffer size 4
757              */
758             if (entry_size * 5 > slab_size)
759                slab_size = util_next_power_of_two(entry_size * 5);
760          }
761 
762          /* The largest slab should have the same size as the PTE fragment
763           * size to get faster address translation.
764           *
765           * TODO: move this to intel_device_info?
766           */
767          const unsigned pte_size = 2 * 1024 * 1024;
768 
769          if (i == NUM_SLAB_ALLOCATORS - 1 && slab_size < pte_size)
770             slab_size = pte_size;
771 
772          break;
773       }
774    }
775    assert(slab_size != 0);
776 
777    switch (heap) {
778    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
779    case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
780       flags |= BO_ALLOC_COMPRESSED;
781       break;
782    case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
783    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
784       flags |= BO_ALLOC_SMEM;
785       break;
786    case IRIS_HEAP_DEVICE_LOCAL:
787       flags |= BO_ALLOC_LMEM;
788       break;
789    case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
790       flags |= BO_ALLOC_LMEM | BO_ALLOC_CPU_VISIBLE;
791       break;
792    default:
793       flags |= BO_ALLOC_PLAIN;
794    }
795 
796    slab->bo =
797       iris_bo_alloc(bufmgr, "slab", slab_size, slab_size, memzone, flags);
798    if (!slab->bo)
799       goto fail;
800 
801    slab_size = slab->bo->size;
802 
803    slab->base.num_entries = slab_size / entry_size;
804    slab->base.num_free = slab->base.num_entries;
805    slab->base.group_index = group_index;
806    slab->base.entry_size = entry_size;
807    slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
808    if (!slab->entries)
809       goto fail_bo;
810 
811    list_inithead(&slab->base.free);
812 
813    for (unsigned i = 0; i < slab->base.num_entries; i++) {
814       struct iris_bo *bo = &slab->entries[i];
815 
816       bo->size = entry_size;
817       bo->bufmgr = bufmgr;
818       bo->hash = _mesa_hash_pointer(bo);
819       bo->gem_handle = 0;
820       bo->address = intel_canonical_address(slab->bo->address + i * entry_size);
821       bo->aux_map_address = 0;
822       bo->index = -1;
823       bo->refcount = 0;
824       bo->idle = true;
825       bo->zeroed = slab->bo->zeroed;
826 
827       bo->slab.entry.slab = &slab->base;
828 
829       bo->slab.real = iris_get_backing_bo(slab->bo);
830 
831       list_addtail(&bo->slab.entry.head, &slab->base.free);
832    }
833 
834    return &slab->base;
835 
836 fail_bo:
837    iris_bo_unreference(slab->bo);
838 fail:
839    free(slab);
840    return NULL;
841 }
842 
843 /**
844  * Selects a heap for the given buffer allocation flags.
845  *
846  * This determines the cacheability, coherency, and mmap mode settings.
847  */
848 static enum iris_heap
849 flags_to_heap(struct iris_bufmgr *bufmgr, unsigned flags)
850 {
851    const struct intel_device_info *devinfo = &bufmgr->devinfo;
852 
853    if (bufmgr->vram.size > 0) {
854       if (flags & BO_ALLOC_COMPRESSED)
855          return IRIS_HEAP_DEVICE_LOCAL_COMPRESSED;
856 
857       /* Discrete GPUs currently always snoop CPU caches. */
858       if ((flags & BO_ALLOC_SMEM) || (flags & BO_ALLOC_CACHED_COHERENT))
859          return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
860 
861       if ((flags & BO_ALLOC_LMEM) ||
862           ((flags & BO_ALLOC_SCANOUT) && !(flags & BO_ALLOC_SHARED))) {
863 
864          if ((flags & BO_ALLOC_CPU_VISIBLE) && !intel_vram_all_mappable(devinfo))
865             return IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR;
866 
867          return IRIS_HEAP_DEVICE_LOCAL;
868       }
869 
870       return IRIS_HEAP_DEVICE_LOCAL_PREFERRED;
871    } else if (devinfo->has_llc) {
872       assert(!(flags & BO_ALLOC_LMEM));
873 
874       if (flags & (BO_ALLOC_SCANOUT | BO_ALLOC_SHARED))
875          return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
876 
877       return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
878    } else {
879       assert(!devinfo->has_llc);
880       assert(!(flags & BO_ALLOC_LMEM));
881 
882       if (flags & BO_ALLOC_COMPRESSED)
883          return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;
884 
885       if (flags & (BO_ALLOC_SCANOUT | BO_ALLOC_SHARED))
886             return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
887 
888       if (flags & BO_ALLOC_CACHED_COHERENT)
889          return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
890 
891       return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
892    }
893 }
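/* Illustrative outcomes of the heap selection above (not exhaustive):
 *
 *  - Discrete GPU (vram.size > 0): BO_ALLOC_SMEM or BO_ALLOC_CACHED_COHERENT
 *    yields IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT, BO_ALLOC_LMEM |
 *    BO_ALLOC_CPU_VISIBLE on a small-BAR part yields
 *    IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR, and no placement flags at
 *    all falls through to IRIS_HEAP_DEVICE_LOCAL_PREFERRED.
 *  - Integrated GPU with LLC: scanout or shared BOs get
 *    IRIS_HEAP_SYSTEM_MEMORY_UNCACHED, everything else is cached-coherent.
 */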
894 
895 static bool
896 zero_bo(struct iris_bufmgr *bufmgr,
897         unsigned flags,
898         struct iris_bo *bo)
899 {
900    assert(flags & BO_ALLOC_ZEROED);
901 
902    if (bo->zeroed)
903       return true;
904 
905    if (bufmgr->devinfo.has_flat_ccs && (flags & BO_ALLOC_LMEM)) {
906       /* With flat CCS, all allocations in LMEM have memory ranges with
907        * corresponding CCS elements. These elements are only accessible
908        * through GPU commands, but we don't issue GPU commands here.
909        */
910       return false;
911    }
912 
913    void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
914    if (!map)
915       return false;
916 
917    memset(map, 0, bo->size);
918    bo->zeroed = true;
919    return true;
920 }
921 
922 static struct iris_bo *
923 alloc_bo_from_slabs(struct iris_bufmgr *bufmgr,
924                     const char *name,
925                     uint64_t size,
926                     uint32_t alignment,
927                     unsigned flags)
928 {
929    if (flags & BO_ALLOC_NO_SUBALLOC)
930       return NULL;
931 
932    struct pb_slabs *last_slab = &bufmgr->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
933    unsigned max_slab_entry_size =
934       1 << (last_slab->min_order + last_slab->num_orders - 1);
935 
936    if (size > max_slab_entry_size)
937       return NULL;
938 
939    struct pb_slab_entry *entry;
940 
941    enum iris_heap heap = flags_to_heap(bufmgr, flags);
942 
943    unsigned alloc_size = size;
944 
945    /* Always use slabs for sizes less than 4 KB because the kernel aligns
946     * everything to 4 KB.
947     */
948    if (size < alignment && alignment <= 4 * 1024)
949       alloc_size = alignment;
950 
951    if (alignment > get_slab_entry_alignment(bufmgr, alloc_size)) {
952       /* 3/4 allocations can return too small alignment.
953        * Try again with a power of two allocation size.
954        */
955       unsigned pot_size = get_slab_pot_entry_size(bufmgr, alloc_size);
956 
957       if (alignment <= pot_size) {
958          /* This size works but wastes some memory to fulfill the alignment. */
959          alloc_size = pot_size;
960       } else {
961          /* can't fulfill alignment requirements */
962          return NULL;
963       }
964    }
965 
966    struct pb_slabs *slabs = get_slabs(bufmgr, alloc_size);
967    entry = pb_slab_alloc(slabs, alloc_size, heap);
968    if (!entry) {
969       /* Clean up and try again... */
970       pb_slabs_reclaim(slabs);
971 
972       entry = pb_slab_alloc(slabs, alloc_size, heap);
973    }
974    if (!entry)
975       return NULL;
976 
977    struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
978 
979    if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
980       /* This buffer was associated with an aux-buffer range.  We only allow
981        * slab allocated buffers to be reclaimed when idle (not in use by an
982        * executing batch).  (See iris_can_reclaim_slab().)  So we know that
983        * our previous aux mapping is no longer in use, and we can safely
984        * remove it.
985        */
986       intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
987                                 bo->size);
988       bo->aux_map_address = 0;
989    }
990 
991    p_atomic_set(&bo->refcount, 1);
992    bo->name = name;
993    bo->size = size;
994 
995    /* Zero the contents if necessary.  If this fails, fall back to
996     * allocating a fresh BO, which will always be zeroed by the kernel.
997     */
998    if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
999       pb_slab_free(slabs, &bo->slab.entry);
1000       return NULL;
1001    }
1002 
1003    return bo;
1004 }
1005 
1006 static struct iris_bo *
1007 alloc_bo_from_cache(struct iris_bufmgr *bufmgr,
1008                     struct bo_cache_bucket *bucket,
1009                     uint32_t alignment,
1010                     enum iris_memory_zone memzone,
1011                     enum iris_mmap_mode mmap_mode,
1012                     unsigned flags,
1013                     bool match_zone)
1014 {
1015    if (!bucket)
1016       return NULL;
1017 
1018    struct iris_bo *bo = NULL;
1019 
1020    simple_mtx_assert_locked(&bufmgr->lock);
1021 
1022    list_for_each_entry_safe(struct iris_bo, cur, &bucket->head, head) {
1023       assert(iris_bo_is_real(cur));
1024 
1025       /* Find one that's got the right mapping type.  We used to swap maps
1026        * around but the kernel doesn't allow this on discrete GPUs.
1027        */
1028       if (mmap_mode != cur->real.mmap_mode)
1029          continue;
1030 
1031       /* Try a little harder to find one that's already in the right memzone */
1032       if (match_zone && memzone != iris_memzone_for_address(cur->address))
1033          continue;
1034 
1035       if (cur->real.capture != !!(flags & BO_ALLOC_CAPTURE))
1036          continue;
1037 
1038       /* If the last BO in the cache is busy, there are no idle BOs.  Bail,
1039        * either falling back to a non-matching memzone, or if that fails,
1040        * allocating a fresh buffer.
1041        */
1042       if (iris_bo_busy(cur))
1043          return NULL;
1044 
1045       list_del(&cur->head);
1046 
1047       /* Tell the kernel we need this BO, and check whether it still exists. */
1048       if (!iris_bo_madvise(cur, IRIS_MADVICE_WILL_NEED)) {
1049          /* This BO was purged, throw it out and keep looking. */
1050          bo_free(cur);
1051          continue;
1052       }
1053 
1054       if (cur->aux_map_address) {
1055          /* This buffer was associated with an aux-buffer range. We make sure
1056           * that buffers are not reused from the cache while they are busy, i.e.
1057           * in use by an executing batch. Since we are here, the buffer is no
1058           * longer being used by a batch and the buffer was deleted (in order to
1059           * end up in the cache). Therefore its old aux-buffer range can be
1060           * removed from the aux-map.
1061           */
1062          if (cur->bufmgr->aux_map_ctx)
1063             intel_aux_map_unmap_range(cur->bufmgr->aux_map_ctx, cur->address,
1064                                       cur->size);
1065          cur->aux_map_address = 0;
1066       }
1067 
1068       /* If the cached BO isn't in the right memory zone, or the alignment
1069        * isn't sufficient, free the old memory and assign it a new address.
1070        */
1071       if (memzone != iris_memzone_for_address(cur->address) ||
1072           cur->address % alignment != 0) {
1073          if (!bufmgr->kmd_backend->gem_vm_unbind(cur)) {
1074             DBG("Unable to unbind vm of buf %u\n", cur->gem_handle);
1075             bo_free(cur);
1076             continue;
1077          }
1078 
1079          vma_free(bufmgr, cur->address, cur->size);
1080          cur->address = 0ull;
1081       }
1082 
1083       bo = cur;
1084       break;
1085    }
1086 
1087    if (!bo)
1088       return NULL;
1089 
1090    /* Zero the contents if necessary.  If this fails, fall back to
1091     * allocating a fresh BO, which will always be zeroed by the kernel.
1092     */
1093    assert(bo->zeroed == false);
1094    if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
1095       bo_free(bo);
1096       return NULL;
1097    }
1098 
1099    return bo;
1100 }
1101 
1102 static struct iris_bo *
1103 alloc_fresh_bo(struct iris_bufmgr *bufmgr, uint64_t bo_size, unsigned flags)
1104 {
1105    struct iris_bo *bo = bo_calloc();
1106    if (!bo)
1107       return NULL;
1108 
1109    /* Try to allocate memory in multiples of 2MB, as this allows us to use
1110     * 64K pages rather than the less-efficient 4K pages.  Most BOs smaller
1111     * than 64MB should hit the BO cache or slab allocations anyway, so this
1112     * shouldn't waste too much memory.  We do exclude small (< 1MB) sizes to
1113     * be defensive in case any of those bypass the caches and end up here.
1114     */
1115    if (bo_size >= 1024 * 1024)
1116       bo_size = align64(bo_size, 2 * 1024 * 1024);
1117 
1118    bo->real.heap = flags_to_heap(bufmgr, flags);
1119 
1120    const struct intel_memory_class_instance *regions[2];
1121    uint16_t num_regions = 0;
1122 
1123    if (bufmgr->vram.size > 0) {
1124       switch (bo->real.heap) {
1125       case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1126          /* For vram allocations, still use system memory as a fallback. */
1127          regions[num_regions++] = bufmgr->vram.region;
1128          regions[num_regions++] = bufmgr->sys.region;
1129          break;
1130       case IRIS_HEAP_DEVICE_LOCAL:
1131       case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
1132       case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
1133          regions[num_regions++] = bufmgr->vram.region;
1134          break;
1135       case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1136          regions[num_regions++] = bufmgr->sys.region;
1137          break;
1138       case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
1139          /* not valid; compressed BOs on discrete are always created with
1140           * IRIS_HEAP_DEVICE_LOCAL_COMPRESSED (see flags_to_heap())
1141           */
1142       case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1143          /* not valid; discrete cards always enable snooping */
1144       case IRIS_HEAP_MAX:
1145          unreachable("invalid heap for BO");
1146       }
1147    } else {
1148       regions[num_regions++] = bufmgr->sys.region;
1149    }
1150 
1151    bo->gem_handle = bufmgr->kmd_backend->gem_create(bufmgr, regions,
1152                                                     num_regions, bo_size,
1153                                                     bo->real.heap, flags);
1154    if (bo->gem_handle == 0) {
1155       free(bo);
1156       return NULL;
1157    }
1158    bo->bufmgr = bufmgr;
1159    bo->size = bo_size;
1160    bo->idle = true;
1161    bo->zeroed = true;
1162    bo->real.capture = (flags & BO_ALLOC_CAPTURE) != 0;
1163    bo->real.scanout = (flags & BO_ALLOC_SCANOUT) != 0;
1164 
1165    return bo;
1166 }
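/* Size rounding example for alloc_fresh_bo(): a 3 MB request is padded to
 * 4 MB so the kernel can back it with 64 KB pages, while a 512 KB request is
 * left as-is (below the 1 MB threshold; such sizes normally come from the
 * slab or cache paths anyway).
 */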
1167 
1168 const char *
1169 iris_heap_to_string[IRIS_HEAP_MAX] = {
1170    [IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT] = "system-cached-coherent",
1171    [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED] = "system-uncached",
1172    [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED] = "system-uncached-compressed",
1173    [IRIS_HEAP_DEVICE_LOCAL] = "local",
1174    [IRIS_HEAP_DEVICE_LOCAL_COMPRESSED] = "local-compressed",
1175    [IRIS_HEAP_DEVICE_LOCAL_PREFERRED] = "local-preferred",
1176    [IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR] = "local-cpu-visible-small-bar",
1177 };
1178 
1179 static enum iris_mmap_mode
1180 heap_to_mmap_mode(struct iris_bufmgr *bufmgr, enum iris_heap heap)
1181 {
1182    const struct intel_device_info *devinfo = &bufmgr->devinfo;
1183 
1184    switch (heap) {
1185    case IRIS_HEAP_DEVICE_LOCAL:
1186       return intel_vram_all_mappable(devinfo) ? IRIS_MMAP_WC : IRIS_MMAP_NONE;
1187    case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
1188    case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1189       return IRIS_MMAP_WC;
1190    case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1191       return IRIS_MMAP_WB;
1192    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1193       return IRIS_MMAP_WC;
1194    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
1195    case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
1196       /* compressed BOs are not mmapped */
1197       return IRIS_MMAP_NONE;
1198    default:
1199       unreachable("invalid heap");
1200    }
1201 }
1202 
1203 struct iris_bo *
1204 iris_bo_alloc(struct iris_bufmgr *bufmgr,
1205               const char *name,
1206               uint64_t size,
1207               uint32_t alignment,
1208               enum iris_memory_zone memzone,
1209               unsigned flags)
1210 {
1211    struct iris_bo *bo;
1212    unsigned int page_size = getpagesize();
1213    enum iris_heap heap = flags_to_heap(bufmgr, flags);
1214    struct bo_cache_bucket *bucket =
1215       bucket_for_size(bufmgr, size, heap, flags);
1216 
1217    if (memzone != IRIS_MEMZONE_OTHER || (flags & BO_ALLOC_CACHED_COHERENT))
1218       flags |= BO_ALLOC_NO_SUBALLOC;
1219 
1220    /* By default, capture all driver-internal buffers like shader kernels,
1221     * surface states, dynamic states, border colors, and so on.
1222     */
1223    if (memzone < IRIS_MEMZONE_OTHER || INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1224       flags |= BO_ALLOC_CAPTURE;
1225 
1226    bo = alloc_bo_from_slabs(bufmgr, name, size, alignment, flags);
1227 
1228    if (bo)
1229       return bo;
1230 
1231    /* Round the size up to the bucket size, or if we don't have caching
1232     * at this size, a multiple of the page size.
1233     */
1234    uint64_t bo_size =
1235       bucket ? bucket->size : MAX2(align64(size, page_size), page_size);
1236    enum iris_mmap_mode mmap_mode = heap_to_mmap_mode(bufmgr, heap);
1237 
1238    simple_mtx_lock(&bufmgr->lock);
1239 
1240    /* Get a buffer out of the cache if available.  First, we try to find
1241     * one with a matching memory zone so we can avoid reallocating VMA.
1242     */
1243    bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1244                             flags, true);
1245 
1246    /* If that fails, we try for any cached BO, without matching memzone. */
1247    if (!bo) {
1248       bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1249                                flags, false);
1250    }
1251 
1252    simple_mtx_unlock(&bufmgr->lock);
1253 
1254    if (!bo) {
1255       bo = alloc_fresh_bo(bufmgr, bo_size, flags);
1256       if (!bo)
1257          return NULL;
1258    }
1259 
1260    if (bo->address == 0ull) {
1261       simple_mtx_lock(&bufmgr->lock);
1262       bo->address = vma_alloc(bufmgr, memzone, bo->size, alignment);
1263       simple_mtx_unlock(&bufmgr->lock);
1264 
1265       if (bo->address == 0ull)
1266          goto err_free;
1267 
1268       if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1269          goto err_vm_alloc;
1270    }
1271 
1272    bo->name = name;
1273    p_atomic_set(&bo->refcount, 1);
1274    bo->real.reusable = bucket && bufmgr->bo_reuse;
1275    bo->real.protected = flags & BO_ALLOC_PROTECTED;
1276    bo->index = -1;
1277    bo->real.prime_fd = -1;
1278 
1279    assert(bo->real.map == NULL || bo->real.mmap_mode == mmap_mode);
1280    bo->real.mmap_mode = mmap_mode;
1281 
1282    /* On integrated GPUs, enable snooping to ensure coherency if needed.
1283     * For discrete, we instead use SMEM and avoid WB maps for coherency.
1284     */
1285    if ((flags & BO_ALLOC_CACHED_COHERENT) &&
1286        !bufmgr->devinfo.has_llc && bufmgr->devinfo.has_caching_uapi) {
1287       if (bufmgr->kmd_backend->bo_set_caching(bo, true) != 0)
1288          goto err_free;
1289    }
1290 
1291    DBG("bo_create: buf %d (%s) (%s memzone) (%s) %llub\n", bo->gem_handle,
1292        bo->name, memzone_name(memzone), iris_heap_to_string[bo->real.heap],
1293        (unsigned long long) size);
1294 
1295    return bo;
1296 
1297 err_vm_alloc:
1298    simple_mtx_lock(&bufmgr->lock);
1299    vma_free(bufmgr, bo->address, bo->size);
1300    simple_mtx_unlock(&bufmgr->lock);
1301 err_free:
1302    simple_mtx_lock(&bufmgr->lock);
1303    bo_free(bo);
1304    simple_mtx_unlock(&bufmgr->lock);
1305    return NULL;
1306 }
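/* A minimal usage sketch of iris_bo_alloc() (the name, size, and alignment
 * below are hypothetical, shown only to illustrate the flag/memzone plumbing
 * above):
 *
 *    struct iris_bo *bo =
 *       iris_bo_alloc(bufmgr, "example data", 64 * 1024, 64,
 *                     IRIS_MEMZONE_OTHER, BO_ALLOC_ZEROED);
 *    if (bo) {
 *       void *map = iris_bo_map(NULL, bo, MAP_WRITE);
 *       ...
 *       iris_bo_unreference(bo);
 *    }
 *
 * Because the memzone is IRIS_MEMZONE_OTHER and BO_ALLOC_CACHED_COHERENT is
 * not set, this request is eligible for the slab and bucket-cache fast paths
 * before a fresh GEM object is created.
 */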
1307 
1308 static int
1309 iris_bo_close(int fd, uint32_t gem_handle)
1310 {
1311    struct drm_gem_close close = {
1312       .handle = gem_handle,
1313    };
1314    return intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
1315 }
1316 
1317 struct iris_bo *
1318 iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
1319                        void *ptr, size_t size,
1320                        enum iris_memory_zone memzone)
1321 {
1322    struct iris_bo *bo;
1323 
1324    bo = bo_calloc();
1325    if (!bo)
1326       return NULL;
1327 
1328    bo->gem_handle = bufmgr->kmd_backend->gem_create_userptr(bufmgr, ptr, size);
1329    if (bo->gem_handle == 0)
1330       goto err_free;
1331 
1332    bo->name = name;
1333    bo->size = size;
1334    bo->real.map = ptr;
1335    bo->real.userptr = true;
1336 
1337    bo->bufmgr = bufmgr;
1338 
1339    if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1340       bo->real.capture = true;
1341 
1342    simple_mtx_lock(&bufmgr->lock);
1343    bo->address = vma_alloc(bufmgr, memzone, size, 1);
1344    simple_mtx_unlock(&bufmgr->lock);
1345 
1346    if (bo->address == 0ull)
1347       goto err_close;
1348 
1349    p_atomic_set(&bo->refcount, 1);
1350    bo->index = -1;
1351    bo->idle = true;
1352    bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1353    bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
1354    bo->real.prime_fd = -1;
1355 
1356    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1357       goto err_vma_free;
1358 
1359    return bo;
1360 
1361 err_vma_free:
1362    simple_mtx_lock(&bufmgr->lock);
1363    vma_free(bufmgr, bo->address, bo->size);
1364    simple_mtx_unlock(&bufmgr->lock);
1365 err_close:
1366    bufmgr->kmd_backend->gem_close(bufmgr, bo);
1367 err_free:
1368    free(bo);
1369    return NULL;
1370 }
1371 
1372 static bool
1373 needs_prime_fd(struct iris_bufmgr *bufmgr)
1374 {
1375    return bufmgr->devinfo.kmd_type == INTEL_KMD_TYPE_XE;
1376 }
1377 
1378 static bool
1379 iris_bo_set_prime_fd(struct iris_bo *bo)
1380 {
1381    struct iris_bufmgr *bufmgr = bo->bufmgr;
1382 
1383    if (needs_prime_fd(bufmgr) && bo->real.prime_fd == -1) {
1384       if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1385                              DRM_CLOEXEC | DRM_RDWR, &bo->real.prime_fd)) {
1386          fprintf(stderr, "Failed to get prime fd for bo %s/%u\n",
1387                  bo->name, bo->gem_handle);
1388          return false;
1389       }
1390    }
1391 
1392    return true;
1393 }
1394 
1395 /**
1396  * Returns an iris_bo wrapping the given buffer object handle.
1397  *
1398  * This can be used when one application needs to pass a buffer object
1399  * to another.
1400  */
1401 struct iris_bo *
1402 iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
1403                              const char *name, unsigned int handle)
1404 {
1405    struct iris_bo *bo;
1406 
1407    /* At the moment most applications only have a few named BOs.
1408     * For instance, in a DRI client only the render buffers passed
1409     * between X and the client are named. And since X returns the
1410     * alternating names for the front/back buffer a linear search
1411     * provides a sufficiently fast match.
1412     */
1413    simple_mtx_lock(&bufmgr->lock);
1414    bo = find_and_ref_external_bo(bufmgr->name_table, handle);
1415    if (bo)
1416       goto out;
1417 
1418    struct drm_gem_open open_arg = { .name = handle };
1419    int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
1420    if (ret != 0) {
1421       DBG("Couldn't reference %s handle 0x%08x: %s\n",
1422           name, handle, strerror(errno));
1423       bo = NULL;
1424       goto out;
1425    }
1426    /* Now see if someone has used a prime handle to get this
1427     * object from the kernel before by looking through the list
1428     * again for a matching gem_handle
1429     */
1430    bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle);
1431    if (bo)
1432       goto out;
1433 
1434    bo = bo_calloc();
1435    if (!bo) {
1436       struct iris_bo close_bo = {
1437             .gem_handle = open_arg.handle,
1438       };
1439       bufmgr->kmd_backend->gem_close(bufmgr, &close_bo);
1440       goto out;
1441    }
1442 
1443    p_atomic_set(&bo->refcount, 1);
1444 
1445    bo->size = open_arg.size;
1446    bo->bufmgr = bufmgr;
1447    bo->gem_handle = open_arg.handle;
1448    bo->name = name;
1449    bo->index = -1;
1450    bo->real.global_name = handle;
1451    bo->real.prime_fd = -1;
1452    bo->real.reusable = false;
1453    bo->real.imported = true;
1454    /* Xe KMD expects at least 1-way coherency for imports */
1455    bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1456    bo->real.mmap_mode = IRIS_MMAP_NONE;
1457    if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1458       bo->real.capture = true;
1459    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
1460    if (bo->address == 0ull)
1461       goto err_free;
1462 
1463    if (!iris_bo_set_prime_fd(bo))
1464       goto err_vm_alloc;
1465 
1466    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1467       goto err_vm_alloc;
1468 
1469    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1470    _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
1471 
1472    DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
1473 
1474 out:
1475    simple_mtx_unlock(&bufmgr->lock);
1476    return bo;
1477 
1478 err_vm_alloc:
1479    vma_free(bufmgr, bo->address, bo->size);
1480 err_free:
1481    bo_free(bo);
1482    simple_mtx_unlock(&bufmgr->lock);
1483    return NULL;
1484 }
1485 
1486 static void
1487 bo_close(struct iris_bo *bo)
1488 {
1489    struct iris_bufmgr *bufmgr = bo->bufmgr;
1490 
1491    simple_mtx_assert_locked(&bufmgr->lock);
1492    assert(iris_bo_is_real(bo));
1493 
1494    if (iris_bo_is_external(bo)) {
1495       struct hash_entry *entry;
1496 
1497       if (bo->real.global_name) {
1498          entry = _mesa_hash_table_search(bufmgr->name_table,
1499                                          &bo->real.global_name);
1500          _mesa_hash_table_remove(bufmgr->name_table, entry);
1501       }
1502 
1503       entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
1504       _mesa_hash_table_remove(bufmgr->handle_table, entry);
1505 
1506       list_for_each_entry_safe(struct bo_export, export, &bo->real.exports, link) {
1507          iris_bo_close(export->drm_fd, export->gem_handle);
1508 
1509          list_del(&export->link);
1510          free(export);
1511       }
1512    } else {
1513       assert(list_is_empty(&bo->real.exports));
1514    }
1515 
1516    /* Unbind and return the VMA for reuse */
1517    if (bufmgr->kmd_backend->gem_vm_unbind(bo))
1518       vma_free(bo->bufmgr, bo->address, bo->size);
1519    else
1520       DBG("Unable to unbind vm of buf %u\n", bo->gem_handle);
1521 
1522    if (bo->real.prime_fd != -1)
1523       close(bo->real.prime_fd);
1524 
1525    /* Close this object */
1526    if (bufmgr->kmd_backend->gem_close(bufmgr, bo) != 0) {
1527       DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1528           bo->gem_handle, bo->name, strerror(errno));
1529    }
1530 
1531    if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
1532       intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
1533                                 bo->size);
1534    }
1535 
1536    for (int d = 0; d < bo->deps_size; d++) {
1537       for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
1538          iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
1539          iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
1540       }
1541    }
1542    free(bo->deps);
1543 
1544    free(bo);
1545 }
1546 
1547 static void
1548 bo_free(struct iris_bo *bo)
1549 {
1550    struct iris_bufmgr *bufmgr = bo->bufmgr;
1551 
1552    simple_mtx_assert_locked(&bufmgr->lock);
1553    assert(iris_bo_is_real(bo));
1554 
1555    if (!bo->real.userptr && bo->real.map)
1556       bo_unmap(bo);
1557 
1558    if (bo->idle || !iris_bo_busy(bo)) {
1559       bo_close(bo);
1560    } else {
1561       /* Defer closing the GEM BO and returning the VMA for reuse until the
1562        * BO is idle.  Just move it to the dead list for now.
1563        */
1564       list_addtail(&bo->head, &bufmgr->zombie_list);
1565    }
1566 }
1567 
1568 static enum iris_heap
1569 iris_get_heap_max(struct iris_bufmgr *bufmgr)
1570 {
1571    if (bufmgr->vram.size) {
1572       return intel_vram_all_mappable(&bufmgr->devinfo) ?
1573              IRIS_HEAP_MAX_LARGE_BAR : IRIS_HEAP_MAX;
1574    }
1575 
1576    return bufmgr->devinfo.ver >= 20 ? IRIS_HEAP_MAX_NO_VRAM :
1577                                       IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;
1578 }
1579 
1580 /** Frees all cached buffers significantly older than @time. */
1581 static void
1582 cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time)
1583 {
1584    simple_mtx_assert_locked(&bufmgr->lock);
1585 
1586    if (bufmgr->time == time)
1587       return;
1588 
1589    for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
1590       struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1591 
1592       for (int i = 0; i < cache->num_buckets; i++) {
1593          struct bo_cache_bucket *bucket = &cache->bucket[i];
1594 
1595          list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1596             if (time - bo->real.free_time <= 1)
1597                break;
1598 
1599             list_del(&bo->head);
1600 
1601             bo_free(bo);
1602          }
1603       }
1604    }
1605 
1606    list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1607       /* Stop once we reach a busy BO - all others past this point were
1608        * freed more recently so are likely also busy.
1609        */
1610       if (!bo->idle && iris_bo_busy(bo))
1611          break;
1612 
1613       list_del(&bo->head);
1614       bo_close(bo);
1615    }
1616 
1617    bufmgr->time = time;
1618 }
1619 
1620 static void
1621 bo_unreference_final(struct iris_bo *bo, time_t time)
1622 {
1623    struct iris_bufmgr *bufmgr = bo->bufmgr;
1624 
1625    DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
1626 
1627    assert(iris_bo_is_real(bo));
1628 
1629    struct bo_cache_bucket *bucket = !bo->real.reusable ? NULL :
1630       bucket_for_size(bufmgr, bo->size, bo->real.heap, 0);
1631 
1632    /* Put the buffer into our internal cache for reuse if we can. */
1633    if (bucket && iris_bo_madvise(bo, IRIS_MADVICE_DONT_NEED)) {
1634       bo->real.free_time = time;
1635       bo->name = NULL;
1636 
1637       list_addtail(&bo->head, &bucket->head);
1638    } else {
1639       bo_free(bo);
1640    }
1641 }
1642 
1643 void
1644 iris_bo_unreference(struct iris_bo *bo)
1645 {
1646    if (bo == NULL)
1647       return;
1648 
1649    assert(p_atomic_read(&bo->refcount) > 0);
1650 
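   /* Lockless fast path: atomic_add_unless() drops a reference only when
    * more than one remains.  If the count is already 1, it returns true
    * without decrementing, and the final unreference (plus any freeing)
    * happens below under bufmgr->lock via p_atomic_dec_zero().
    */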
1651    if (atomic_add_unless(&bo->refcount, -1, 1)) {
1652       struct iris_bufmgr *bufmgr = bo->bufmgr;
1653       struct timespec time;
1654 
1655       clock_gettime(CLOCK_MONOTONIC, &time);
1656 
1657       bo->zeroed = false;
1658       if (bo->gem_handle == 0) {
1659          pb_slab_free(get_slabs(bufmgr, bo->size), &bo->slab.entry);
1660       } else {
1661          simple_mtx_lock(&bufmgr->lock);
1662 
1663          if (p_atomic_dec_zero(&bo->refcount)) {
1664             bo_unreference_final(bo, time.tv_sec);
1665             cleanup_bo_cache(bufmgr, time.tv_sec);
1666          }
1667 
1668          simple_mtx_unlock(&bufmgr->lock);
1669       }
1670    }
1671 }
1672 
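/* Waits for rendering on @bo to complete, emitting a perf_debug message when
 * the wait actually stalls for a measurable amount of time (> 0.01ms).
 */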
1673 static void
1674 bo_wait_with_stall_warning(struct util_debug_callback *dbg,
1675                            struct iris_bo *bo,
1676                            const char *action)
1677 {
1678    bool busy = dbg && !bo->idle;
1679    double elapsed = unlikely(busy) ? -get_time() : 0.0;
1680 
1681    iris_bo_wait_rendering(bo);
1682 
1683    if (unlikely(busy)) {
1684       elapsed += get_time();
1685       if (elapsed > 1e-5) /* 0.01ms */ {
1686          perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
1687                     action, bo->name, elapsed * 1000);
1688       }
1689    }
1690 }
1691 
1692 static void
1693 print_flags(unsigned flags)
1694 {
1695    if (flags & MAP_READ)
1696       DBG("READ ");
1697    if (flags & MAP_WRITE)
1698       DBG("WRITE ");
1699    if (flags & MAP_ASYNC)
1700       DBG("ASYNC ");
1701    if (flags & MAP_PERSISTENT)
1702       DBG("PERSISTENT ");
1703    if (flags & MAP_COHERENT)
1704       DBG("COHERENT ");
1705    if (flags & MAP_RAW)
1706       DBG("RAW ");
1707    DBG("\n");
1708 }
1709 
1710 void *
1711 iris_bo_map(struct util_debug_callback *dbg,
1712             struct iris_bo *bo, unsigned flags)
1713 {
1714    struct iris_bufmgr *bufmgr = bo->bufmgr;
1715    void *map = NULL;
1716 
1717    if (bo->gem_handle == 0) {
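      /* Suballocated (slab) BOs have no GEM handle of their own: map the
       * backing BO and offset into its mapping.
       */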
1718       struct iris_bo *real = iris_get_backing_bo(bo);
1719       uint64_t offset = bo->address - real->address;
1720       map = iris_bo_map(dbg, real, flags | MAP_ASYNC) + offset;
1721    } else {
1722       assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
1723       if (bo->real.mmap_mode == IRIS_MMAP_NONE)
1724          return NULL;
1725 
1726       if (!bo->real.map) {
1727          DBG("iris_bo_map: %d (%s)\n", bo->gem_handle, bo->name);
1728          map = bufmgr->kmd_backend->gem_mmap(bufmgr, bo);
1729          if (!map) {
1730             return NULL;
1731          }
1732 
1733          VG_DEFINED(map, bo->size);
1734 
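         /* Publish the mapping; if another thread raced us and installed a
          * map first, p_atomic_cmpxchg() returns that non-NULL pointer and
          * we discard our redundant mapping.
          */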
1735          if (p_atomic_cmpxchg(&bo->real.map, NULL, map)) {
1736             VG_NOACCESS(map, bo->size);
1737             os_munmap(map, bo->size);
1738          }
1739       }
1740       assert(bo->real.map);
1741       map = bo->real.map;
1742    }
1743 
1744    DBG("iris_bo_map: %d (%s) -> %p\n",
1745        bo->gem_handle, bo->name, bo->real.map);
1746    print_flags(flags);
1747 
1748    if (!(flags & MAP_ASYNC)) {
1749       bo_wait_with_stall_warning(dbg, bo, "memory mapping");
1750    }
1751 
1752    return map;
1753 }
1754 
1755 /**
1756  * Waits on a BO for the given amount of time.
1757  *
1758  * @bo: buffer object to wait for
1759  * @timeout_ns: amount of time to wait in nanoseconds.
1760  *   If value is less than 0, an infinite wait will occur.
1761  *
1762  * Returns 0 if the wait was successful, i.e. the last batch referencing the
1763  * object has completed within the allotted time. Otherwise some negative return
1764  * value describes the error. Of particular interest is -ETIME when the wait has
1765  * failed to yield the desired result.
1766  *
1767  * Similar to iris_bo_wait_rendering except a timeout parameter allows
1768  * the operation to give up after a certain amount of time. Another subtle
1769  * difference is in the internal locking semantics (this variant does
1770  * not hold the lock for the duration of the wait). This makes the wait subject
1771  * to a larger userspace race window.
1772  *
1773  * The implementation shall wait until the object is no longer actively
1774  * referenced within a batch buffer at the time of the call. The wait will
1775  * not guarantee that the buffer is re-issued via another thread, or a flinked
1776  * handle. Userspace must make sure this race does not occur if such precision
1777  * is important.
1778  *
1779  * Note that some kernels have broken the infinite wait for negative values
1780  * promise; upgrade to the latest stable kernel if this is the case.
1781  */
1782 static inline int
1783 iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
1784 {
1785    int ret;
1786 
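   /* External BOs on i915 may have work submitted by other processes that
    * our syncobjs don't track, so use the GEM wait ioctl there; everything
    * else is waited on via the driver-side syncobjs.
    */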
1787    switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
1788    case INTEL_KMD_TYPE_I915:
1789       if (iris_bo_is_external(bo))
1790          ret = iris_i915_bo_wait_gem(bo, timeout_ns);
1791       else
1792          ret = iris_bo_wait_syncobj(bo, timeout_ns);
1793       break;
1794    case INTEL_KMD_TYPE_XE:
1795       ret = iris_bo_wait_syncobj(bo, timeout_ns);
1796       break;
1797    default:
1798       unreachable("missing");
1799       ret = -1;
1800    }
1801 
1802    bo->idle = ret == 0;
1803 
1804    return ret;
1805 }
1806 
1807 /** Waits for all GPU rendering with the object to have completed. */
1808 void
1809 iris_bo_wait_rendering(struct iris_bo *bo)
1810 {
1811    /* We require a kernel recent enough for WAIT_IOCTL support.
1812     * See intel_init_bufmgr()
1813     */
1814    iris_bo_wait(bo, -1);
1815 }
1816 
1817 static void
1818 iris_bufmgr_destroy_global_vm(struct iris_bufmgr *bufmgr)
1819 {
1820    switch (bufmgr->devinfo.kmd_type) {
1821    case INTEL_KMD_TYPE_I915:
1822       /* Nothing to do in i915 */
1823       break;
1824    case INTEL_KMD_TYPE_XE:
1825       intel_bind_timeline_finish(&bufmgr->bind_timeline, bufmgr->fd);
1826       iris_xe_destroy_global_vm(bufmgr);
1827       break;
1828    default:
1829       unreachable("missing");
1830    }
1831 }
1832 
1833 static void
1834 iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
1835 {
1836    iris_bo_unreference(bufmgr->dummy_aux_bo);
1837    iris_bo_unreference(bufmgr->mem_fence_bo);
1838 
1839    iris_destroy_border_color_pool(&bufmgr->border_color_pool);
1840 
1841    /* Free aux-map buffers */
1842    intel_aux_map_finish(bufmgr->aux_map_ctx);
1843 
1844    /* bufmgr will no longer try to free VMA entries in the aux-map */
1845    bufmgr->aux_map_ctx = NULL;
1846 
1847    for (int i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
1848       if (bufmgr->bo_slabs[i].groups)
1849          pb_slabs_deinit(&bufmgr->bo_slabs[i]);
1850    }
1851 
1852    simple_mtx_lock(&bufmgr->lock);
1853 
1854    /* Free any cached buffer objects we were going to reuse */
1855    for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
1856       struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1857 
1858       for (int i = 0; i < cache->num_buckets; i++) {
1859          struct bo_cache_bucket *bucket = &cache->bucket[i];
1860 
1861          list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1862             list_del(&bo->head);
1863 
1864             bo_free(bo);
1865          }
1866       }
1867    }
1868    free(bufmgr->bucket_cache);
1869 
1870    /* Close any buffer objects on the dead list. */
1871    list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1872       list_del(&bo->head);
1873       bo_close(bo);
1874    }
1875 
1876    _mesa_hash_table_destroy(bufmgr->name_table, NULL);
1877    _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
1878 
1879    for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
1880       util_vma_heap_finish(&bufmgr->vma_allocator[z]);
1881 
1882    iris_bufmgr_destroy_global_vm(bufmgr);
1883 
1884    close(bufmgr->fd);
1885 
1886    simple_mtx_unlock(&bufmgr->lock);
1887 
1888    simple_mtx_destroy(&bufmgr->lock);
1889    simple_mtx_destroy(&bufmgr->bo_deps_lock);
1890 
1891    free(bufmgr);
1892 }
1893 
1894 int
1895 iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling)
1896 {
1897    struct iris_bufmgr *bufmgr = bo->bufmgr;
1898 
1899    if (!bufmgr->devinfo.has_tiling_uapi) {
1900       *tiling = 0;
1901       return 0;
1902    }
1903 
1904    assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
1905    return iris_i915_bo_get_tiling(bo, tiling);
1906 }
1907 
1908 int
1909 iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf)
1910 {
1911    struct iris_bufmgr *bufmgr = bo->bufmgr;
1912 
1913    /* If we can't do map_gtt, the set/get_tiling API isn't useful. And it's
1914     * actually not supported by the kernel in those cases.
1915     */
1916    if (!bufmgr->devinfo.has_tiling_uapi)
1917       return 0;
1918 
1919    assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
1920    return iris_i915_bo_set_tiling(bo, surf);
1921 }
1922 
1923 struct iris_bo *
1924 iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
1925                       const uint64_t modifier)
1926 {
1927    uint32_t handle;
1928    struct iris_bo *bo;
1929 
1930    simple_mtx_lock(&bufmgr->lock);
1931    int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1932    if (ret) {
1933       DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
1934           strerror(errno));
1935       simple_mtx_unlock(&bufmgr->lock);
1936       return NULL;
1937    }
1938 
1939    /*
1940     * See if the kernel has already returned this buffer to us. Just as
1941     * for named buffers, we must not create two BOs pointing at the same
1942     * kernel object.
1943     */
1944    bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
1945    if (bo)
1946       goto out;
1947 
1948    bo = bo_calloc();
1949    if (!bo)
1950       goto out;
1951 
1952    p_atomic_set(&bo->refcount, 1);
1953 
1954    /* Determine size of bo.  The fd-to-handle ioctl really should
1955     * return the size, but it doesn't.  If we have kernel 3.12 or
1956     * later, we can lseek on the prime fd to get the size.  Older
1957     * kernels will just fail, in which case we fall back to the
1958     * provided (estimated or guessed) size. */
1959    ret = lseek(prime_fd, 0, SEEK_END);
1960    if (ret != -1)
1961       bo->size = ret;
1962 
1963    bo->bufmgr = bufmgr;
1964    bo->name = "prime";
1965    bo->index = -1;
1966    bo->real.reusable = false;
1967    bo->real.imported = true;
1968    /* Xe KMD expects at least 1-way coherency for imports */
1969    bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1970    bo->real.mmap_mode = IRIS_MMAP_NONE;
1971    if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1972       bo->real.capture = true;
1973    bo->gem_handle = handle;
1974    bo->real.prime_fd = needs_prime_fd(bufmgr) ? dup(prime_fd) : -1;
1975 
1976    uint64_t alignment = 1;
1977 
1978    /* When an aux map will be used, there is an alignment requirement on the
1979     * main surface from the mapping granularity. Some planes of the image may
1980     * have smaller alignment requirements, but this one should work for all.
1981     */
1982    if (bufmgr->devinfo.has_aux_map && isl_drm_modifier_has_aux(modifier))
1983       alignment = intel_aux_map_get_alignment(bufmgr->aux_map_ctx);
1984 
1985    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, alignment);
1986    if (bo->address == 0ull)
1987       goto err_free;
1988 
1989    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1990       goto err_vm_alloc;
1991 
1992    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1993 
1994 out:
1995    simple_mtx_unlock(&bufmgr->lock);
1996    return bo;
1997 
1998 err_vm_alloc:
1999    vma_free(bufmgr, bo->address, bo->size);
2000 err_free:
2001    bo_free(bo);
2002    simple_mtx_unlock(&bufmgr->lock);
2003    return NULL;
2004 }
2005 
2006 static void
2007 iris_bo_mark_exported_locked(struct iris_bo *bo)
2008 {
2009    struct iris_bufmgr *bufmgr = bo->bufmgr;
2010 
2011    /* We cannot export suballocated BOs. */
2012    assert(iris_bo_is_real(bo));
2013    simple_mtx_assert_locked(&bufmgr->lock);
2014 
2015    if (!iris_bo_is_external(bo))
2016       _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
2017 
2018    if (!bo->real.exported) {
2019       /* If a BO is going to be used externally, it could be sent to the
2020        * display HW. So make sure our CPU mappings don't assume cache
2021        * coherency since display is outside that cache.
2022        */
2023       bo->real.exported = true;
2024       bo->real.reusable = false;
2025    }
2026 }
2027 
2028 void
2029 iris_bo_mark_exported(struct iris_bo *bo)
2030 {
2031    struct iris_bufmgr *bufmgr = bo->bufmgr;
2032 
2033    /* We cannot export suballocated BOs. */
2034    assert(iris_bo_is_real(bo));
2035 
2036    if (bo->real.exported) {
2037       assert(!bo->real.reusable);
2038       return;
2039    }
2040 
2041    simple_mtx_lock(&bufmgr->lock);
2042    iris_bo_mark_exported_locked(bo);
2043    simple_mtx_unlock(&bufmgr->lock);
2044 
2045    iris_bo_set_prime_fd(bo);
2046 }
2047 
2048 int
2049 iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
2050 {
2051    struct iris_bufmgr *bufmgr = bo->bufmgr;
2052 
2053    /* We cannot export suballocated BOs. */
2054    assert(iris_bo_is_real(bo));
2055 
2056    if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
2057                           DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
2058       return -errno;
2059 
2060    iris_bo_mark_exported(bo);
2061 
2062    return 0;
2063 }
2064 
2065 static uint32_t
2066 iris_bo_export_gem_handle(struct iris_bo *bo)
2067 {
2068    /* We cannot export suballocated BOs. */
2069    assert(iris_bo_is_real(bo));
2070 
2071    iris_bo_mark_exported(bo);
2072 
2073    return bo->gem_handle;
2074 }
2075 
2076 int
2077 iris_bo_flink(struct iris_bo *bo, uint32_t *name)
2078 {
2079    struct iris_bufmgr *bufmgr = bo->bufmgr;
2080 
2081    /* We cannot export suballocated BOs. */
2082    assert(iris_bo_is_real(bo));
2083 
2084    if (!bo->real.global_name) {
2085       struct drm_gem_flink flink = { .handle = bo->gem_handle };
2086 
2087       if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
2088          return -errno;
2089 
2090       simple_mtx_lock(&bufmgr->lock);
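      /* Re-check under the lock: another thread may have flinked this BO
       * and published the name while we issued the ioctl above.
       */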
2091       if (!bo->real.global_name) {
2092          iris_bo_mark_exported_locked(bo);
2093          bo->real.global_name = flink.name;
2094          _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
2095       }
2096       simple_mtx_unlock(&bufmgr->lock);
2097 
2098       iris_bo_set_prime_fd(bo);
2099    }
2100 
2101    *name = bo->real.global_name;
2102    return 0;
2103 }
2104 
2105 int
2106 iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
2107                                      uint32_t *out_handle)
2108 {
2109    /* We cannot export suballocated BOs. */
2110    assert(iris_bo_is_real(bo));
2111 
2112    /* Only add the new GEM handle to the list of exports if it belongs to a
2113     * different GEM device. Otherwise we might close the same buffer multiple
2114     * times.
2115     */
2116    struct iris_bufmgr *bufmgr = bo->bufmgr;
2117    int ret = os_same_file_description(drm_fd, bufmgr->fd);
2118    WARN_ONCE(ret < 0,
2119              "Kernel has no file descriptor comparison support: %s\n",
2120              strerror(errno));
2121    if (ret == 0) {
2122       *out_handle = iris_bo_export_gem_handle(bo);
2123       return 0;
2124    }
2125 
2126    struct bo_export *export = calloc(1, sizeof(*export));
2127    if (!export)
2128       return -ENOMEM;
2129 
2130    export->drm_fd = drm_fd;
2131 
2132    int dmabuf_fd = -1;
2133    int err = iris_bo_export_dmabuf(bo, &dmabuf_fd);
2134    if (err) {
2135       free(export);
2136       return err;
2137    }
2138 
2139    simple_mtx_lock(&bufmgr->lock);
2140    err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
2141    close(dmabuf_fd);
2142    if (err) {
2143       simple_mtx_unlock(&bufmgr->lock);
2144       free(export);
2145       return err;
2146    }
2147 
2148    bool found = false;
2149    list_for_each_entry(struct bo_export, iter, &bo->real.exports, link) {
2150       if (iter->drm_fd != drm_fd)
2151          continue;
2152       /* Here we assume that for a given DRM fd, we'll always get back the
2153        * same GEM handle for a given buffer.
2154        */
2155       assert(iter->gem_handle == export->gem_handle);
2156       free(export);
2157       export = iter;
2158       found = true;
2159       break;
2160    }
2161    if (!found)
2162       list_addtail(&export->link, &bo->real.exports);
2163 
2164    simple_mtx_unlock(&bufmgr->lock);
2165 
2166    *out_handle = export->gem_handle;
2167 
2168    return 0;
2169 }
2170 
2171 static void
2172 add_bucket(struct iris_bufmgr *bufmgr, int size, enum iris_heap heap)
2173 {
2174    struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
2175    unsigned int i = cache->num_buckets++;
2176 
2177    assert(i < BUCKET_ARRAY_SIZE);
2178 
2179    list_inithead(&cache->bucket[i].head);
2180    cache->bucket[i].size = size;
2181 
2182    assert(bucket_for_size(bufmgr, size, heap, 0) == &cache->bucket[i]);
2183    assert(bucket_for_size(bufmgr, size - 2048, heap, 0) == &cache->bucket[i]);
2184    assert(bucket_for_size(bufmgr, size + 1, heap, 0) != &cache->bucket[i]);
2185 }
2186 
2187 static void
2188 init_cache_buckets(struct iris_bufmgr *bufmgr, enum iris_heap heap)
2189 {
2190    const unsigned _6MB = 6 * 1024 * 1024;
2191    const unsigned _8MB = 8 * 1024 * 1024;
2192    const unsigned _64MB = 64 * 1024 * 1024;
2193 
2194    /* power-of-two buckets from 4K to 4MB */
2195    for (uint64_t size = 4096; size < _8MB; size *= 2)
2196       add_bucket(bufmgr, size, heap);
2197 
2198    /* 6MB */
2199    add_bucket(bufmgr, _6MB, heap);
2200 
2201    /* 8MB+: three sizes between each power of two to reduce waste */
2202    for (uint64_t size = _8MB; size < _64MB; size *= 2) {
2203       add_bucket(bufmgr, size, heap);
2204       add_bucket(bufmgr, size + size * 1 / 4, heap);
2205       add_bucket(bufmgr, size + size * 2 / 4, heap);
2206       add_bucket(bufmgr, size + size * 3 / 4, heap);
2207    }
2208 
2209    /* 64MB */
2210    add_bucket(bufmgr, _64MB, heap);
2211 }
2212 
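/* Allocator callback handed to intel_aux_map_init() via aux_map_allocator:
 * provides pinned, CPU-mapped buffers for the auxiliary translation tables.
 */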
2213 static struct intel_buffer *
2214 intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
2215 {
2216    struct intel_buffer *buf = malloc(sizeof(struct intel_buffer));
2217    if (!buf)
2218       return NULL;
2219 
2220    struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;
2221 
2222    unsigned int page_size = getpagesize();
2223    size = MAX2(ALIGN(size, page_size), page_size);
2224 
2225    struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, BO_ALLOC_CAPTURE);
2226    if (!bo) {
2227       free(buf);
2228       return NULL;
2229    }
2230 
2231    simple_mtx_lock(&bufmgr->lock);
2232 
2233    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024);
2234    if (bo->address == 0ull)
2235       goto err_free;
2236 
2237    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
2238       goto err_vm_alloc;
2239 
2240    simple_mtx_unlock(&bufmgr->lock);
2241 
2242    bo->name = "aux-map";
2243    p_atomic_set(&bo->refcount, 1);
2244    bo->index = -1;
2245    bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
2246    bo->real.prime_fd = -1;
2247 
2248    buf->driver_bo = bo;
2249    buf->gpu = bo->address;
2250    buf->gpu_end = buf->gpu + bo->size;
2251    buf->map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
2252    return buf;
2253 
2254 err_vm_alloc:
2255    vma_free(bufmgr, bo->address, bo->size);
2256 err_free:
2257    free(buf);
2258    bo_free(bo);
2259    simple_mtx_unlock(&bufmgr->lock);
2260    return NULL;
2261 }
2262 
2263 static void
2264 intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
2265 {
2266    iris_bo_unreference((struct iris_bo*)buffer->driver_bo);
2267    free(buffer);
2268 }
2269 
2270 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
2271    .alloc = intel_aux_map_buffer_alloc,
2272    .free = intel_aux_map_buffer_free,
2273 };
2274 
2275 static bool
2276 iris_bufmgr_get_meminfo(struct iris_bufmgr *bufmgr,
2277                         struct intel_device_info *devinfo)
2278 {
2279    bufmgr->sys.region = &devinfo->mem.sram.mem;
2280    bufmgr->sys.size = devinfo->mem.sram.mappable.size;
2281 
2282    /* When the resizable BAR feature is disabled,
2283     * then vram.mappable.size is only 256MB.
2284     * The remainder of the total size is in the vram.unmappable.size
2285     * variable.
2286     */
2287    bufmgr->vram.region = &devinfo->mem.vram.mem;
2288    bufmgr->vram.size = devinfo->mem.vram.mappable.size +
2289                        devinfo->mem.vram.unmappable.size;
2290 
2291    return true;
2292 }
2293 
2294 static bool
2295 iris_bufmgr_init_global_vm(struct iris_bufmgr *bufmgr)
2296 {
2297    switch (bufmgr->devinfo.kmd_type) {
2298    case INTEL_KMD_TYPE_I915:
2299       bufmgr->use_global_vm = iris_i915_init_global_vm(bufmgr, &bufmgr->global_vm_id);
2300       /* i915 doesn't require a VM, so return true even if use_global_vm is false */
2301       return true;
2302    case INTEL_KMD_TYPE_XE:
2303       if (!intel_bind_timeline_init(&bufmgr->bind_timeline, bufmgr->fd))
2304          return false;
2305 
2306       bufmgr->use_global_vm = iris_xe_init_global_vm(bufmgr, &bufmgr->global_vm_id);
2307       /* Xe requires VM */
2308       return bufmgr->use_global_vm;
2309    default:
2310       unreachable("missing");
2311       return false;
2312    }
2313 }
2314 
2315 /**
2316  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2317  * and manage buffer objects.
2318  *
2319  * \param fd File descriptor of the opened DRM device.
2320  */
2321 static struct iris_bufmgr *
2322 iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
2323 {
2324    if (devinfo->gtt_size <= IRIS_MEMZONE_OTHER_START)
2325       return NULL;
2326 
2327    struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
2328    if (bufmgr == NULL)
2329       return NULL;
2330 
2331    /* Handles to buffer objects belong to the device fd and are not
2332     * reference counted by the kernel.  If the same fd is used by
2333     * multiple parties (threads sharing the same screen bufmgr, or
2334     * even worse the same device fd passed to multiple libraries)
2335     * ownership of those handles is shared by those independent parties.
2336     *
2337     * Don't do this! Ensure that each library/bufmgr has its own device
2338     * fd so that its namespace does not clash with another.
2339     */
2340    bufmgr->fd = os_dupfd_cloexec(fd);
2341    if (bufmgr->fd == -1)
2342       goto error_dup;
2343 
2344    p_atomic_set(&bufmgr->refcount, 1);
2345 
2346    simple_mtx_init(&bufmgr->lock, mtx_plain);
2347    simple_mtx_init(&bufmgr->bo_deps_lock, mtx_plain);
2348 
2349    list_inithead(&bufmgr->zombie_list);
2350 
2351    bufmgr->devinfo = *devinfo;
2352    devinfo = &bufmgr->devinfo;
2353    bufmgr->bo_reuse = bo_reuse;
2354    iris_bufmgr_get_meminfo(bufmgr, devinfo);
2355    bufmgr->kmd_backend = iris_kmd_backend_get(devinfo->kmd_type);
2356 
2357    intel_common_update_device_info(bufmgr->fd, devinfo);
2358 
2359    if (!iris_bufmgr_init_global_vm(bufmgr))
2360       goto error_init_vm;
2361 
2362    STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull);
2363    const uint64_t _4GB = 1ull << 32;
2364    const uint64_t _2GB = 1ul << 31;
2365 
2366    /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
2367    const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
2368 
2369    const struct {
2370       uint64_t start;
2371       uint64_t size;
2372    } vma[IRIS_MEMZONE_COUNT] = {
2373       [IRIS_MEMZONE_SHADER] = {
2374          .start = PAGE_SIZE,
2375          .size  = _4GB_minus_1 - PAGE_SIZE
2376       },
2377       [IRIS_MEMZONE_BINDER] = {
2378          .start = IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE,
2379          .size  = IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
2380       },
2381       [IRIS_MEMZONE_SCRATCH] = {
2382          .start = IRIS_MEMZONE_SCRATCH_START,
2383          .size  = IRIS_SCRATCH_ZONE_SIZE
2384       },
2385       [IRIS_MEMZONE_SURFACE] = {
2386          .start = IRIS_MEMZONE_SURFACE_START,
2387          .size = _4GB_minus_1 - IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
2388       },
2389       [IRIS_MEMZONE_DYNAMIC] = {
2390          .start = IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,
2391 
2392          /* Wa_2209859288: the Tigerlake PRM's workarounds volume says:
2393           *
2394           *    "PSDunit is dropping MSB of the blend state pointer from SD
2395           *     FIFO [...] Limit the Blend State Pointer to < 2G"
2396           *
2397           * We restrict the dynamic state pool to 2GB so that we don't ever
2398           * get a BLEND_STATE pointer with the MSB set.  We aren't likely to
2399           * need the full 4GB for dynamic state anyway.
2400           */
2401          .size  = (devinfo->ver >= 12 ? _2GB : _4GB_minus_1)
2402                   - IRIS_BORDER_COLOR_POOL_SIZE
2403       },
2404       [IRIS_MEMZONE_OTHER] = {
2405          .start = IRIS_MEMZONE_OTHER_START,
2406 
2407          /* Leave the last 4GB out of the high vma range, so that no state
2408           * base address + size can overflow 48 bits.
2409           */
2410          .size  = (devinfo->gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START,
2411       },
2412    };
2413 
2414    for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
2415       util_vma_heap_init(&bufmgr->vma_allocator[i],
2416                          vma[i].start, vma[i].size);
2417    }
2418 
2419    if (INTEL_DEBUG(DEBUG_HEAPS)) {
2420       for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
2421          fprintf(stderr, "%-11s | 0x%016" PRIx64 "-0x%016" PRIx64 "\n",
2422                  memzone_name(i), vma[i].start,
2423                  vma[i].start + vma[i].size - 1);
2424       }
2425    }
2426 
2427    bufmgr->bucket_cache = calloc(iris_get_heap_max(bufmgr),
2428                                  sizeof(*bufmgr->bucket_cache));
2429    if (!bufmgr->bucket_cache)
2430       goto error_bucket_cache;
2431    for (int h = 0; h < iris_get_heap_max(bufmgr); h++)
2432       init_cache_buckets(bufmgr, h);
2433 
2434    unsigned min_slab_order = 8;  /* 256 bytes */
2435    unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
2436    unsigned num_slab_orders_per_allocator =
2437       (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS;
2438 
2439    /* Divide the size order range among slab managers. */
2440    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
2441       unsigned min_order = min_slab_order;
2442       unsigned max_order =
2443          MIN2(min_order + num_slab_orders_per_allocator, max_slab_order);
2444 
2445       if (!pb_slabs_init(&bufmgr->bo_slabs[i], min_order, max_order,
2446                          iris_get_heap_max(bufmgr), true, bufmgr,
2447                          iris_can_reclaim_slab,
2448                          iris_slab_alloc,
2449                          (void *) iris_slab_free)) {
2450          goto error_slabs_init;
2451       }
2452       min_slab_order = max_order + 1;
2453    }
2454 
2455    bufmgr->name_table =
2456       _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2457    bufmgr->handle_table =
2458       _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2459 
2460    if (devinfo->has_aux_map) {
2461       bufmgr->aux_map_ctx = intel_aux_map_init(bufmgr, &aux_map_allocator,
2462                                                devinfo);
2463       assert(bufmgr->aux_map_ctx);
2464    }
2465 
2466    iris_init_border_color_pool(bufmgr, &bufmgr->border_color_pool);
2467 
2468    if (intel_needs_workaround(devinfo, 14019708328)) {
2469       bufmgr->dummy_aux_bo = iris_bo_alloc(bufmgr, "dummy_aux", 4096, 4096,
2470                                            IRIS_MEMZONE_OTHER, BO_ALLOC_PLAIN);
2471       if (!bufmgr->dummy_aux_bo)
2472          goto error_alloc_bo;
2473    }
2474 
2475    /* Programming note from MI_MEM_FENCE specification:
2476     *
2477     *    Software must ensure STATE_SYSTEM_MEM_FENCE_ADDRESS command is
2478     *    programmed prior to programming this command.
2479     *
2480     * HAS 1607240579 then provides the size information: 4K
2481     */
2482    if (devinfo->verx10 >= 200) {
2483       bufmgr->mem_fence_bo = iris_bo_alloc(bufmgr, "mem_fence", 4096, 4096,
2484                                            IRIS_MEMZONE_OTHER, BO_ALLOC_SMEM);
2485       if (!bufmgr->mem_fence_bo)
2486          goto error_alloc_bo;
2487    }
2488 
2489    return bufmgr;
2490 
2491 error_alloc_bo:
2492    iris_bo_unreference(bufmgr->dummy_aux_bo);
2493    iris_bo_unreference(bufmgr->mem_fence_bo);
2494    iris_destroy_border_color_pool(&bufmgr->border_color_pool);
2495    intel_aux_map_finish(bufmgr->aux_map_ctx);
2496    _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
2497    _mesa_hash_table_destroy(bufmgr->name_table, NULL);
2498 error_slabs_init:
2499    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
2500       if (!bufmgr->bo_slabs[i].groups)
2501          break;
2502 
2503       pb_slabs_deinit(&bufmgr->bo_slabs[i]);
2504    }
2505    free(bufmgr->bucket_cache);
2506 error_bucket_cache:
2507    for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++)
2508       util_vma_heap_finish(&bufmgr->vma_allocator[i]);
2509    iris_bufmgr_destroy_global_vm(bufmgr);
2510 error_init_vm:
2511    close(bufmgr->fd);
2512 error_dup:
2513    free(bufmgr);
2514    return NULL;
2515 }
2516 
2517 static struct iris_bufmgr *
2518 iris_bufmgr_ref(struct iris_bufmgr *bufmgr)
2519 {
2520    p_atomic_inc(&bufmgr->refcount);
2521    return bufmgr;
2522 }
2523 
2524 void
2525 iris_bufmgr_unref(struct iris_bufmgr *bufmgr)
2526 {
2527    simple_mtx_lock(&global_bufmgr_list_mutex);
2528    if (p_atomic_dec_zero(&bufmgr->refcount)) {
2529       list_del(&bufmgr->link);
2530       iris_bufmgr_destroy(bufmgr);
2531    }
2532    simple_mtx_unlock(&global_bufmgr_list_mutex);
2533 }
2534 
2535 /** Returns a new unique id, to be used by screens. */
2536 int
2537 iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr)
2538 {
2539    return p_atomic_inc_return(&bufmgr->next_screen_id) - 1;
2540 }
2541 
2542 /**
2543  * Gets an already existing GEM buffer manager or creates a new one.
2544  *
2545  * \param fd File descriptor of the opened DRM device.
2546  */
2547 struct iris_bufmgr *
2548 iris_bufmgr_get_for_fd(int fd, bool bo_reuse)
2549 {
2550    struct intel_device_info devinfo;
2551    struct stat st;
2552 
2553    if (fstat(fd, &st))
2554       return NULL;
2555 
2556    struct iris_bufmgr *bufmgr = NULL;
2557 
2558    simple_mtx_lock(&global_bufmgr_list_mutex);
2559    list_for_each_entry(struct iris_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
2560       struct stat iter_st;
2561       if (fstat(iter_bufmgr->fd, &iter_st))
2562          continue;
2563 
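      /* Two fds reference the same DRM device iff they share st_rdev;
       * reuse the existing bufmgr for that device.
       */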
2564       if (st.st_rdev == iter_st.st_rdev) {
2565          assert(iter_bufmgr->bo_reuse == bo_reuse);
2566          bufmgr = iris_bufmgr_ref(iter_bufmgr);
2567          goto unlock;
2568       }
2569    }
2570 
2571    if (!intel_get_device_info_from_fd(fd, &devinfo, 8, -1))
2572       goto unlock;
2573 
2574    if (devinfo.ver < 8 || devinfo.platform == INTEL_PLATFORM_CHV)
2575       goto unlock;
2576 
2577 #ifndef INTEL_USE_ELK
2578    if (devinfo.ver < 9) {
2579       WARN_ONCE(devinfo.ver == 8,
2580                 "ERROR: Iris was compiled without support for Gfx version 8.\n");
2581       goto unlock;
2582    }
2583 #endif
2584 
2585    bufmgr = iris_bufmgr_create(&devinfo, fd, bo_reuse);
2586    if (bufmgr)
2587       list_addtail(&bufmgr->link, &global_bufmgr_list);
2588 
2589  unlock:
2590    simple_mtx_unlock(&global_bufmgr_list_mutex);
2591 
2592    return bufmgr;
2593 }
2594 
2595 int
2596 iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr)
2597 {
2598    return bufmgr->fd;
2599 }
2600 
2601 void*
2602 iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr)
2603 {
2604    return bufmgr->aux_map_ctx;
2605 }
2606 
2607 simple_mtx_t *
2608 iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr)
2609 {
2610    return &bufmgr->bo_deps_lock;
2611 }
2612 
2613 struct iris_border_color_pool *
2614 iris_bufmgr_get_border_color_pool(struct iris_bufmgr *bufmgr)
2615 {
2616    return &bufmgr->border_color_pool;
2617 }
2618 
2619 uint64_t
2620 iris_bufmgr_vram_size(struct iris_bufmgr *bufmgr)
2621 {
2622    return bufmgr->vram.size;
2623 }
2624 
2625 uint64_t
2626 iris_bufmgr_sram_size(struct iris_bufmgr *bufmgr)
2627 {
2628    return bufmgr->sys.size;
2629 }
2630 
2631 const struct intel_device_info *
2632 iris_bufmgr_get_device_info(struct iris_bufmgr *bufmgr)
2633 {
2634    return &bufmgr->devinfo;
2635 }
2636 
2637 const struct iris_kmd_backend *
2638 iris_bufmgr_get_kernel_driver_backend(struct iris_bufmgr *bufmgr)
2639 {
2640    return bufmgr->kmd_backend;
2641 }
2642 
2643 uint32_t
2644 iris_bufmgr_get_global_vm_id(struct iris_bufmgr *bufmgr)
2645 {
2646    return bufmgr->global_vm_id;
2647 }
2648 
2649 bool
2650 iris_bufmgr_use_global_vm_id(struct iris_bufmgr *bufmgr)
2651 {
2652    return bufmgr->use_global_vm;
2653 }
2654 
2655 bool
2656 iris_bufmgr_compute_engine_supported(struct iris_bufmgr *bufmgr)
2657 {
2658    return bufmgr->devinfo.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
2659 }
2660 
2661 /**
2662  * Return the PAT entry based on the BO heap and allocation flags.
2663  */
2664 const struct intel_device_info_pat_entry *
2665 iris_heap_to_pat_entry(const struct intel_device_info *devinfo,
2666                        enum iris_heap heap, bool scanout)
2667 {
2668    if (scanout) {
2669       if (iris_heap_is_compressed(heap) == false)
2670          return &devinfo->pat.scanout;
2671 
2672       WARN_ONCE(iris_heap_is_compressed(heap),
2673                 "update heap_to_pat_entry when compressed scanout pat entries are added");
2674    }
2675 
2676    switch (heap) {
2677    case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
2678       return &devinfo->pat.cached_coherent;
2679    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
2680       return &devinfo->pat.writecombining;
2681    case IRIS_HEAP_DEVICE_LOCAL:
2682    case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
2683    case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
2684       return &devinfo->pat.writecombining;
2685    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
2686    case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
2687       return &devinfo->pat.compressed;
2688    default:
2689       unreachable("invalid heap for platforms using PAT entries");
2690    }
2691 }
2692 
2693 struct intel_bind_timeline *
2694 iris_bufmgr_get_bind_timeline(struct iris_bufmgr *bufmgr)
2695 {
2696    return &bufmgr->bind_timeline;
2697 }
2698 
2699 uint64_t
2700 iris_bufmgr_get_dummy_aux_address(struct iris_bufmgr *bufmgr)
2701 {
2702    return bufmgr->dummy_aux_bo ? bufmgr->dummy_aux_bo->address : 0;
2703 }
2704 
2705 struct iris_bo *
2706 iris_bufmgr_get_mem_fence_bo(struct iris_bufmgr *bufmgr)
2707 {
2708    return bufmgr->mem_fence_bo;
2709 }
2710