1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file iris_bufmgr.c
25  *
26  * The Iris buffer manager.
27  *
28  * XXX: write better comments
29  * - BOs
30  * - Explain BO cache
31  * - main interface to GEM in the kernel
32  */
33 
34 #include <util/u_atomic.h>
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <assert.h>
41 #include <sys/ioctl.h>
42 #include <sys/mman.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <stdbool.h>
46 #include <time.h>
47 #include <unistd.h>
48 
49 #include "errno.h"
50 #include "common/intel_aux_map.h"
51 #include "common/intel_mem.h"
52 #include "c99_alloca.h"
53 #include "dev/intel_debug.h"
54 #include "common/intel_common.h"
55 #include "common/intel_gem.h"
56 #include "dev/intel_device_info.h"
57 #include "drm-uapi/dma-buf.h"
58 #include "isl/isl.h"
59 #include "util/os_mman.h"
60 #include "util/u_debug.h"
61 #include "util/macros.h"
62 #include "util/hash_table.h"
63 #include "util/list.h"
64 #include "util/os_file.h"
65 #include "util/u_dynarray.h"
66 #include "util/vma.h"
67 #include "iris_bufmgr.h"
68 #include "iris_context.h"
69 #include "string.h"
70 #include "iris_kmd_backend.h"
71 #include "i915/iris_bufmgr.h"
72 #include "xe/iris_bufmgr.h"
73 
74 #include <xf86drm.h>
75 
76 #ifdef HAVE_VALGRIND
77 #include <valgrind.h>
78 #include <memcheck.h>
79 #define VG(x) x
80 #else
81 #define VG(x)
82 #endif
83 
84 /* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
85  * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
86  * leaked. All because it does not call VG(cli_free) from its
87  * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
88  * and allocation, we mark it available for use upon mmapping and remove
89  * it upon unmapping.
90  */
91 #define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
92 #define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
93 
94 /* On FreeBSD PAGE_SIZE is already defined in
95  * /usr/include/machine/param.h that is indirectly
96  * included here.
97  */
98 #ifndef PAGE_SIZE
99 #define PAGE_SIZE 4096
100 #endif
101 
102 #define WARN_ONCE(cond, fmt...) do {                            \
103    if (unlikely(cond)) {                                        \
104       static bool _warned = false;                              \
105       if (!_warned) {                                           \
106          fprintf(stderr, "WARNING: ");                          \
107          fprintf(stderr, fmt);                                  \
108          _warned = true;                                        \
109       }                                                         \
110    }                                                            \
111 } while (0)
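
/* Illustrative use of WARN_ONCE (a sketch, not an existing call site; the
 * condition and message are invented for the example).  Any printf-style
 * format works, and the message is emitted at most once per process:
 *
 *    WARN_ONCE(size > (1ull << 32),
 *              "unexpectedly large BO of %"PRIu64" bytes\n", size);
 */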
112 
113 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
114 
115 /**
116  * For debugging purposes, this returns a time in seconds.
117  */
118 static double
119 get_time(void)
120 {
121    struct timespec tp;
122 
123    clock_gettime(CLOCK_MONOTONIC, &tp);
124 
125    return tp.tv_sec + tp.tv_nsec / 1000000000.0;
126 }
127 
128 static inline int
129 atomic_add_unless(int *v, int add, int unless)
130 {
131    int c, old;
132    c = p_atomic_read(v);
133    while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
134       c = old;
135    return c == unless;
136 }
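
/* A note on the semantics above (derived from the code, for clarity):
 * atomic_add_unless() atomically adds 'add' to *v unless *v equals
 * 'unless', and returns true only when *v did equal 'unless' (so no add
 * was performed).  iris_bo_unreference() uses it as
 *
 *    if (atomic_add_unless(&bo->refcount, -1, 1)) { ... }
 *
 * so dropping any reference other than the last stays lockless, while the
 * final drop falls through to the locked path.
 */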
137 
138 static const char *
139 memzone_name(enum iris_memory_zone memzone)
140 {
141    const char *names[] = {
142       [IRIS_MEMZONE_SHADER]   = "shader",
143       [IRIS_MEMZONE_BINDER]   = "binder",
144       [IRIS_MEMZONE_SCRATCH]  = "scratchsurf",
145       [IRIS_MEMZONE_SURFACE]  = "surface",
146       [IRIS_MEMZONE_DYNAMIC]  = "dynamic",
147       [IRIS_MEMZONE_OTHER]    = "other",
148       [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor",
149    };
150    assert(memzone < ARRAY_SIZE(names));
151    return names[memzone];
152 }
153 
154 struct bo_cache_bucket {
155    /** List of cached BOs. */
156    struct list_head head;
157 
158    /** Size of this bucket, in bytes. */
159    uint64_t size;
160 };
161 
162 struct bo_export {
163    /** File descriptor associated with a handle export. */
164    int drm_fd;
165 
166    /** GEM handle in drm_fd */
167    uint32_t gem_handle;
168 
169    struct list_head link;
170 };
171 
172 struct iris_memregion {
173    struct intel_memory_class_instance *region;
174    uint64_t size;
175 };
176 
177 #define NUM_SLAB_ALLOCATORS 3
178 
179 struct iris_slab {
180    struct pb_slab base;
181 
182    /** The BO representing the entire slab */
183    struct iris_bo *bo;
184 
185    /** Array of iris_bo structs representing BOs allocated out of this slab */
186    struct iris_bo *entries;
187 };
188 
189 #define BUCKET_ARRAY_SIZE 25
190 
191 struct iris_bucket_cache {
192    struct bo_cache_bucket bucket[BUCKET_ARRAY_SIZE];
193    int num_buckets;
194 };
195 
196 struct iris_bufmgr {
197    /**
198     * List into the list of bufmgr.
199     */
200    struct list_head link;
201 
202    uint32_t refcount;
203 
204    int fd;
205 
206    simple_mtx_t lock;
207    simple_mtx_t bo_deps_lock;
208 
209    /** Array of lists of cached gem objects of power-of-two sizes */
210    struct iris_bucket_cache *bucket_cache;
211 
212    time_t time;
213 
214    struct hash_table *name_table;
215    struct hash_table *handle_table;
216 
217    /**
218     * List of BOs which we've effectively freed, but are hanging on to
219     * until they're idle before closing and returning the VMA.
220     */
221    struct list_head zombie_list;
222 
223    struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT];
224 
225    struct iris_memregion vram, sys;
226 
227    /* Used only when use_global_vm is true. */
228    uint32_t global_vm_id;
229 
230    int next_screen_id;
231 
232    struct intel_device_info devinfo;
233    const struct iris_kmd_backend *kmd_backend;
234    struct intel_bind_timeline bind_timeline; /* Xe only */
235    bool bo_reuse:1;
236    bool use_global_vm:1;
237 
238    struct intel_aux_map_context *aux_map_ctx;
239 
240    struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
241 
242    struct iris_border_color_pool border_color_pool;
243 
244    struct iris_bo *dummy_aux_bo;
245    struct iris_bo *mem_fence_bo;
246 };
247 
248 static simple_mtx_t global_bufmgr_list_mutex = SIMPLE_MTX_INITIALIZER;
249 static struct list_head global_bufmgr_list = {
250    .next = &global_bufmgr_list,
251    .prev = &global_bufmgr_list,
252 };
253 
254 static void bo_free(struct iris_bo *bo);
255 
256 static struct iris_bo *
257 find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
258 {
259    struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
260    struct iris_bo *bo = entry ? entry->data : NULL;
261 
262    if (bo) {
263       assert(iris_bo_is_external(bo));
264       assert(iris_bo_is_real(bo));
265       assert(!bo->real.reusable);
266 
267       /* Being non-reusable, the BO cannot be in the cache lists.  It may,
268        * however, be in the zombie list if it had reached zero references
269        * but we hadn't yet closed it...and the same BO was then reimported.
270        * If so, remove it from that list, since it has now been resurrected.
271        */
272       if (list_is_linked(&bo->head))
273          list_del(&bo->head);
274 
275       iris_bo_reference(bo);
276    }
277 
278    return bo;
279 }
280 
281 /**
282  * This function finds the correct bucket fit for the input size.
283  * It runs in O(1) time by computing the bucket index directly from the
284  * requested size rather than iterating through all the buckets.
285  */
286 static struct bo_cache_bucket *
287 bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size,
288                 enum iris_heap heap, unsigned flags)
289 {
290    if (flags & BO_ALLOC_PROTECTED)
291       return NULL;
292 
293    const struct intel_device_info *devinfo = &bufmgr->devinfo;
294    struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
295 
296    if (devinfo->kmd_type == INTEL_KMD_TYPE_XE &&
297        (flags & (BO_ALLOC_SHARED | BO_ALLOC_SCANOUT)))
298       return NULL;
299 
300    const unsigned _4MB = 4 * 1024 * 1024;
301    const unsigned _6MB = 6 * 1024 * 1024;
302    const unsigned _8MB = 8 * 1024 * 1024;
303    const unsigned _64MB = 64 * 1024 * 1024;
304    unsigned index;
305 
306    if (size <= 4096) {
307       index = 0;
308    } else if (size <= _4MB) {
309       index = util_logbase2_ceil(size) - 12;
310    } else if (size <= _6MB) {
311       index = 11;
312    } else if (size <= _8MB) {
313       index = 12;
314    } else if (size <= _64MB) {
315       const unsigned power = util_logbase2(size);
316       const unsigned base_size = 1u << power;
317       const unsigned quarter_size = base_size / 4;
318       const unsigned quarter = DIV_ROUND_UP(size - base_size, quarter_size);
319       index = 12 + (power - 23) * 4 + quarter;
320    } else {
321       return NULL;
322    }
323 
324    return (index < cache->num_buckets) ? &cache->bucket[index] : NULL;
325 }
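
/* Worked example of the index math above (illustrative; assumes the
 * buckets were initialized to match this spacing):
 *
 *    size = 4 KiB   -> index 0
 *    size = 64 KiB  -> util_logbase2_ceil(64 KiB) - 12 = 16 - 12 = 4
 *    size = 9 MiB   -> power = 23 (8 MiB base), quarter_size = 2 MiB,
 *                      quarter = DIV_ROUND_UP(1 MiB, 2 MiB) = 1,
 *                      index = 12 + (23 - 23) * 4 + 1 = 13
 *
 * i.e. above 8 MiB the buckets step in quarters of the previous power of
 * two (10, 12, 14, 16, 20, 24, ... MiB) up to the 64 MiB cap.
 */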
326 
327 enum iris_memory_zone
328 iris_memzone_for_address(uint64_t address)
329 {
330    STATIC_ASSERT(IRIS_MEMZONE_OTHER_START    > IRIS_MEMZONE_DYNAMIC_START);
331    STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START  > IRIS_MEMZONE_SCRATCH_START);
332    STATIC_ASSERT(IRIS_MEMZONE_SCRATCH_START == IRIS_MEMZONE_BINDER_START);
333    STATIC_ASSERT(IRIS_MEMZONE_BINDER_START   > IRIS_MEMZONE_SHADER_START);
334    STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START  > IRIS_MEMZONE_SURFACE_START);
335    STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);
336 
337    if (address >= IRIS_MEMZONE_OTHER_START)
338       return IRIS_MEMZONE_OTHER;
339 
340    if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
341       return IRIS_MEMZONE_BORDER_COLOR_POOL;
342 
343    if (address > IRIS_MEMZONE_DYNAMIC_START)
344       return IRIS_MEMZONE_DYNAMIC;
345 
346    if (address >= IRIS_MEMZONE_SURFACE_START)
347       return IRIS_MEMZONE_SURFACE;
348 
349    if (address >= (IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE))
350       return IRIS_MEMZONE_BINDER;
351 
352    if (address >= IRIS_MEMZONE_SCRATCH_START)
353       return IRIS_MEMZONE_SCRATCH;
354 
355    return IRIS_MEMZONE_SHADER;
356 }
357 
358 /**
359  * Allocate a section of virtual memory for a buffer, assigning an address.
360  *
361  * This uses either the bucket allocator for the given size, or the large
362  * object allocator (util_vma).
363  */
364 static uint64_t
365 vma_alloc(struct iris_bufmgr *bufmgr,
366           enum iris_memory_zone memzone,
367           uint64_t size,
368           uint64_t alignment)
369 {
370    simple_mtx_assert_locked(&bufmgr->lock);
371 
372    const unsigned _2mb = 2 * 1024 * 1024;
373 
374    /* Force minimum alignment based on device requirements */
375    assert((alignment & (alignment - 1)) == 0);
376    alignment = MAX2(alignment, bufmgr->devinfo.mem_alignment);
377 
378    /* If the allocation is a multiple of 2MB, ensure the virtual address is
379     * aligned to 2MB, so that it's possible for the kernel to use 64K pages.
380     */
381    if (size % _2mb == 0)
382       alignment = MAX2(alignment, _2mb);
383 
384    if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
385       return IRIS_BORDER_COLOR_POOL_ADDRESS;
386 
387    uint64_t addr =
388       util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment);
389 
390    assert((addr >> 48ull) == 0);
391    assert((addr % alignment) == 0);
392 
393    return intel_canonical_address(addr);
394 }
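
/* For instance (illustrative): a 4 MiB allocation is a multiple of 2 MiB,
 * so its alignment is bumped to 2 MiB above, letting the kernel back the
 * range with 64K pages; a 12 KiB allocation only gets the device minimum
 * alignment.
 */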
395 
396 static void
397 vma_free(struct iris_bufmgr *bufmgr,
398          uint64_t address,
399          uint64_t size)
400 {
401    simple_mtx_assert_locked(&bufmgr->lock);
402 
403    if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
404       return;
405 
406    /* Un-canonicalize the address. */
407    address = intel_48b_address(address);
408 
409    if (address == 0ull)
410       return;
411 
412    enum iris_memory_zone memzone = iris_memzone_for_address(address);
413 
414    assert(memzone < ARRAY_SIZE(bufmgr->vma_allocator));
415 
416    util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
417 }
418 
419 /* Exports a BO's implicit synchronization state to a drm_syncobj, returning
420  * its wrapping iris_syncobj. The drm_syncobj is created new and has to be
421  * destroyed by the caller after the execbuf ioctl.
422  */
423 struct iris_syncobj *
424 iris_bo_export_sync_state(struct iris_bo *bo)
425 {
426    struct iris_bufmgr *bufmgr = bo->bufmgr;
427    int drm_fd = iris_bufmgr_get_fd(bufmgr);
428 
429    struct iris_syncobj *iris_syncobj = iris_create_syncobj(bufmgr);
430 
431    struct dma_buf_export_sync_file export_sync_file_ioctl = {
432       .flags = DMA_BUF_SYNC_RW, /* TODO */
433       .fd = -1,
434    };
435    if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
436                    &export_sync_file_ioctl)) {
437       fprintf(stderr, "DMA_BUF_IOCTL_EXPORT_SYNC_FILE ioctl failed (%d)\n",
438               errno);
439       goto error_export;
440    }
441 
442    int sync_file_fd = export_sync_file_ioctl.fd;
443    assert(sync_file_fd >= 0);
444 
445    struct drm_syncobj_handle syncobj_import_ioctl = {
446       .handle = iris_syncobj->handle,
447       .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
448       .fd = sync_file_fd,
449    };
450    if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
451                    &syncobj_import_ioctl)) {
452       fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE ioctl failed (%d)\n",
453               errno);
454    }
455 
456    close(sync_file_fd);
457 
458    return iris_syncobj;
459 error_export:
460    iris_syncobj_destroy(bufmgr, iris_syncobj);
461    return NULL;
462 }
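
/* Illustrative caller pattern (a sketch of the contract described above,
 * not a copy of an existing call site):
 *
 *    struct iris_syncobj *sobj = iris_bo_export_sync_state(bo);
 *    if (sobj) {
 *       ...make the upcoming execbuf wait on sobj->handle...
 *       iris_syncobj_destroy(bufmgr, sobj);
 *    }
 */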
463 
464 /* Import the state of a sync_file_fd (which we should have gotten from
465  * batch_syncobj_to_sync_file_fd) into a BO as its implicit synchronization
466  * state.
467  */
468 void
469 iris_bo_import_sync_state(struct iris_bo *bo, int sync_file_fd)
470 {
471    struct dma_buf_import_sync_file import_sync_file_ioctl = {
472       .flags = DMA_BUF_SYNC_WRITE,
473       .fd = sync_file_fd,
474    };
475    if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
476                    &import_sync_file_ioctl))
477       fprintf(stderr, "DMA_BUF_IOCTL_IMPORT_SYNC_FILE ioctl failed (%d)\n",
478               errno);
479 }
480 
481 /* A timeout of 0 just checks for busyness. */
482 static int
483 iris_bo_wait_syncobj(struct iris_bo *bo, int64_t timeout_ns)
484 {
485    int ret = 0;
486    struct iris_bufmgr *bufmgr = bo->bufmgr;
487    const bool is_external = iris_bo_is_real(bo) && bo->real.prime_fd != -1;
488    struct iris_syncobj *external_implicit_syncobj = NULL;
489 
490    /* If we know it's idle, don't bother with the kernel round trip.
491     * Can't do that for Xe KMD with external BOs since we have to check the
492     * implicit synchronization information.
493     */
494    if (!is_external && bo->idle)
495       return 0;
496 
497    simple_mtx_lock(&bufmgr->bo_deps_lock);
498 
499    const int handles_len = bo->deps_size * IRIS_BATCH_COUNT * 2 + is_external;
500    uint32_t *handles = handles_len <= 32 ?
501                         (uint32_t *)alloca(handles_len * sizeof(*handles)) :
502                         (uint32_t *)malloc(handles_len * sizeof(*handles));
503    int handle_count = 0;
504 
505    if (is_external) {
506       external_implicit_syncobj = iris_bo_export_sync_state(bo);
507       if (external_implicit_syncobj)
508          handles[handle_count++] = external_implicit_syncobj->handle;
509    }
510 
511    for (int d = 0; d < bo->deps_size; d++) {
512       for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
513          struct iris_syncobj *r = bo->deps[d].read_syncobjs[b];
514          struct iris_syncobj *w = bo->deps[d].write_syncobjs[b];
515          if (r)
516             handles[handle_count++] = r->handle;
517          if (w)
518             handles[handle_count++] = w->handle;
519       }
520    }
521 
522    if (handle_count == 0)
523       goto out;
524 
525    /* Unlike the GEM wait ioctl, a negative timeout is not treated as infinite here. */
526    int64_t timeout_abs = os_time_get_absolute_timeout(timeout_ns);
527    if (timeout_abs < 0)
528       timeout_abs = INT64_MAX;
529 
530    struct drm_syncobj_wait args = {
531       .handles = (uintptr_t) handles,
532       .timeout_nsec = timeout_abs,
533       .count_handles = handle_count,
534       .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
535    };
536 
537    ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
538    if (ret != 0) {
539       ret = -errno;
540       goto out;
541    }
542 
543    /* We just waited on everything, so clear all the deps. */
544    for (int d = 0; d < bo->deps_size; d++) {
545       for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
546          iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
547          iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
548       }
549    }
550 
551 out:
552    if (handles_len > 32)
553       free(handles);
554    if (external_implicit_syncobj)
555       iris_syncobj_reference(bufmgr, &external_implicit_syncobj, NULL);
556 
557    simple_mtx_unlock(&bufmgr->bo_deps_lock);
558    return ret;
559 }
560 
561 static bool
562 iris_bo_busy_syncobj(struct iris_bo *bo)
563 {
564    return iris_bo_wait_syncobj(bo, 0) == -ETIME;
565 }
566 
567 bool
568 iris_bo_busy(struct iris_bo *bo)
569 {
570    bool busy;
571 
572    switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
573    case INTEL_KMD_TYPE_I915:
574       if (iris_bo_is_external(bo))
575          busy = iris_i915_bo_busy_gem(bo);
576       else
577          busy = iris_bo_busy_syncobj(bo);
578       break;
579    case INTEL_KMD_TYPE_XE:
580       busy = iris_bo_busy_syncobj(bo);
581       break;
582    default:
583       unreachable("missing");
584       busy = true;
585    }
586 
587    bo->idle = !busy;
588 
589    return busy;
590 }
591 
592 /**
593  * Specify the volatility of the buffer.
594  * \param bo Buffer to mark as purgeable or required
595  * \param state The purgeable status
596  *
597  * Use IRIS_MADVICE_DONT_NEED to mark the buffer as purgeable, and it will be
598  * reclaimed under memory pressure. If you subsequently require the buffer,
599  * then you must pass IRIS_MADVICE_WILL_NEED to mark the buffer as required.
600  *
601  * Returns true if the buffer was retained, or false if it was discarded
602  * whilst marked as IRIS_MADVICE_DONT_NEED.
603  */
604 static inline bool
605 iris_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
606 {
607    /* We can't madvise suballocated BOs. */
608    assert(iris_bo_is_real(bo));
609 
610    return bo->bufmgr->kmd_backend->bo_madvise(bo, state);
611 }
612 
613 static struct iris_bo *
614 bo_calloc(void)
615 {
616    struct iris_bo *bo = calloc(1, sizeof(*bo));
617    if (!bo)
618       return NULL;
619 
620    list_inithead(&bo->real.exports);
621 
622    bo->hash = _mesa_hash_pointer(bo);
623 
624    return bo;
625 }
626 
627 static void
628 bo_unmap(struct iris_bo *bo)
629 {
630    assert(iris_bo_is_real(bo));
631 
632    VG_NOACCESS(bo->real.map, bo->size);
633    os_munmap(bo->real.map, bo->size);
634    bo->real.map = NULL;
635 }
636 
637 static struct pb_slabs *
638 get_slabs(struct iris_bufmgr *bufmgr, uint64_t size)
639 {
640    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
641       struct pb_slabs *slabs = &bufmgr->bo_slabs[i];
642 
643       if (size <= 1ull << (slabs->min_order + slabs->num_orders - 1))
644          return slabs;
645    }
646 
647    unreachable("should have found a valid slab for this size");
648 }
649 
650 /* Return the power of two size of a slab entry matching the input size. */
651 static unsigned
652 get_slab_pot_entry_size(struct iris_bufmgr *bufmgr, unsigned size)
653 {
654    unsigned entry_size = util_next_power_of_two(size);
655    unsigned min_entry_size = 1 << bufmgr->bo_slabs[0].min_order;
656 
657    return MAX2(entry_size, min_entry_size);
658 }
659 
660 /* Return the slab entry alignment. */
661 static unsigned
662 get_slab_entry_alignment(struct iris_bufmgr *bufmgr, unsigned size)
663 {
664    unsigned entry_size = get_slab_pot_entry_size(bufmgr, size);
665 
666    if (size <= entry_size * 3 / 4)
667       return entry_size / 4;
668 
669    return entry_size;
670 }
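
/* Example of the 3/4 rule above: a 96 KiB entry has a power-of-two size of
 * 128 KiB, and 96 KiB <= 3/4 * 128 KiB, so it only needs 32 KiB alignment;
 * a 100 KiB entry would get the full 128 KiB alignment instead.
 */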
671 
672 static bool
673 iris_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
674 {
675    struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
676 
677    return !iris_bo_busy(bo);
678 }
679 
680 static void
681 iris_slab_free(void *priv, struct pb_slab *pslab)
682 {
683    struct iris_bufmgr *bufmgr = priv;
684    struct iris_slab *slab = (void *) pslab;
685    struct intel_aux_map_context *aux_map_ctx = bufmgr->aux_map_ctx;
686 
687    assert(!slab->bo->aux_map_address);
688 
689    /* Since we're freeing the whole slab, all buffers allocated out of it
690     * must be reclaimable.  We require buffers to be idle to be reclaimed
691     * (see iris_can_reclaim_slab()), so we know all entries must be idle.
692     * Therefore, we can safely unmap their aux table entries.
693     */
694    for (unsigned i = 0; i < pslab->num_entries; i++) {
695       struct iris_bo *bo = &slab->entries[i];
696       if (aux_map_ctx && bo->aux_map_address) {
697          intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size);
698          bo->aux_map_address = 0;
699       }
700 
701       /* Unref read/write dependency syncobjs and free the array. */
702       for (int d = 0; d < bo->deps_size; d++) {
703          for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
704             iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
705             iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
706          }
707       }
708       free(bo->deps);
709    }
710 
711    iris_bo_unreference(slab->bo);
712 
713    free(slab->entries);
714    free(slab);
715 }
716 
717 static struct pb_slab *
718 iris_slab_alloc(void *priv,
719                 unsigned heap,
720                 unsigned entry_size,
721                 unsigned group_index)
722 {
723    struct iris_bufmgr *bufmgr = priv;
724    struct iris_slab *slab = calloc(1, sizeof(struct iris_slab));
725    uint32_t flags = BO_ALLOC_NO_SUBALLOC;
726    unsigned slab_size = 0;
727    /* We only support slab allocation for IRIS_MEMZONE_OTHER */
728    enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
729 
730    if (!slab)
731       return NULL;
732 
733    struct pb_slabs *slabs = bufmgr->bo_slabs;
734 
735    /* Determine the slab buffer size. */
736    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
737       unsigned max_entry_size =
738          1 << (slabs[i].min_order + slabs[i].num_orders - 1);
739 
740       if (entry_size <= max_entry_size) {
741          /* The slab size is twice the size of the largest possible entry. */
742          slab_size = max_entry_size * 2;
743 
744          if (!util_is_power_of_two_nonzero(entry_size)) {
745             assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));
746 
747             /* If the entry size is 3/4 of a power of two, we would waste
748              * space and not gain anything if we allocated only twice the
749              * power of two for the backing buffer:
750              *
751              *    2 * 3/4 = 1.5 usable with buffer size 2
752              *
753              * Allocating 5 times the entry size leads us to the next power
754              * of two and results in a much better memory utilization:
755              *
756              *    5 * 3/4 = 3.75 usable with buffer size 4
757              */
758             if (entry_size * 5 > slab_size)
759                slab_size = util_next_power_of_two(entry_size * 5);
760          }
761 
762          /* The largest slab should have the same size as the PTE fragment
763           * size to get faster address translation.
764           *
765           * TODO: move this to intel_device_info?
766           */
767          const unsigned pte_size = 2 * 1024 * 1024;
768 
769          if (i == NUM_SLAB_ALLOCATORS - 1 && slab_size < pte_size)
770             slab_size = pte_size;
771 
772          break;
773       }
774    }
775    assert(slab_size != 0);
776 
777    switch (heap) {
778    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
779    case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
780       flags |= BO_ALLOC_COMPRESSED;
781       break;
782    case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
783    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
784       flags |= BO_ALLOC_SMEM;
785       break;
786    case IRIS_HEAP_DEVICE_LOCAL:
787       flags |= BO_ALLOC_LMEM;
788       break;
789    case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
790       flags |= BO_ALLOC_LMEM | BO_ALLOC_CPU_VISIBLE;
791       break;
792    default:
793       flags |= BO_ALLOC_PLAIN;
794    }
795 
796    slab->bo =
797       iris_bo_alloc(bufmgr, "slab", slab_size, slab_size, memzone, flags);
798    if (!slab->bo)
799       goto fail;
800 
801    slab_size = slab->bo->size;
802 
803    slab->base.num_entries = slab_size / entry_size;
804    slab->base.num_free = slab->base.num_entries;
805    slab->base.group_index = group_index;
806    slab->base.entry_size = entry_size;
807    slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
808    if (!slab->entries)
809       goto fail_bo;
810 
811    list_inithead(&slab->base.free);
812 
813    for (unsigned i = 0; i < slab->base.num_entries; i++) {
814       struct iris_bo *bo = &slab->entries[i];
815 
816       bo->size = entry_size;
817       bo->bufmgr = bufmgr;
818       bo->hash = _mesa_hash_pointer(bo);
819       bo->gem_handle = 0;
820       bo->address = intel_canonical_address(slab->bo->address + i * entry_size);
821       bo->aux_map_address = 0;
822       bo->index = -1;
823       bo->refcount = 0;
824       bo->idle = true;
825       bo->zeroed = slab->bo->zeroed;
826 
827       bo->slab.entry.slab = &slab->base;
828 
829       bo->slab.real = iris_get_backing_bo(slab->bo);
830 
831       list_addtail(&bo->slab.entry.head, &slab->base.free);
832    }
833 
834    return &slab->base;
835 
836 fail_bo:
837    iris_bo_unreference(slab->bo);
838 fail:
839    free(slab);
840    return NULL;
841 }
842 
843 /**
844  * Selects a heap for the given buffer allocation flags.
845  *
846  * This determines the cacheability, coherency, and mmap mode settings.
847  */
848 static enum iris_heap
849 flags_to_heap(struct iris_bufmgr *bufmgr, unsigned flags)
850 {
851    const struct intel_device_info *devinfo = &bufmgr->devinfo;
852 
853    if (bufmgr->vram.size > 0) {
854       if (flags & BO_ALLOC_COMPRESSED)
855          return IRIS_HEAP_DEVICE_LOCAL_COMPRESSED;
856 
857       /* Discrete GPUs currently always snoop CPU caches. */
858       if ((flags & BO_ALLOC_SMEM) || (flags & BO_ALLOC_CACHED_COHERENT))
859          return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
860 
861       if ((flags & BO_ALLOC_LMEM) ||
862           ((flags & BO_ALLOC_SCANOUT) && !(flags & BO_ALLOC_SHARED))) {
863 
864          if ((flags & BO_ALLOC_CPU_VISIBLE) && !intel_vram_all_mappable(devinfo))
865             return IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR;
866 
867          return IRIS_HEAP_DEVICE_LOCAL;
868       }
869 
870       return IRIS_HEAP_DEVICE_LOCAL_PREFERRED;
871    } else if (devinfo->has_llc) {
872       assert(!(flags & BO_ALLOC_LMEM));
873 
874       if (flags & (BO_ALLOC_SCANOUT | BO_ALLOC_SHARED))
875          return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
876 
877       return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
878    } else {
879       assert(!devinfo->has_llc);
880       assert(!(flags & BO_ALLOC_LMEM));
881 
882       if (flags & BO_ALLOC_COMPRESSED)
883          return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;
884 
885       if (flags & BO_ALLOC_CACHED_COHERENT)
886          return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
887 
888       return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
889    }
890 }
891 
892 static bool
893 zero_bo(struct iris_bufmgr *bufmgr,
894         unsigned flags,
895         struct iris_bo *bo)
896 {
897    assert(flags & BO_ALLOC_ZEROED);
898 
899    if (bo->zeroed)
900       return true;
901 
902    if (bufmgr->devinfo.has_flat_ccs && (flags & BO_ALLOC_LMEM)) {
903       /* With flat CCS, all allocations in LMEM have memory ranges with
904        * corresponding CCS elements. These elements are only accessible
905        * through GPU commands, but we don't issue GPU commands here.
906        */
907       return false;
908    }
909 
910    void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
911    if (!map)
912       return false;
913 
914    memset(map, 0, bo->size);
915    bo->zeroed = true;
916    return true;
917 }
918 
919 static struct iris_bo *
920 alloc_bo_from_slabs(struct iris_bufmgr *bufmgr,
921                     const char *name,
922                     uint64_t size,
923                     uint32_t alignment,
924                     unsigned flags)
925 {
926    if (flags & BO_ALLOC_NO_SUBALLOC)
927       return NULL;
928 
929    struct pb_slabs *last_slab = &bufmgr->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
930    unsigned max_slab_entry_size =
931       1 << (last_slab->min_order + last_slab->num_orders - 1);
932 
933    if (size > max_slab_entry_size)
934       return NULL;
935 
936    struct pb_slab_entry *entry;
937 
938    enum iris_heap heap = flags_to_heap(bufmgr, flags);
939 
940    unsigned alloc_size = size;
941 
942    /* Always use slabs for sizes less than 4 KB because the kernel aligns
943     * everything to 4 KB.
944     */
945    if (size < alignment && alignment <= 4 * 1024)
946       alloc_size = alignment;
947 
948    if (alignment > get_slab_entry_alignment(bufmgr, alloc_size)) {
949       /* 3/4 allocations can return too small alignment.
950        * Try again with a power of two allocation size.
951        */
952       unsigned pot_size = get_slab_pot_entry_size(bufmgr, alloc_size);
953 
954       if (alignment <= pot_size) {
955          /* This size works but wastes some memory to fulfill the alignment. */
956          alloc_size = pot_size;
957       } else {
958          /* can't fulfill alignment requirements */
959          return NULL;
960       }
961    }
962 
963    struct pb_slabs *slabs = get_slabs(bufmgr, alloc_size);
964    entry = pb_slab_alloc(slabs, alloc_size, heap);
965    if (!entry) {
966       /* Clean up and try again... */
967       pb_slabs_reclaim(slabs);
968 
969       entry = pb_slab_alloc(slabs, alloc_size, heap);
970    }
971    if (!entry)
972       return NULL;
973 
974    struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
975 
976    if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
977       /* This buffer was associated with an aux-buffer range.  We only allow
978        * slab allocated buffers to be reclaimed when idle (not in use by an
979        * executing batch).  (See iris_can_reclaim_slab().)  So we know that
980        * our previous aux mapping is no longer in use, and we can safely
981        * remove it.
982        */
983       intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
984                                 bo->size);
985       bo->aux_map_address = 0;
986    }
987 
988    p_atomic_set(&bo->refcount, 1);
989    bo->name = name;
990    bo->size = size;
991 
992    /* Zero the contents if necessary.  If this fails, fall back to
993     * allocating a fresh BO, which will always be zeroed by the kernel.
994     */
995    if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
996       pb_slab_free(slabs, &bo->slab.entry);
997       return NULL;
998    }
999 
1000    return bo;
1001 }
1002 
1003 static struct iris_bo *
1004 alloc_bo_from_cache(struct iris_bufmgr *bufmgr,
1005                     struct bo_cache_bucket *bucket,
1006                     uint32_t alignment,
1007                     enum iris_memory_zone memzone,
1008                     enum iris_mmap_mode mmap_mode,
1009                     unsigned flags,
1010                     bool match_zone)
1011 {
1012    if (!bucket)
1013       return NULL;
1014 
1015    struct iris_bo *bo = NULL;
1016 
1017    simple_mtx_assert_locked(&bufmgr->lock);
1018 
1019    list_for_each_entry_safe(struct iris_bo, cur, &bucket->head, head) {
1020       assert(iris_bo_is_real(cur));
1021 
1022       /* Find one that's got the right mapping type.  We used to swap maps
1023        * around but the kernel doesn't allow this on discrete GPUs.
1024        */
1025       if (mmap_mode != cur->real.mmap_mode)
1026          continue;
1027 
1028       /* Try a little harder to find one that's already in the right memzone */
1029       if (match_zone && memzone != iris_memzone_for_address(cur->address))
1030          continue;
1031 
1032       if (cur->real.capture != !!(flags & BO_ALLOC_CAPTURE))
1033          continue;
1034 
1035       /* If the last BO in the cache is busy, there are no idle BOs.  Bail,
1036        * either falling back to a non-matching memzone, or if that fails,
1037        * allocating a fresh buffer.
1038        */
1039       if (iris_bo_busy(cur))
1040          return NULL;
1041 
1042       list_del(&cur->head);
1043 
1044       /* Tell the kernel we need this BO, and check whether it still exists. */
1045       if (!iris_bo_madvise(cur, IRIS_MADVICE_WILL_NEED)) {
1046          /* This BO was purged, throw it out and keep looking. */
1047          bo_free(cur);
1048          continue;
1049       }
1050 
1051       if (cur->aux_map_address) {
1052          /* This buffer was associated with an aux-buffer range. We make sure
1053           * that buffers are not reused from the cache while the buffer is (busy)
1054           * being used by an executing batch. Since we are here, the buffer is no
1055           * longer being used by a batch and the buffer was deleted (in order to
1056           * end up in the cache). Therefore its old aux-buffer range can be
1057           * removed from the aux-map.
1058           */
1059          if (cur->bufmgr->aux_map_ctx)
1060             intel_aux_map_unmap_range(cur->bufmgr->aux_map_ctx, cur->address,
1061                                       cur->size);
1062          cur->aux_map_address = 0;
1063       }
1064 
1065       /* If the cached BO isn't in the right memory zone, or the alignment
1066        * isn't sufficient, free the old memory and assign it a new address.
1067        */
1068       if (memzone != iris_memzone_for_address(cur->address) ||
1069           cur->address % alignment != 0) {
1070          if (!bufmgr->kmd_backend->gem_vm_unbind(cur)) {
1071             DBG("Unable to unbind vm of buf %u\n", cur->gem_handle);
1072             bo_free(cur);
1073             continue;
1074          }
1075 
1076          vma_free(bufmgr, cur->address, cur->size);
1077          cur->address = 0ull;
1078       }
1079 
1080       bo = cur;
1081       break;
1082    }
1083 
1084    if (!bo)
1085       return NULL;
1086 
1087    /* Zero the contents if necessary.  If this fails, fall back to
1088     * allocating a fresh BO, which will always be zeroed by the kernel.
1089     */
1090    assert(bo->zeroed == false);
1091    if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
1092       bo_free(bo);
1093       return NULL;
1094    }
1095 
1096    return bo;
1097 }
1098 
1099 static struct iris_bo *
1100 alloc_fresh_bo(struct iris_bufmgr *bufmgr, uint64_t bo_size, unsigned flags)
1101 {
1102    struct iris_bo *bo = bo_calloc();
1103    if (!bo)
1104       return NULL;
1105 
1106    /* Try to allocate memory in multiples of 2MB, as this allows us to use
1107     * 64K pages rather than the less-efficient 4K pages.  Most BOs smaller
1108     * than 64MB should hit the BO cache or slab allocations anyway, so this
1109     * shouldn't waste too much memory.  We do exclude small (< 1MB) sizes to
1110     * be defensive in case any of those bypass the caches and end up here.
1111     */
1112    if (bo_size >= 1024 * 1024)
1113       bo_size = align64(bo_size, 2 * 1024 * 1024);
1114 
1115    bo->real.heap = flags_to_heap(bufmgr, flags);
1116 
1117    const struct intel_memory_class_instance *regions[2];
1118    uint16_t num_regions = 0;
1119 
1120    if (bufmgr->vram.size > 0) {
1121       switch (bo->real.heap) {
1122       case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1123          /* For vram allocations, still use system memory as a fallback. */
1124          regions[num_regions++] = bufmgr->vram.region;
1125          regions[num_regions++] = bufmgr->sys.region;
1126          break;
1127       case IRIS_HEAP_DEVICE_LOCAL:
1128       case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
1129       case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
1130          regions[num_regions++] = bufmgr->vram.region;
1131          break;
1132       case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1133          regions[num_regions++] = bufmgr->sys.region;
1134          break;
1135       case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
1136          /* not valid; compressed BOs on discrete are always created with
1137           * IRIS_HEAP_DEVICE_LOCAL_PREFERRED_COMPRESSED
1138           */
1139       case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1140          /* not valid; discrete cards always enable snooping */
1141       case IRIS_HEAP_MAX:
1142          unreachable("invalid heap for BO");
1143       }
1144    } else {
1145       regions[num_regions++] = bufmgr->sys.region;
1146    }
1147 
1148    bo->gem_handle = bufmgr->kmd_backend->gem_create(bufmgr, regions,
1149                                                     num_regions, bo_size,
1150                                                     bo->real.heap, flags);
1151    if (bo->gem_handle == 0) {
1152       free(bo);
1153       return NULL;
1154    }
1155    bo->bufmgr = bufmgr;
1156    bo->size = bo_size;
1157    bo->idle = true;
1158    bo->zeroed = true;
1159    bo->real.capture = (flags & BO_ALLOC_CAPTURE) != 0;
1160 
1161    return bo;
1162 }
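
/* For example (illustrative): a 3 MiB request is padded to 4 MiB by the
 * 2 MiB rounding above, while a 512 KiB request (below the 1 MiB cutoff)
 * keeps its original size.
 */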
1163 
1164 const char *
1165 iris_heap_to_string[IRIS_HEAP_MAX] = {
1166    [IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT] = "system-cached-coherent",
1167    [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED] = "system-uncached",
1168    [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED] = "system-uncached-compressed",
1169    [IRIS_HEAP_DEVICE_LOCAL] = "local",
1170    [IRIS_HEAP_DEVICE_LOCAL_COMPRESSED] = "local-compressed",
1171    [IRIS_HEAP_DEVICE_LOCAL_PREFERRED] = "local-preferred",
1172    [IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR] = "local-cpu-visible-small-bar",
1173 };
1174 
1175 static enum iris_mmap_mode
1176 heap_to_mmap_mode(struct iris_bufmgr *bufmgr, enum iris_heap heap)
1177 {
1178    const struct intel_device_info *devinfo = &bufmgr->devinfo;
1179 
1180    switch (heap) {
1181    case IRIS_HEAP_DEVICE_LOCAL:
1182       return intel_vram_all_mappable(devinfo) ? IRIS_MMAP_WC : IRIS_MMAP_NONE;
1183    case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
1184    case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1185       return IRIS_MMAP_WC;
1186    case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1187       return IRIS_MMAP_WB;
1188    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1189       return IRIS_MMAP_WC;
1190    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
1191    case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
1192       /* compressed bos are not mmaped */
1193       return IRIS_MMAP_NONE;
1194    default:
1195       unreachable("invalid heap");
1196    }
1197 }
1198 
1199 struct iris_bo *
1200 iris_bo_alloc(struct iris_bufmgr *bufmgr,
1201               const char *name,
1202               uint64_t size,
1203               uint32_t alignment,
1204               enum iris_memory_zone memzone,
1205               unsigned flags)
1206 {
1207    struct iris_bo *bo;
1208    unsigned int page_size = getpagesize();
1209    enum iris_heap heap = flags_to_heap(bufmgr, flags);
1210    struct bo_cache_bucket *bucket =
1211       bucket_for_size(bufmgr, size, heap, flags);
1212 
1213    if (memzone != IRIS_MEMZONE_OTHER || (flags & BO_ALLOC_CACHED_COHERENT))
1214       flags |= BO_ALLOC_NO_SUBALLOC;
1215 
1216    /* By default, capture all driver-internal buffers like shader kernels,
1217     * surface states, dynamic states, border colors, and so on.
1218     */
1219    if (memzone < IRIS_MEMZONE_OTHER || INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1220       flags |= BO_ALLOC_CAPTURE;
1221 
1222    bo = alloc_bo_from_slabs(bufmgr, name, size, alignment, flags);
1223 
1224    if (bo)
1225       return bo;
1226 
1227    /* Round the size up to the bucket size, or if we don't have caching
1228     * at this size, a multiple of the page size.
1229     */
1230    uint64_t bo_size =
1231       bucket ? bucket->size : MAX2(align64(size, page_size), page_size);
1232    enum iris_mmap_mode mmap_mode = heap_to_mmap_mode(bufmgr, heap);
1233 
1234    simple_mtx_lock(&bufmgr->lock);
1235 
1236    /* Get a buffer out of the cache if available.  First, we try to find
1237     * one with a matching memory zone so we can avoid reallocating VMA.
1238     */
1239    bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1240                             flags, true);
1241 
1242    /* If that fails, we try for any cached BO, without matching memzone. */
1243    if (!bo) {
1244       bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1245                                flags, false);
1246    }
1247 
1248    simple_mtx_unlock(&bufmgr->lock);
1249 
1250    if (!bo) {
1251       bo = alloc_fresh_bo(bufmgr, bo_size, flags);
1252       if (!bo)
1253          return NULL;
1254    }
1255 
1256    if (bo->address == 0ull) {
1257       simple_mtx_lock(&bufmgr->lock);
1258       bo->address = vma_alloc(bufmgr, memzone, bo->size, alignment);
1259       simple_mtx_unlock(&bufmgr->lock);
1260 
1261       if (bo->address == 0ull)
1262          goto err_free;
1263 
1264       if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1265          goto err_vm_alloc;
1266    }
1267 
1268    bo->name = name;
1269    p_atomic_set(&bo->refcount, 1);
1270    bo->real.reusable = bucket && bufmgr->bo_reuse;
1271    bo->real.protected = flags & BO_ALLOC_PROTECTED;
1272    bo->index = -1;
1273    bo->real.prime_fd = -1;
1274 
1275    assert(bo->real.map == NULL || bo->real.mmap_mode == mmap_mode);
1276    bo->real.mmap_mode = mmap_mode;
1277 
1278    /* On integrated GPUs, enable snooping to ensure coherency if needed.
1279     * For discrete, we instead use SMEM and avoid WB maps for coherency.
1280     */
1281    if ((flags & BO_ALLOC_CACHED_COHERENT) &&
1282        !bufmgr->devinfo.has_llc && bufmgr->devinfo.has_caching_uapi) {
1283       if (bufmgr->kmd_backend->bo_set_caching(bo, true) != 0)
1284          goto err_free;
1285    }
1286 
1287    DBG("bo_create: buf %d (%s) (%s memzone) (%s) %llub\n", bo->gem_handle,
1288        bo->name, memzone_name(memzone), iris_heap_to_string[bo->real.heap],
1289        (unsigned long long) size);
1290 
1291    return bo;
1292 
1293 err_vm_alloc:
1294    simple_mtx_lock(&bufmgr->lock);
1295    vma_free(bufmgr, bo->address, bo->size);
1296    simple_mtx_unlock(&bufmgr->lock);
1297 err_free:
1298    simple_mtx_lock(&bufmgr->lock);
1299    bo_free(bo);
1300    simple_mtx_unlock(&bufmgr->lock);
1301    return NULL;
1302 }
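
/* Illustrative usage of iris_bo_alloc() (a sketch only; the name, size,
 * and flags below are invented for the example):
 *
 *    struct iris_bo *bo =
 *       iris_bo_alloc(bufmgr, "scratch upload", 64 * 1024, 4096,
 *                     IRIS_MEMZONE_OTHER, BO_ALLOC_ZEROED);
 *    if (bo) {
 *       ...map or submit work using bo...
 *       iris_bo_unreference(bo);
 *    }
 *
 * Small requests like this are typically satisfied by the slab allocators;
 * larger ones come from the bucket cache or a fresh GEM object.
 */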
1303 
1304 static int
1305 iris_bo_close(int fd, uint32_t gem_handle)
1306 {
1307    struct drm_gem_close close = {
1308       .handle = gem_handle,
1309    };
1310    return intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
1311 }
1312 
1313 struct iris_bo *
1314 iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
1315                        void *ptr, size_t size,
1316                        enum iris_memory_zone memzone)
1317 {
1318    struct iris_bo *bo;
1319 
1320    bo = bo_calloc();
1321    if (!bo)
1322       return NULL;
1323 
1324    bo->gem_handle = bufmgr->kmd_backend->gem_create_userptr(bufmgr, ptr, size);
1325    if (bo->gem_handle == 0)
1326       goto err_free;
1327 
1328    bo->name = name;
1329    bo->size = size;
1330    bo->real.map = ptr;
1331    bo->real.userptr = true;
1332 
1333    bo->bufmgr = bufmgr;
1334 
1335    if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1336       bo->real.capture = true;
1337 
1338    simple_mtx_lock(&bufmgr->lock);
1339    bo->address = vma_alloc(bufmgr, memzone, size, 1);
1340    simple_mtx_unlock(&bufmgr->lock);
1341 
1342    if (bo->address == 0ull)
1343       goto err_close;
1344 
1345    p_atomic_set(&bo->refcount, 1);
1346    bo->index = -1;
1347    bo->idle = true;
1348    bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1349    bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
1350    bo->real.prime_fd = -1;
1351 
1352    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1353       goto err_vma_free;
1354 
1355    return bo;
1356 
1357 err_vma_free:
1358    simple_mtx_lock(&bufmgr->lock);
1359    vma_free(bufmgr, bo->address, bo->size);
1360    simple_mtx_unlock(&bufmgr->lock);
1361 err_close:
1362    bufmgr->kmd_backend->gem_close(bufmgr, bo);
1363 err_free:
1364    free(bo);
1365    return NULL;
1366 }
1367 
1368 static bool
1369 needs_prime_fd(struct iris_bufmgr *bufmgr)
1370 {
1371    return bufmgr->devinfo.kmd_type == INTEL_KMD_TYPE_XE;
1372 }
1373 
1374 static bool
1375 iris_bo_set_prime_fd(struct iris_bo *bo)
1376 {
1377    struct iris_bufmgr *bufmgr = bo->bufmgr;
1378 
1379    if (needs_prime_fd(bufmgr) && bo->real.prime_fd == -1) {
1380       if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1381                              DRM_CLOEXEC | DRM_RDWR, &bo->real.prime_fd)) {
1382          fprintf(stderr, "Failed to get prime fd for bo %s/%u\n",
1383                  bo->name, bo->gem_handle);
1384          return false;
1385       }
1386    }
1387 
1388    return true;
1389 }
1390 
1391 /**
1392  * Returns a iris_bo wrapping the given buffer object handle.
1393  *
1394  * This can be used when one application needs to pass a buffer object
1395  * to another.
1396  */
1397 struct iris_bo *
1398 iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
1399                              const char *name, unsigned int handle)
1400 {
1401    struct iris_bo *bo;
1402 
1403     * At the moment most applications only have a few named BOs.
1404     * For instance, in a DRI client only the render buffers passed
1405     * between X and the client are named.  And since X returns the
1406     * alternating names for the front/back buffers, a linear search
1407     * provides a sufficiently fast match.
1408     */
1409    simple_mtx_lock(&bufmgr->lock);
1410    bo = find_and_ref_external_bo(bufmgr->name_table, handle);
1411    if (bo)
1412       goto out;
1413 
1414    struct drm_gem_open open_arg = { .name = handle };
1415    int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
1416    if (ret != 0) {
1417       DBG("Couldn't reference %s handle 0x%08x: %s\n",
1418           name, handle, strerror(errno));
1419       bo = NULL;
1420       goto out;
1421    }
1422    /* Now see if someone has used a prime handle to get this
1423     * object from the kernel before by looking through the list
1424     * again for a matching gem_handle
1425     */
1426    bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle);
1427    if (bo)
1428       goto out;
1429 
1430    bo = bo_calloc();
1431    if (!bo) {
1432       struct iris_bo close_bo = {
1433             .gem_handle = open_arg.handle,
1434       };
1435       bufmgr->kmd_backend->gem_close(bufmgr, &close_bo);
1436       goto out;
1437    }
1438 
1439    p_atomic_set(&bo->refcount, 1);
1440 
1441    bo->size = open_arg.size;
1442    bo->bufmgr = bufmgr;
1443    bo->gem_handle = open_arg.handle;
1444    bo->name = name;
1445    bo->index = -1;
1446    bo->real.global_name = handle;
1447    bo->real.prime_fd = -1;
1448    bo->real.reusable = false;
1449    bo->real.imported = true;
1450    /* Xe KMD expects at least 1-way coherency for imports */
1451    bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1452    bo->real.mmap_mode = IRIS_MMAP_NONE;
1453    if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1454       bo->real.capture = true;
1455    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
1456    if (bo->address == 0ull)
1457       goto err_free;
1458 
1459    if (!iris_bo_set_prime_fd(bo))
1460       goto err_vm_alloc;
1461 
1462    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1463       goto err_vm_alloc;
1464 
1465    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1466    _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
1467 
1468    DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
1469 
1470 out:
1471    simple_mtx_unlock(&bufmgr->lock);
1472    return bo;
1473 
1474 err_vm_alloc:
1475    vma_free(bufmgr, bo->address, bo->size);
1476 err_free:
1477    bo_free(bo);
1478    simple_mtx_unlock(&bufmgr->lock);
1479    return NULL;
1480 }
1481 
1482 static void
1483 bo_close(struct iris_bo *bo)
1484 {
1485    struct iris_bufmgr *bufmgr = bo->bufmgr;
1486 
1487    simple_mtx_assert_locked(&bufmgr->lock);
1488    assert(iris_bo_is_real(bo));
1489 
1490    if (iris_bo_is_external(bo)) {
1491       struct hash_entry *entry;
1492 
1493       if (bo->real.global_name) {
1494          entry = _mesa_hash_table_search(bufmgr->name_table,
1495                                          &bo->real.global_name);
1496          _mesa_hash_table_remove(bufmgr->name_table, entry);
1497       }
1498 
1499       entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
1500       _mesa_hash_table_remove(bufmgr->handle_table, entry);
1501 
1502       list_for_each_entry_safe(struct bo_export, export, &bo->real.exports, link) {
1503          iris_bo_close(export->drm_fd, export->gem_handle);
1504 
1505          list_del(&export->link);
1506          free(export);
1507       }
1508    } else {
1509       assert(list_is_empty(&bo->real.exports));
1510    }
1511 
1512    /* Unbind and return the VMA for reuse */
1513    if (bufmgr->kmd_backend->gem_vm_unbind(bo))
1514       vma_free(bo->bufmgr, bo->address, bo->size);
1515    else
1516       DBG("Unable to unbind vm of buf %u\n", bo->gem_handle);
1517 
1518    if (bo->real.prime_fd != -1)
1519       close(bo->real.prime_fd);
1520 
1521    /* Close this object */
1522    if (bufmgr->kmd_backend->gem_close(bufmgr, bo) != 0) {
1523       DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1524           bo->gem_handle, bo->name, strerror(errno));
1525    }
1526 
1527    if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
1528       intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
1529                                 bo->size);
1530    }
1531 
1532    for (int d = 0; d < bo->deps_size; d++) {
1533       for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
1534          iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
1535          iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
1536       }
1537    }
1538    free(bo->deps);
1539 
1540    free(bo);
1541 }
1542 
1543 static void
1544 bo_free(struct iris_bo *bo)
1545 {
1546    struct iris_bufmgr *bufmgr = bo->bufmgr;
1547 
1548    simple_mtx_assert_locked(&bufmgr->lock);
1549    assert(iris_bo_is_real(bo));
1550 
1551    if (!bo->real.userptr && bo->real.map)
1552       bo_unmap(bo);
1553 
1554    if (bo->idle || !iris_bo_busy(bo)) {
1555       bo_close(bo);
1556    } else {
1557       /* Defer closing the GEM BO and returning the VMA for reuse until the
1558        * BO is idle.  Just move it to the zombie list for now.
1559        */
1560       list_addtail(&bo->head, &bufmgr->zombie_list);
1561    }
1562 }
1563 
1564 static enum iris_heap
1565 iris_get_heap_max(struct iris_bufmgr *bufmgr)
1566 {
1567    if (bufmgr->vram.size) {
1568       return intel_vram_all_mappable(&bufmgr->devinfo) ?
1569              IRIS_HEAP_MAX_LARGE_BAR : IRIS_HEAP_MAX;
1570    }
1571 
1572    return bufmgr->devinfo.ver >= 20 ? IRIS_HEAP_MAX_NO_VRAM :
1573                                       IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;
1574 }
1575 
1576 /** Frees all cached buffers significantly older than @time. */
1577 static void
1578 cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time)
1579 {
1580    simple_mtx_assert_locked(&bufmgr->lock);
1581 
1582    if (bufmgr->time == time)
1583       return;
1584 
1585    for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
1586       struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1587 
1588       for (int i = 0; i < cache->num_buckets; i++) {
1589          struct bo_cache_bucket *bucket = &cache->bucket[i];
1590 
1591          list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1592             if (time - bo->real.free_time <= 1)
1593                break;
1594 
1595             list_del(&bo->head);
1596 
1597             bo_free(bo);
1598          }
1599       }
1600    }
1601 
1602    list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1603       /* Stop once we reach a busy BO - all others past this point were
1604        * freed more recently so are likely also busy.
1605        */
1606       if (!bo->idle && iris_bo_busy(bo))
1607          break;
1608 
1609       list_del(&bo->head);
1610       bo_close(bo);
1611    }
1612 
1613    bufmgr->time = time;
1614 }
1615 
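/**
 * Drops the final reference to a real BO: if it is reusable and the kernel
 * accepts the madvise, the BO is placed in the bucket cache for later reuse;
 * otherwise it is freed.
 */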
1616 static void
1617 bo_unreference_final(struct iris_bo *bo, time_t time)
1618 {
1619    struct iris_bufmgr *bufmgr = bo->bufmgr;
1620 
1621    DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
1622 
1623    assert(iris_bo_is_real(bo));
1624 
1625    struct bo_cache_bucket *bucket = !bo->real.reusable ? NULL :
1626       bucket_for_size(bufmgr, bo->size, bo->real.heap, 0);
1627 
1628    /* Put the buffer into our internal cache for reuse if we can. */
1629    if (bucket && iris_bo_madvise(bo, IRIS_MADVICE_DONT_NEED)) {
1630       bo->real.free_time = time;
1631       bo->name = NULL;
1632 
1633       list_addtail(&bo->head, &bucket->head);
1634    } else {
1635       bo_free(bo);
1636    }
1637 }
1638 
1639 void
1640 iris_bo_unreference(struct iris_bo *bo)
1641 {
1642    if (bo == NULL)
1643       return;
1644 
1645    assert(p_atomic_read(&bo->refcount) > 0);
1646 
1647    if (atomic_add_unless(&bo->refcount, -1, 1)) {
1648       struct iris_bufmgr *bufmgr = bo->bufmgr;
1649       struct timespec time;
1650 
1651       clock_gettime(CLOCK_MONOTONIC, &time);
1652 
1653       bo->zeroed = false;
1654       if (bo->gem_handle == 0) {
1655          pb_slab_free(get_slabs(bufmgr, bo->size), &bo->slab.entry);
1656       } else {
1657          simple_mtx_lock(&bufmgr->lock);
1658 
1659          if (p_atomic_dec_zero(&bo->refcount)) {
1660             bo_unreference_final(bo, time.tv_sec);
1661             cleanup_bo_cache(bufmgr, time.tv_sec);
1662          }
1663 
1664          simple_mtx_unlock(&bufmgr->lock);
1665       }
1666    }
1667 }
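
/* Usage sketch (illustrative only, not part of the driver): dropping the
 * last reference either recycles the BO into the bucket cache or frees it.
 *
 *    struct iris_bo *bo =
 *       iris_bo_alloc(bufmgr, "scratch", 4096, 4096,
 *                     IRIS_MEMZONE_OTHER, BO_ALLOC_PLAIN);
 *    ...use the BO...
 *    iris_bo_unreference(bo);
 */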
1668 
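/**
 * Waits for rendering to the BO to complete, and emits a perf_debug warning
 * if a stall of more than 0.01 ms was actually incurred.
 */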
1669 static void
1670 bo_wait_with_stall_warning(struct util_debug_callback *dbg,
1671                            struct iris_bo *bo,
1672                            const char *action)
1673 {
1674    bool busy = dbg && !bo->idle;
1675    double elapsed = unlikely(busy) ? -get_time() : 0.0;
1676 
1677    iris_bo_wait_rendering(bo);
1678 
1679    if (unlikely(busy)) {
1680       elapsed += get_time();
1681       if (elapsed > 1e-5) /* 0.01ms */ {
1682          perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
1683                     action, bo->name, elapsed * 1000);
1684       }
1685    }
1686 }
1687 
1688 static void
1689 print_flags(unsigned flags)
1690 {
1691    if (flags & MAP_READ)
1692       DBG("READ ");
1693    if (flags & MAP_WRITE)
1694       DBG("WRITE ");
1695    if (flags & MAP_ASYNC)
1696       DBG("ASYNC ");
1697    if (flags & MAP_PERSISTENT)
1698       DBG("PERSISTENT ");
1699    if (flags & MAP_COHERENT)
1700       DBG("COHERENT ");
1701    if (flags & MAP_RAW)
1702       DBG("RAW ");
1703    DBG("\n");
1704 }
1705 
1706 void *
1707 iris_bo_map(struct util_debug_callback *dbg,
1708             struct iris_bo *bo, unsigned flags)
1709 {
1710    struct iris_bufmgr *bufmgr = bo->bufmgr;
1711    void *map = NULL;
1712 
1713    if (bo->gem_handle == 0) {
1714       struct iris_bo *real = iris_get_backing_bo(bo);
1715       uint64_t offset = bo->address - real->address;
1716       map = iris_bo_map(dbg, real, flags | MAP_ASYNC) + offset;
1717    } else {
1718       assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
1719       if (bo->real.mmap_mode == IRIS_MMAP_NONE)
1720          return NULL;
1721 
1722       if (!bo->real.map) {
1723          DBG("iris_bo_map: %d (%s)\n", bo->gem_handle, bo->name);
1724          map = bufmgr->kmd_backend->gem_mmap(bufmgr, bo);
1725          if (!map) {
1726             return NULL;
1727          }
1728 
1729          VG_DEFINED(map, bo->size);
1730 
1731          if (p_atomic_cmpxchg(&bo->real.map, NULL, map)) {
1732             VG_NOACCESS(map, bo->size);
1733             os_munmap(map, bo->size);
1734          }
1735       }
1736       assert(bo->real.map);
1737       map = bo->real.map;
1738    }
1739 
1740    DBG("iris_bo_map: %d (%s) -> %p\n",
1741        bo->gem_handle, bo->name, bo->real.map);
1742    print_flags(flags);
1743 
1744    if (!(flags & MAP_ASYNC)) {
1745       bo_wait_with_stall_warning(dbg, bo, "memory mapping");
1746    }
1747 
1748    return map;
1749 }
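
/* Usage sketch (illustrative only): filling a BO through a CPU mapping.
 * Without MAP_ASYNC, iris_bo_map() stalls until the GPU is done with the
 * buffer; `data` and `data_size` are placeholders for caller-provided data.
 *
 *    void *ptr = iris_bo_map(dbg, bo, MAP_WRITE);
 *    if (ptr)
 *       memcpy(ptr, data, data_size);
 */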
1750 
1751 /**
1752  * Waits on a BO for the given amount of time.
1753  *
1754  * @bo: buffer object to wait for
1755  * @timeout_ns: amount of time to wait in nanoseconds.
1756  *   If value is less than 0, an infinite wait will occur.
1757  *
1758  * Returns 0 if the wait was successful, i.e. the last batch referencing the
1759  * object completed within the allotted time. Otherwise a negative return
1760  * value describes the error; of particular interest is -ETIME, returned when
1761  * the wait timed out before the object became idle.
1762  *
1763  * Similar to iris_bo_wait_rendering, except that a timeout parameter allows
1764  * the operation to give up after a certain amount of time. Another subtle
1765  * difference is the internal locking semantics (this variant does not hold
1766  * the lock for the duration of the wait), which makes the wait subject to a
1767  * larger userspace race window.
1768  *
1769  * The implementation shall wait until the object is no longer actively
1770  * referenced within a batch buffer at the time of the call. The wait does
1771  * not guarantee that the buffer will not be re-issued by another thread or
1772  * via a flinked handle before it returns. Userspace must make sure this race
1773  * does not occur if such precision is important.
1774  *
1775  * Note that some kernels have broken the promise of an infinite wait for
1776  * negative values; upgrade to the latest stable kernel if this is the case.
1777  */
1778 static inline int
1779 iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
1780 {
1781    int ret;
1782 
1783    switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
1784    case INTEL_KMD_TYPE_I915:
1785       if (iris_bo_is_external(bo))
1786          ret = iris_i915_bo_wait_gem(bo, timeout_ns);
1787       else
1788          ret = iris_bo_wait_syncobj(bo, timeout_ns);
1789       break;
1790    case INTEL_KMD_TYPE_XE:
1791       ret = iris_bo_wait_syncobj(bo, timeout_ns);
1792       break;
1793    default:
1794       unreachable("missing");
1795       ret = -1;
1796    }
1797 
1798    bo->idle = ret == 0;
1799 
1800    return ret;
1801 }
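
/* Usage sketch (illustrative only): a bounded wait of 1 ms.  A return value
 * of -ETIME means the BO was still busy when the timeout expired.
 *
 *    if (iris_bo_wait(bo, 1000000) == -ETIME) {
 *       ...the BO is still busy; try again later...
 *    }
 */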
1802 
1803 /** Waits for all GPU rendering with the object to have completed. */
1804 void
1805 iris_bo_wait_rendering(struct iris_bo *bo)
1806 {
1807    /* We require a kernel recent enough for WAIT_IOCTL support.
1808     * See intel_init_bufmgr()
1809     */
1810    iris_bo_wait(bo, -1);
1811 }
1812 
1813 static void
1814 iris_bufmgr_destroy_global_vm(struct iris_bufmgr *bufmgr)
1815 {
1816    switch (bufmgr->devinfo.kmd_type) {
1817    case INTEL_KMD_TYPE_I915:
1818       /* Nothing to do in i915 */
1819       break;
1820    case INTEL_KMD_TYPE_XE:
1821       intel_bind_timeline_finish(&bufmgr->bind_timeline, bufmgr->fd);
1822       iris_xe_destroy_global_vm(bufmgr);
1823       break;
1824    default:
1825       unreachable("missing");
1826    }
1827 }
1828 
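/**
 * Tears down the buffer manager: releases the workaround BOs, the border
 * color pool, and the aux-map, frees all cached and zombie BOs, the slab
 * allocators and VMA heaps, destroys the global VM, and closes the fd.
 */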
1829 static void
1830 iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
1831 {
1832    iris_bo_unreference(bufmgr->dummy_aux_bo);
1833    iris_bo_unreference(bufmgr->mem_fence_bo);
1834 
1835    iris_destroy_border_color_pool(&bufmgr->border_color_pool);
1836 
1837    /* Free aux-map buffers */
1838    intel_aux_map_finish(bufmgr->aux_map_ctx);
1839 
1840    /* bufmgr will no longer try to free VMA entries in the aux-map */
1841    bufmgr->aux_map_ctx = NULL;
1842 
1843    for (int i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
1844       if (bufmgr->bo_slabs[i].groups)
1845          pb_slabs_deinit(&bufmgr->bo_slabs[i]);
1846    }
1847 
1848    simple_mtx_lock(&bufmgr->lock);
1849 
1850    /* Free any cached buffer objects we were going to reuse */
1851    for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
1852       struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1853 
1854       for (int i = 0; i < cache->num_buckets; i++) {
1855          struct bo_cache_bucket *bucket = &cache->bucket[i];
1856 
1857          list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1858             list_del(&bo->head);
1859 
1860             bo_free(bo);
1861          }
1862       }
1863    }
1864    free(bufmgr->bucket_cache);
1865 
1866    /* Close any buffer objects on the dead list. */
1867    list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1868       list_del(&bo->head);
1869       bo_close(bo);
1870    }
1871 
1872    _mesa_hash_table_destroy(bufmgr->name_table, NULL);
1873    _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
1874 
1875    for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
1876       util_vma_heap_finish(&bufmgr->vma_allocator[z]);
1877 
1878    iris_bufmgr_destroy_global_vm(bufmgr);
1879 
1880    close(bufmgr->fd);
1881 
1882    simple_mtx_unlock(&bufmgr->lock);
1883 
1884    simple_mtx_destroy(&bufmgr->lock);
1885    simple_mtx_destroy(&bufmgr->bo_deps_lock);
1886 
1887    free(bufmgr);
1888 }
1889 
1890 int
1891 iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling)
1892 {
1893    struct iris_bufmgr *bufmgr = bo->bufmgr;
1894 
1895    if (!bufmgr->devinfo.has_tiling_uapi) {
1896       *tiling = 0;
1897       return 0;
1898    }
1899 
1900    assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
1901    return iris_i915_bo_get_tiling(bo, tiling);
1902 }
1903 
1904 int
1905 iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf)
1906 {
1907    struct iris_bufmgr *bufmgr = bo->bufmgr;
1908 
1909    /* If we can't do map_gtt, the set/get_tiling API isn't useful. And it's
1910     * actually not supported by the kernel in those cases.
1911     */
1912    if (!bufmgr->devinfo.has_tiling_uapi)
1913       return 0;
1914 
1915    assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
1916    return iris_i915_bo_set_tiling(bo, surf);
1917 }
1918 
1919 struct iris_bo *
1920 iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
1921                       const uint64_t modifier)
1922 {
1923    uint32_t handle;
1924    struct iris_bo *bo;
1925 
1926    simple_mtx_lock(&bufmgr->lock);
1927    int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1928    if (ret) {
1929       DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
1930           strerror(errno));
1931       simple_mtx_unlock(&bufmgr->lock);
1932       return NULL;
1933    }
1934 
1935    /*
1936     * See if the kernel has already returned this buffer to us. Just as
1937     * for named buffers, we must not create two BOs pointing at the same
1938     * kernel object.
1939     */
1940    bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
1941    if (bo)
1942       goto out;
1943 
1944    bo = bo_calloc();
1945    if (!bo)
1946       goto out;
1947 
1948    p_atomic_set(&bo->refcount, 1);
1949 
1950    /* Determine size of bo.  The fd-to-handle ioctl really should
1951     * return the size, but it doesn't.  If we have kernel 3.12 or
1952     * later, we can lseek on the prime fd to get the size.  Older
1953     * kernels will just fail, in which case we fall back to the
1954     * provided (estimated or guessed) size. */
1955    ret = lseek(prime_fd, 0, SEEK_END);
1956    if (ret != -1)
1957       bo->size = ret;
1958 
1959    bo->bufmgr = bufmgr;
1960    bo->name = "prime";
1961    bo->index = -1;
1962    bo->real.reusable = false;
1963    bo->real.imported = true;
1964    /* Xe KMD expects at least 1-way coherency for imports */
1965    bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1966    bo->real.mmap_mode = IRIS_MMAP_NONE;
1967    if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1968       bo->real.capture = true;
1969    bo->gem_handle = handle;
1970    bo->real.prime_fd = needs_prime_fd(bufmgr) ? dup(prime_fd) : -1;
1971 
1972    uint64_t alignment = 1;
1973 
1974    /* When an aux map will be used, there is an alignment requirement on the
1975     * main surface from the mapping granularity. Some planes of the image may
1976     * have smaller alignment requirements, but this one should work for all.
1977     */
1978    if (bufmgr->devinfo.has_aux_map && isl_drm_modifier_has_aux(modifier))
1979       alignment = intel_aux_map_get_alignment(bufmgr->aux_map_ctx);
1980 
1981    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, alignment);
1982    if (bo->address == 0ull)
1983       goto err_free;
1984 
1985    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1986       goto err_vm_alloc;
1987 
1988    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1989 
1990 out:
1991    simple_mtx_unlock(&bufmgr->lock);
1992    return bo;
1993 
1994 err_vm_alloc:
1995    vma_free(bufmgr, bo->address, bo->size);
1996 err_free:
1997    bo_free(bo);
1998    simple_mtx_unlock(&bufmgr->lock);
1999    return NULL;
2000 }
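
/* Usage sketch (illustrative only): wrapping a dma-buf fd received from a
 * producer (e.g. a compositor or media driver).  `fd` and `modifier` are
 * provided by that producer.
 *
 *    struct iris_bo *bo = iris_bo_import_dmabuf(bufmgr, fd, modifier);
 *    if (!bo)
 *       ...fail the import...
 */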
2001 
2002 static void
2003 iris_bo_mark_exported_locked(struct iris_bo *bo)
2004 {
2005    struct iris_bufmgr *bufmgr = bo->bufmgr;
2006 
2007    /* We cannot export suballocated BOs. */
2008    assert(iris_bo_is_real(bo));
2009    simple_mtx_assert_locked(&bufmgr->lock);
2010 
2011    if (!iris_bo_is_external(bo))
2012       _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
2013 
2014    if (!bo->real.exported) {
2015       /* If a BO is going to be used externally, it could be sent to the
2016        * display HW. So make sure our CPU mappings don't assume cache
2017        * coherency since display is outside that cache.
2018        */
2019       bo->real.exported = true;
2020       bo->real.reusable = false;
2021    }
2022 }
2023 
2024 void
2025 iris_bo_mark_exported(struct iris_bo *bo)
2026 {
2027    struct iris_bufmgr *bufmgr = bo->bufmgr;
2028 
2029    /* We cannot export suballocated BOs. */
2030    assert(iris_bo_is_real(bo));
2031 
2032    if (bo->real.exported) {
2033       assert(!bo->real.reusable);
2034       return;
2035    }
2036 
2037    simple_mtx_lock(&bufmgr->lock);
2038    iris_bo_mark_exported_locked(bo);
2039    simple_mtx_unlock(&bufmgr->lock);
2040 
2041    iris_bo_set_prime_fd(bo);
2042 }
2043 
2044 int
2045 iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
2046 {
2047    struct iris_bufmgr *bufmgr = bo->bufmgr;
2048 
2049    /* We cannot export suballocated BOs. */
2050    assert(iris_bo_is_real(bo));
2051 
2052    if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
2053                           DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
2054       return -errno;
2055 
2056    iris_bo_mark_exported(bo);
2057 
2058    return 0;
2059 }
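
/* Usage sketch (illustrative only): handing a BO to another process or
 * device as a dma-buf.  The caller owns the returned fd and should close()
 * it once the consumer has imported it.
 *
 *    int prime_fd = -1;
 *    if (iris_bo_export_dmabuf(bo, &prime_fd) == 0) {
 *       ...send prime_fd to the consumer...
 *       close(prime_fd);
 *    }
 */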
2060 
2061 static uint32_t
2062 iris_bo_export_gem_handle(struct iris_bo *bo)
2063 {
2064    /* We cannot export suballocated BOs. */
2065    assert(iris_bo_is_real(bo));
2066 
2067    iris_bo_mark_exported(bo);
2068 
2069    return bo->gem_handle;
2070 }
2071 
2072 int
2073 iris_bo_flink(struct iris_bo *bo, uint32_t *name)
2074 {
2075    struct iris_bufmgr *bufmgr = bo->bufmgr;
2076 
2077    /* We cannot export suballocated BOs. */
2078    assert(iris_bo_is_real(bo));
2079 
2080    if (!bo->real.global_name) {
2081       struct drm_gem_flink flink = { .handle = bo->gem_handle };
2082 
2083       if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
2084          return -errno;
2085 
2086       simple_mtx_lock(&bufmgr->lock);
2087       if (!bo->real.global_name) {
2088          iris_bo_mark_exported_locked(bo);
2089          bo->real.global_name = flink.name;
2090          _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
2091       }
2092       simple_mtx_unlock(&bufmgr->lock);
2093 
2094       iris_bo_set_prime_fd(bo);
2095    }
2096 
2097    *name = bo->real.global_name;
2098    return 0;
2099 }
2100 
2101 int
2102 iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
2103                                      uint32_t *out_handle)
2104 {
2105    /* We cannot export suballocated BOs. */
2106    assert(iris_bo_is_real(bo));
2107 
2108    /* Only add the new GEM handle to the list of exports if it belongs to a
2109     * different GEM device. Otherwise we might close the same buffer multiple
2110     * times.
2111     */
2112    struct iris_bufmgr *bufmgr = bo->bufmgr;
2113    int ret = os_same_file_description(drm_fd, bufmgr->fd);
2114    WARN_ONCE(ret < 0,
2115              "Kernel has no file descriptor comparison support: %s\n",
2116              strerror(errno));
2117    if (ret == 0) {
2118       *out_handle = iris_bo_export_gem_handle(bo);
2119       return 0;
2120    }
2121 
2122    struct bo_export *export = calloc(1, sizeof(*export));
2123    if (!export)
2124       return -ENOMEM;
2125 
2126    export->drm_fd = drm_fd;
2127 
2128    int dmabuf_fd = -1;
2129    int err = iris_bo_export_dmabuf(bo, &dmabuf_fd);
2130    if (err) {
2131       free(export);
2132       return err;
2133    }
2134 
2135    simple_mtx_lock(&bufmgr->lock);
2136    err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
2137    close(dmabuf_fd);
2138    if (err) {
2139       simple_mtx_unlock(&bufmgr->lock);
2140       free(export);
2141       return err;
2142    }
2143 
2144    bool found = false;
2145    list_for_each_entry(struct bo_export, iter, &bo->real.exports, link) {
2146       if (iter->drm_fd != drm_fd)
2147          continue;
2148       /* Here we assume that for a given DRM fd, we'll always get back the
2149        * same GEM handle for a given buffer.
2150        */
2151       assert(iter->gem_handle == export->gem_handle);
2152       free(export);
2153       export = iter;
2154       found = true;
2155       break;
2156    }
2157    if (!found)
2158       list_addtail(&export->link, &bo->real.exports);
2159 
2160    simple_mtx_unlock(&bufmgr->lock);
2161 
2162    *out_handle = export->gem_handle;
2163 
2164    return 0;
2165 }
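
/* Usage sketch (illustrative only): obtaining a GEM handle for `bo` that is
 * valid on a different DRM fd (`kms_fd` here is a placeholder for such a
 * device).  The handle is tracked in bo->real.exports and closed along with
 * the BO.
 *
 *    uint32_t handle;
 *    if (iris_bo_export_gem_handle_for_device(bo, kms_fd, &handle) == 0) {
 *       ...use handle in ioctls issued on kms_fd...
 *    }
 */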
2166 
2167 static void
2168 add_bucket(struct iris_bufmgr *bufmgr, int size, enum iris_heap heap)
2169 {
2170    struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
2171    unsigned int i = cache->num_buckets++;
2172 
2173    assert(i < BUCKET_ARRAY_SIZE);
2174 
2175    list_inithead(&cache->bucket[i].head);
2176    cache->bucket[i].size = size;
2177 
2178    assert(bucket_for_size(bufmgr, size, heap, 0) == &cache->bucket[i]);
2179    assert(bucket_for_size(bufmgr, size - 2048, heap, 0) == &cache->bucket[i]);
2180    assert(bucket_for_size(bufmgr, size + 1, heap, 0) != &cache->bucket[i]);
2181 }
2182 
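/**
 * Sets up the BO cache buckets for one heap.
 *
 * Worked out from the loops below, each heap gets 25 buckets: 4KB, 8KB,
 * 16KB, 32KB, 64KB, 128KB, 256KB, 512KB, 1MB, 2MB, 4MB, 6MB, 8MB, 10MB,
 * 12MB, 14MB, 16MB, 20MB, 24MB, 28MB, 32MB, 40MB, 48MB, 56MB, and 64MB.
 */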
2183 static void
2184 init_cache_buckets(struct iris_bufmgr *bufmgr, enum iris_heap heap)
2185 {
2186    const unsigned _6MB = 6 * 1024 * 1024;
2187    const unsigned _8MB = 8 * 1024 * 1024;
2188    const unsigned _64MB = 64 * 1024 * 1024;
2189 
2190    /* power-of-two buckets from 4K to 4MB */
2191    for (uint64_t size = 4096; size < _8MB; size *= 2)
2192       add_bucket(bufmgr, size, heap);
2193 
2194    /* 6MB */
2195    add_bucket(bufmgr, _6MB, heap);
2196 
2197    /* 8MB+: three sizes between each power of two to reduce waste */
2198    for (uint64_t size = _8MB; size < _64MB; size *= 2) {
2199       add_bucket(bufmgr, size, heap);
2200       add_bucket(bufmgr, size + size * 1 / 4, heap);
2201       add_bucket(bufmgr, size + size * 2 / 4, heap);
2202       add_bucket(bufmgr, size + size * 3 / 4, heap);
2203    }
2204 
2205    /* 64MB */
2206    add_bucket(bufmgr, _64MB, heap);
2207 }
2208 
2209 static struct intel_buffer *
2210 intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
2211 {
2212    struct intel_buffer *buf = malloc(sizeof(struct intel_buffer));
2213    if (!buf)
2214       return NULL;
2215 
2216    struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;
2217 
2218    unsigned int page_size = getpagesize();
2219    size = MAX2(ALIGN(size, page_size), page_size);
2220 
2221    struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, BO_ALLOC_CAPTURE);
2222    if (!bo) {
2223       free(buf);
2224       return NULL;
2225    }
2226 
2227    simple_mtx_lock(&bufmgr->lock);
2228 
2229    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024);
2230    if (bo->address == 0ull)
2231       goto err_free;
2232 
2233    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
2234       goto err_vm_alloc;
2235 
2236    simple_mtx_unlock(&bufmgr->lock);
2237 
2238    bo->name = "aux-map";
2239    p_atomic_set(&bo->refcount, 1);
2240    bo->index = -1;
2241    bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
2242    bo->real.prime_fd = -1;
2243 
2244    buf->driver_bo = bo;
2245    buf->gpu = bo->address;
2246    buf->gpu_end = buf->gpu + bo->size;
2247    buf->map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
2248    return buf;
2249 
2250 err_vm_alloc:
2251    vma_free(bufmgr, bo->address, bo->size);
2252 err_free:
2253    free(buf);
2254    bo_free(bo);
2255    simple_mtx_unlock(&bufmgr->lock);
2256    return NULL;
2257 }
2258 
2259 static void
2260 intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
2261 {
2262    iris_bo_unreference((struct iris_bo*)buffer->driver_bo);
2263    free(buffer);
2264 }
2265 
2266 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
2267    .alloc = intel_aux_map_buffer_alloc,
2268    .free = intel_aux_map_buffer_free,
2269 };
2270 
2271 static bool
2272 iris_bufmgr_get_meminfo(struct iris_bufmgr *bufmgr,
2273                         struct intel_device_info *devinfo)
2274 {
2275    bufmgr->sys.region = &devinfo->mem.sram.mem;
2276    bufmgr->sys.size = devinfo->mem.sram.mappable.size;
2277 
2278    /* When the resizable BAR feature is disabled,
2279     * vram.mappable.size is only 256MB.
2280     * The remainder of the total VRAM size is reported in the
2281     * vram.unmappable.size variable.
2282     */
2283    bufmgr->vram.region = &devinfo->mem.vram.mem;
2284    bufmgr->vram.size = devinfo->mem.vram.mappable.size +
2285                        devinfo->mem.vram.unmappable.size;
2286 
2287    return true;
2288 }
2289 
2290 static bool
2291 iris_bufmgr_init_global_vm(struct iris_bufmgr *bufmgr)
2292 {
2293    switch (bufmgr->devinfo.kmd_type) {
2294    case INTEL_KMD_TYPE_I915:
2295       bufmgr->use_global_vm = iris_i915_init_global_vm(bufmgr, &bufmgr->global_vm_id);
2296       /* i915 doesn't require a VM, so return true even if use_global_vm is false */
2297       return true;
2298    case INTEL_KMD_TYPE_XE:
2299       if (!intel_bind_timeline_init(&bufmgr->bind_timeline, bufmgr->fd))
2300          return false;
2301 
2302       bufmgr->use_global_vm = iris_xe_init_global_vm(bufmgr, &bufmgr->global_vm_id);
2303       /* Xe requires VM */
2304       return bufmgr->use_global_vm;
2305    default:
2306       unreachable("missing");
2307       return false;
2308    }
2309 }
2310 
2311 /**
2312  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2313  * and manage buffer objects.
2314  *
2315  * \param fd File descriptor of the opened DRM device.
2316  */
2317 static struct iris_bufmgr *
2318 iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
2319 {
2320    if (devinfo->gtt_size <= IRIS_MEMZONE_OTHER_START)
2321       return NULL;
2322 
2323    struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
2324    if (bufmgr == NULL)
2325       return NULL;
2326 
2327    /* Handles to buffer objects belong to the device fd and are not
2328     * reference counted by the kernel.  If the same fd is used by
2329     * multiple parties (threads sharing the same screen bufmgr, or
2330     * even worse the same device fd passed to multiple libraries)
2331     * ownership of those handles is shared by those independent parties.
2332     *
2333     * Don't do this! Ensure that each library/bufmgr has its own device
2334     * fd so that its namespace does not clash with another.
2335     */
2336    bufmgr->fd = os_dupfd_cloexec(fd);
2337    if (bufmgr->fd == -1)
2338       goto error_dup;
2339 
2340    p_atomic_set(&bufmgr->refcount, 1);
2341 
2342    simple_mtx_init(&bufmgr->lock, mtx_plain);
2343    simple_mtx_init(&bufmgr->bo_deps_lock, mtx_plain);
2344 
2345    list_inithead(&bufmgr->zombie_list);
2346 
2347    bufmgr->devinfo = *devinfo;
2348    devinfo = &bufmgr->devinfo;
2349    bufmgr->bo_reuse = bo_reuse;
2350    iris_bufmgr_get_meminfo(bufmgr, devinfo);
2351    bufmgr->kmd_backend = iris_kmd_backend_get(devinfo->kmd_type);
2352 
2353    intel_common_update_device_info(bufmgr->fd, devinfo);
2354 
2355    if (!iris_bufmgr_init_global_vm(bufmgr))
2356       goto error_init_vm;
2357 
2358    STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull);
2359    const uint64_t _4GB = 1ull << 32;
2360    const uint64_t _2GB = 1ul << 31;
2361 
2362    /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
2363    const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
2364 
2365    const struct {
2366       uint64_t start;
2367       uint64_t size;
2368    } vma[IRIS_MEMZONE_COUNT] = {
2369       [IRIS_MEMZONE_SHADER] = {
2370          .start = PAGE_SIZE,
2371          .size  = _4GB_minus_1 - PAGE_SIZE
2372       },
2373       [IRIS_MEMZONE_BINDER] = {
2374          .start = IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE,
2375          .size  = IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
2376       },
2377       [IRIS_MEMZONE_SCRATCH] = {
2378          .start = IRIS_MEMZONE_SCRATCH_START,
2379          .size  = IRIS_SCRATCH_ZONE_SIZE
2380       },
2381       [IRIS_MEMZONE_SURFACE] = {
2382          .start = IRIS_MEMZONE_SURFACE_START,
2383          .size = _4GB_minus_1 - IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
2384       },
2385       [IRIS_MEMZONE_DYNAMIC] = {
2386          .start = IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,
2387 
2388          /* Wa_2209859288: the Tigerlake PRM's workarounds volume says:
2389           *
2390           *    "PSDunit is dropping MSB of the blend state pointer from SD
2391           *     FIFO [...] Limit the Blend State Pointer to < 2G"
2392           *
2393           * We restrict the dynamic state pool to 2GB so that we don't ever
2394           * get a BLEND_STATE pointer with the MSB set.  We aren't likely to
2395           * need the full 4GB for dynamic state anyway.
2396           */
2397          .size  = (devinfo->ver >= 12 ? _2GB : _4GB_minus_1)
2398                   - IRIS_BORDER_COLOR_POOL_SIZE
2399       },
2400       [IRIS_MEMZONE_OTHER] = {
2401          .start = IRIS_MEMZONE_OTHER_START,
2402 
2403          /* Leave the last 4GB out of the high vma range, so that no state
2404           * base address + size can overflow 48 bits.
2405           */
2406          .size  = (devinfo->gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START,
2407       },
2408    };
2409 
2410    for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
2411       util_vma_heap_init(&bufmgr->vma_allocator[i],
2412                          vma[i].start, vma[i].size);
2413    }
2414 
2415    if (INTEL_DEBUG(DEBUG_HEAPS)) {
2416       for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
2417          fprintf(stderr, "%-11s | 0x%016" PRIx64 "-0x%016" PRIx64 "\n",
2418                  memzone_name(i), vma[i].start,
2419                  vma[i].start + vma[i].size - 1);
2420       }
2421    }
2422 
2423    bufmgr->bucket_cache = calloc(iris_get_heap_max(bufmgr),
2424                                  sizeof(*bufmgr->bucket_cache));
2425    if (!bufmgr->bucket_cache)
2426       goto error_bucket_cache;
2427    for (int h = 0; h < iris_get_heap_max(bufmgr); h++)
2428       init_cache_buckets(bufmgr, h);
2429 
2430    unsigned min_slab_order = 8;  /* 256 bytes */
2431    unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
2432    unsigned num_slab_orders_per_allocator =
2433       (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS;
2434 
2435    /* Divide the size order range among slab managers. */
2436    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
2437       unsigned min_order = min_slab_order;
2438       unsigned max_order =
2439          MIN2(min_order + num_slab_orders_per_allocator, max_slab_order);
2440 
2441       if (!pb_slabs_init(&bufmgr->bo_slabs[i], min_order, max_order,
2442                          iris_get_heap_max(bufmgr), true, bufmgr,
2443                          iris_can_reclaim_slab,
2444                          iris_slab_alloc,
2445                          (void *) iris_slab_free)) {
2446          goto error_slabs_init;
2447       }
2448       min_slab_order = max_order + 1;
2449    }
2450 
2451    bufmgr->name_table =
2452       _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2453    bufmgr->handle_table =
2454       _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2455 
2456    if (devinfo->has_aux_map) {
2457       bufmgr->aux_map_ctx = intel_aux_map_init(bufmgr, &aux_map_allocator,
2458                                                devinfo);
2459       assert(bufmgr->aux_map_ctx);
2460    }
2461 
2462    iris_init_border_color_pool(bufmgr, &bufmgr->border_color_pool);
2463 
2464    if (intel_needs_workaround(devinfo, 14019708328)) {
2465       bufmgr->dummy_aux_bo = iris_bo_alloc(bufmgr, "dummy_aux", 4096, 4096,
2466                                            IRIS_MEMZONE_OTHER, BO_ALLOC_PLAIN);
2467       if (!bufmgr->dummy_aux_bo)
2468          goto error_alloc_bo;
2469    }
2470 
2471    /* Programming note from MI_MEM_FENCE specification:
2472     *
2473     *    Software must ensure STATE_SYSTEM_MEM_FENCE_ADDRESS command is
2474     *    programmed prior to programming this command.
2475     *
2476     * HAS 1607240579 then provides the size information: 4K
2477     */
2478    if (devinfo->verx10 >= 200) {
2479       bufmgr->mem_fence_bo = iris_bo_alloc(bufmgr, "mem_fence", 4096, 4096,
2480                                            IRIS_MEMZONE_OTHER, BO_ALLOC_SMEM);
2481       if (!bufmgr->mem_fence_bo)
2482          goto error_alloc_bo;
2483    }
2484 
2485    return bufmgr;
2486 
2487 error_alloc_bo:
2488    iris_bo_unreference(bufmgr->dummy_aux_bo);
2489    iris_bo_unreference(bufmgr->mem_fence_bo);
2490    iris_destroy_border_color_pool(&bufmgr->border_color_pool);
2491    intel_aux_map_finish(bufmgr->aux_map_ctx);
2492    _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
2493    _mesa_hash_table_destroy(bufmgr->name_table, NULL);
2494 error_slabs_init:
2495    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
2496       if (!bufmgr->bo_slabs[i].groups)
2497          break;
2498 
2499       pb_slabs_deinit(&bufmgr->bo_slabs[i]);
2500    }
2501    free(bufmgr->bucket_cache);
2502 error_bucket_cache:
2503    for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++)
2504       util_vma_heap_finish(&bufmgr->vma_allocator[i]);
2505    iris_bufmgr_destroy_global_vm(bufmgr);
2506 error_init_vm:
2507    close(bufmgr->fd);
2508 error_dup:
2509    free(bufmgr);
2510    return NULL;
2511 }
2512 
2513 static struct iris_bufmgr *
2514 iris_bufmgr_ref(struct iris_bufmgr *bufmgr)
2515 {
2516    p_atomic_inc(&bufmgr->refcount);
2517    return bufmgr;
2518 }
2519 
2520 void
2521 iris_bufmgr_unref(struct iris_bufmgr *bufmgr)
2522 {
2523    simple_mtx_lock(&global_bufmgr_list_mutex);
2524    if (p_atomic_dec_zero(&bufmgr->refcount)) {
2525       list_del(&bufmgr->link);
2526       iris_bufmgr_destroy(bufmgr);
2527    }
2528    simple_mtx_unlock(&global_bufmgr_list_mutex);
2529 }
2530 
2531 /** Returns a new unique id, to be used by screens. */
2532 int
2533 iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr)
2534 {
2535    return p_atomic_inc_return(&bufmgr->next_screen_id) - 1;
2536 }
2537 
2538 /**
2539  * Gets an existing GEM buffer manager or creates a new one.
2540  *
2541  * \param fd File descriptor of the opened DRM device.
2542  */
2543 struct iris_bufmgr *
2544 iris_bufmgr_get_for_fd(int fd, bool bo_reuse)
2545 {
2546    struct intel_device_info devinfo;
2547    struct stat st;
2548 
2549    if (fstat(fd, &st))
2550       return NULL;
2551 
2552    struct iris_bufmgr *bufmgr = NULL;
2553 
2554    simple_mtx_lock(&global_bufmgr_list_mutex);
2555    list_for_each_entry(struct iris_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
2556       struct stat iter_st;
2557       if (fstat(iter_bufmgr->fd, &iter_st))
2558          continue;
2559 
2560       if (st.st_rdev == iter_st.st_rdev) {
2561          assert(iter_bufmgr->bo_reuse == bo_reuse);
2562          bufmgr = iris_bufmgr_ref(iter_bufmgr);
2563          goto unlock;
2564       }
2565    }
2566 
2567    if (!intel_get_device_info_from_fd(fd, &devinfo, 8, -1))
2568       goto unlock;
2569 
2570    if (devinfo.ver < 8 || devinfo.platform == INTEL_PLATFORM_CHV)
2571       goto unlock;
2572 
2573    bufmgr = iris_bufmgr_create(&devinfo, fd, bo_reuse);
2574    if (bufmgr)
2575       list_addtail(&bufmgr->link, &global_bufmgr_list);
2576 
2577  unlock:
2578    simple_mtx_unlock(&global_bufmgr_list_mutex);
2579 
2580    return bufmgr;
2581 }
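
/* Usage sketch (illustrative only): a screen acquires a (possibly shared)
 * buffer manager for its DRM fd at creation and drops it at teardown.
 *
 *    struct iris_bufmgr *bufmgr = iris_bufmgr_get_for_fd(fd, bo_reuse);
 *    if (!bufmgr)
 *       ...fail screen creation...
 *    ...
 *    iris_bufmgr_unref(bufmgr);
 */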
2582 
2583 int
2584 iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr)
2585 {
2586    return bufmgr->fd;
2587 }
2588 
2589 void*
2590 iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr)
2591 {
2592    return bufmgr->aux_map_ctx;
2593 }
2594 
2595 simple_mtx_t *
2596 iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr)
2597 {
2598    return &bufmgr->bo_deps_lock;
2599 }
2600 
2601 struct iris_border_color_pool *
2602 iris_bufmgr_get_border_color_pool(struct iris_bufmgr *bufmgr)
2603 {
2604    return &bufmgr->border_color_pool;
2605 }
2606 
2607 uint64_t
2608 iris_bufmgr_vram_size(struct iris_bufmgr *bufmgr)
2609 {
2610    return bufmgr->vram.size;
2611 }
2612 
2613 uint64_t
2614 iris_bufmgr_sram_size(struct iris_bufmgr *bufmgr)
2615 {
2616    return bufmgr->sys.size;
2617 }
2618 
2619 const struct intel_device_info *
2620 iris_bufmgr_get_device_info(struct iris_bufmgr *bufmgr)
2621 {
2622    return &bufmgr->devinfo;
2623 }
2624 
2625 const struct iris_kmd_backend *
2626 iris_bufmgr_get_kernel_driver_backend(struct iris_bufmgr *bufmgr)
2627 {
2628    return bufmgr->kmd_backend;
2629 }
2630 
2631 uint32_t
2632 iris_bufmgr_get_global_vm_id(struct iris_bufmgr *bufmgr)
2633 {
2634    return bufmgr->global_vm_id;
2635 }
2636 
2637 bool
2638 iris_bufmgr_use_global_vm_id(struct iris_bufmgr *bufmgr)
2639 {
2640    return bufmgr->use_global_vm;
2641 }
2642 
2643 bool
2644 iris_bufmgr_compute_engine_supported(struct iris_bufmgr *bufmgr)
2645 {
2646    return bufmgr->devinfo.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
2647 }
2648 
2649 /**
2650  * Return the pat entry based on the bo heap and allocation flags.
2651  */
2652 const struct intel_device_info_pat_entry *
2653 iris_heap_to_pat_entry(const struct intel_device_info *devinfo,
2654                        enum iris_heap heap)
2655 {
2656    switch (heap) {
2657    case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
2658       return &devinfo->pat.cached_coherent;
2659    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
2660       return &devinfo->pat.writecombining;
2661    case IRIS_HEAP_DEVICE_LOCAL:
2662    case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
2663    case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
2664       return &devinfo->pat.writecombining;
2665    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
2666    case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
2667       return &devinfo->pat.compressed;
2668    default:
2669       unreachable("invalid heap for platforms using PAT entries");
2670    }
2671 }
2672 
2673 struct intel_bind_timeline *
2674 iris_bufmgr_get_bind_timeline(struct iris_bufmgr *bufmgr)
2675 {
2676    return &bufmgr->bind_timeline;
2677 }
2678 
2679 uint64_t
2680 iris_bufmgr_get_dummy_aux_address(struct iris_bufmgr *bufmgr)
2681 {
2682    return bufmgr->dummy_aux_bo ? bufmgr->dummy_aux_bo->address : 0;
2683 }
2684 
2685 struct iris_bo *
2686 iris_bufmgr_get_mem_fence_bo(struct iris_bufmgr *bufmgr)
2687 {
2688    return bufmgr->mem_fence_bo;
2689 }
2690