1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_bufmgr.c
25 *
26 * The Iris buffer manager.
27 *
28 * Buffer objects (BOs) are GEM allocations obtained from the i915 or Xe
29 * kernel driver; this file is the main interface to GEM in the kernel.
30 * Freed BOs are kept in power-of-two-sized bucket caches for reuse, small
31 * BOs are suballocated from slabs, and addresses come from per-zone VMA heaps.
32 */
33
34 #include <util/u_atomic.h>
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <assert.h>
41 #include <sys/ioctl.h>
42 #include <sys/mman.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <stdbool.h>
46 #include <time.h>
47 #include <unistd.h>
48
49 #include "errno.h"
50 #include "common/intel_aux_map.h"
51 #include "common/intel_mem.h"
52 #include "c99_alloca.h"
53 #include "dev/intel_debug.h"
54 #include "common/intel_common.h"
55 #include "common/intel_gem.h"
56 #include "dev/intel_device_info.h"
57 #include "drm-uapi/dma-buf.h"
58 #include "isl/isl.h"
59 #include "util/os_mman.h"
60 #include "util/u_debug.h"
61 #include "util/macros.h"
62 #include "util/hash_table.h"
63 #include "util/list.h"
64 #include "util/os_file.h"
65 #include "util/u_dynarray.h"
66 #include "util/vma.h"
67 #include "iris_bufmgr.h"
68 #include "iris_context.h"
69 #include "string.h"
70 #include "iris_kmd_backend.h"
71 #include "i915/iris_bufmgr.h"
72 #include "xe/iris_bufmgr.h"
73
74 #include <xf86drm.h>
75
76 #ifdef HAVE_VALGRIND
77 #include <valgrind.h>
78 #include <memcheck.h>
79 #define VG(x) x
80 #else
81 #define VG(x)
82 #endif
83
84 /* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
85 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
86 * leaked. All because it does not call VG(cli_free) from its
87 * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
 88 * an allocation, we mark it available for use upon mmapping and remove
89 * it upon unmapping.
90 */
91 #define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
92 #define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
93
94 /* On FreeBSD PAGE_SIZE is already defined in
95 * /usr/include/machine/param.h that is indirectly
96 * included here.
97 */
98 #ifndef PAGE_SIZE
99 #define PAGE_SIZE 4096
100 #endif
101
102 #define WARN_ONCE(cond, fmt...) do { \
103 if (unlikely(cond)) { \
104 static bool _warned = false; \
105 if (!_warned) { \
106 fprintf(stderr, "WARNING: "); \
107 fprintf(stderr, fmt); \
108 _warned = true; \
109 } \
110 } \
111 } while (0)
112
113 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
114
115 /**
116 * For debugging purposes, this returns a time in seconds.
117 */
118 static double
 119 get_time(void)
120 {
121 struct timespec tp;
122
123 clock_gettime(CLOCK_MONOTONIC, &tp);
124
125 return tp.tv_sec + tp.tv_nsec / 1000000000.0;
126 }
127
128 static inline int
 129 atomic_add_unless(int *v, int add, int unless)
130 {
131 int c, old;
132 c = p_atomic_read(v);
133 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
134 c = old;
135 return c == unless;
136 }
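/* Illustration (added here, not part of the original source): atomic_add_unless()
 * adds `add` to *v only if *v != unless, and returns true when *v was already
 * equal to `unless` (in which case nothing was added).  A typical use is the
 * reference-count fast path later in this file:
 *
 *    if (atomic_add_unless(&bo->refcount, -1, 1)) {
 *       // refcount was exactly 1: the caller holds the last reference, so
 *       // take the lock and drop it there (see iris_bo_unreference()).
 *    }
 */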
137
138 static const char *
 139 memzone_name(enum iris_memory_zone memzone)
140 {
141 const char *names[] = {
142 [IRIS_MEMZONE_SHADER] = "shader",
143 [IRIS_MEMZONE_BINDER] = "binder",
144 [IRIS_MEMZONE_SCRATCH] = "scratchsurf",
145 [IRIS_MEMZONE_SURFACE] = "surface",
146 [IRIS_MEMZONE_DYNAMIC] = "dynamic",
147 [IRIS_MEMZONE_OTHER] = "other",
148 [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor",
149 };
150 assert(memzone < ARRAY_SIZE(names));
151 return names[memzone];
152 }
153
154 struct bo_cache_bucket {
155 /** List of cached BOs. */
156 struct list_head head;
157
158 /** Size of this bucket, in bytes. */
159 uint64_t size;
160 };
161
162 struct bo_export {
163 /** File descriptor associated with a handle export. */
164 int drm_fd;
165
166 /** GEM handle in drm_fd */
167 uint32_t gem_handle;
168
169 struct list_head link;
170 };
171
172 struct iris_memregion {
173 struct intel_memory_class_instance *region;
174 uint64_t size;
175 };
176
177 #define NUM_SLAB_ALLOCATORS 3
178
179 struct iris_slab {
180 struct pb_slab base;
181
182 /** The BO representing the entire slab */
183 struct iris_bo *bo;
184
185 /** Array of iris_bo structs representing BOs allocated out of this slab */
186 struct iris_bo *entries;
187 };
188
189 #define BUCKET_ARRAY_SIZE 25
190
191 struct iris_bucket_cache {
192 struct bo_cache_bucket bucket[BUCKET_ARRAY_SIZE];
193 int num_buckets;
194 };
195
196 struct iris_bufmgr {
197 /**
 198 * Link in the global list of buffer managers.
199 */
200 struct list_head link;
201
202 uint32_t refcount;
203
204 int fd;
205
206 simple_mtx_t lock;
207 simple_mtx_t bo_deps_lock;
208
209 /** Array of lists of cached gem objects of power-of-two sizes */
210 struct iris_bucket_cache *bucket_cache;
211
212 time_t time;
213
214 struct hash_table *name_table;
215 struct hash_table *handle_table;
216
217 /**
218 * List of BOs which we've effectively freed, but are hanging on to
219 * until they're idle before closing and returning the VMA.
220 */
221 struct list_head zombie_list;
222
223 struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT];
224
225 struct iris_memregion vram, sys;
226
227 /* Used only when use_global_vm is true. */
228 uint32_t global_vm_id;
229
230 int next_screen_id;
231
232 struct intel_device_info devinfo;
233 const struct iris_kmd_backend *kmd_backend;
234 struct intel_bind_timeline bind_timeline; /* Xe only */
235 bool bo_reuse:1;
236 bool use_global_vm:1;
237
238 struct intel_aux_map_context *aux_map_ctx;
239
240 struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
241
242 struct iris_border_color_pool border_color_pool;
243
244 struct iris_bo *dummy_aux_bo;
245 struct iris_bo *mem_fence_bo;
246 };
247
248 static simple_mtx_t global_bufmgr_list_mutex = SIMPLE_MTX_INITIALIZER;
249 static struct list_head global_bufmgr_list = {
250 .next = &global_bufmgr_list,
251 .prev = &global_bufmgr_list,
252 };
253
254 static void bo_free(struct iris_bo *bo);
255
256 static struct iris_bo *
 257 find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
258 {
259 struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
260 struct iris_bo *bo = entry ? entry->data : NULL;
261
262 if (bo) {
263 assert(iris_bo_is_external(bo));
264 assert(iris_bo_is_real(bo));
265 assert(!bo->real.reusable);
266
267 /* Being non-reusable, the BO cannot be in the cache lists, but it
268 * may be in the zombie list if it had reached zero references, but
269 * we hadn't yet closed it...and then reimported the same BO. If it
270 * is, then remove it since it's now been resurrected.
271 */
272 if (list_is_linked(&bo->head))
273 list_del(&bo->head);
274
275 iris_bo_reference(bo);
276 }
277
278 return bo;
279 }
280
281 /**
 282 * Finds the bucket that best fits the requested size.
 283 * The lookup runs in O(1): the bucket index is computed directly from
 284 * the size instead of iterating through all the buckets.
285 */
286 static struct bo_cache_bucket *
 287 bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size,
288 enum iris_heap heap, unsigned flags)
289 {
290 if (flags & BO_ALLOC_PROTECTED)
291 return NULL;
292
293 const struct intel_device_info *devinfo = &bufmgr->devinfo;
294 struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
295
296 if (devinfo->kmd_type == INTEL_KMD_TYPE_XE &&
297 (flags & (BO_ALLOC_SHARED | BO_ALLOC_SCANOUT)))
298 return NULL;
299
300 const unsigned _4MB = 4 * 1024 * 1024;
301 const unsigned _6MB = 6 * 1024 * 1024;
302 const unsigned _8MB = 8 * 1024 * 1024;
303 const unsigned _64MB = 64 * 1024 * 1024;
304 unsigned index;
305
306 if (size <= 4096) {
307 index = 0;
308 } else if (size <= _4MB) {
309 index = util_logbase2_ceil(size) - 12;
310 } else if (size <= _6MB) {
311 index = 11;
312 } else if (size <= _8MB) {
313 index = 12;
314 } else if (size <= _64MB) {
315 const unsigned power = util_logbase2(size);
316 const unsigned base_size = 1u << power;
317 const unsigned quarter_size = base_size / 4;
318 const unsigned quarter = DIV_ROUND_UP(size - base_size, quarter_size);
319 index = 12 + (power - 23) * 4 + quarter;
320 } else {
321 return NULL;
322 }
323
324 return (index < cache->num_buckets) ? &cache->bucket[index] : NULL;
325 }
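/* Worked example of the index math above (added for illustration):
 *
 *    size = 12 MB  ->  falls in the (8 MB, 64 MB] range
 *    power        = util_logbase2(12 MB)             = 23   (2^23 = 8 MB)
 *    base_size    = 1 << 23                          = 8 MB
 *    quarter_size = base_size / 4                    = 2 MB
 *    quarter      = DIV_ROUND_UP(12 MB - 8 MB, 2 MB) = 2
 *    index        = 12 + (23 - 23) * 4 + 2           = 14
 *
 * so sizes between 8 MB and 64 MB land in buckets spaced a quarter of a
 * power of two apart, assuming the heap's cache has that many buckets.
 */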
326
327 enum iris_memory_zone
 328 iris_memzone_for_address(uint64_t address)
329 {
330 STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START);
331 STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_SCRATCH_START);
332 STATIC_ASSERT(IRIS_MEMZONE_SCRATCH_START == IRIS_MEMZONE_BINDER_START);
333 STATIC_ASSERT(IRIS_MEMZONE_BINDER_START > IRIS_MEMZONE_SHADER_START);
334 STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START > IRIS_MEMZONE_SURFACE_START);
335 STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);
336
337 if (address >= IRIS_MEMZONE_OTHER_START)
338 return IRIS_MEMZONE_OTHER;
339
340 if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
341 return IRIS_MEMZONE_BORDER_COLOR_POOL;
342
343 if (address > IRIS_MEMZONE_DYNAMIC_START)
344 return IRIS_MEMZONE_DYNAMIC;
345
346 if (address >= IRIS_MEMZONE_SURFACE_START)
347 return IRIS_MEMZONE_SURFACE;
348
349 if (address >= (IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE))
350 return IRIS_MEMZONE_BINDER;
351
352 if (address >= IRIS_MEMZONE_SCRATCH_START)
353 return IRIS_MEMZONE_SCRATCH;
354
355 return IRIS_MEMZONE_SHADER;
356 }
357
358 /**
359 * Allocate a section of virtual memory for a buffer, assigning an address.
360 *
361 * This uses either the bucket allocator for the given size, or the large
362 * object allocator (util_vma).
363 */
364 static uint64_t
 365 vma_alloc(struct iris_bufmgr *bufmgr,
366 enum iris_memory_zone memzone,
367 uint64_t size,
368 uint64_t alignment)
369 {
370 simple_mtx_assert_locked(&bufmgr->lock);
371
372 const unsigned _2mb = 2 * 1024 * 1024;
373
374 /* Force minimum alignment based on device requirements */
375 assert((alignment & (alignment - 1)) == 0);
376 alignment = MAX2(alignment, bufmgr->devinfo.mem_alignment);
377
378 /* If the allocation is a multiple of 2MB, ensure the virtual address is
379 * aligned to 2MB, so that it's possible for the kernel to use 64K pages.
380 */
381 if (size % _2mb == 0)
382 alignment = MAX2(alignment, _2mb);
383
384 if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
385 return IRIS_BORDER_COLOR_POOL_ADDRESS;
386
387 uint64_t addr =
388 util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment);
389
390 assert((addr >> 48ull) == 0);
391 assert((addr % alignment) == 0);
392
393 return intel_canonical_address(addr);
394 }
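/* Note (added for illustration): intel_canonical_address() sign-extends
 * bit 47 so the returned address is in the canonical form the hardware
 * expects, e.g.:
 *
 *    0x0000800000001000  ->  0xffff800000001000
 *
 * while addresses with bit 47 clear are returned unchanged.  The assert on
 * (addr >> 48) == 0 above guarantees the heap only hands out 48-bit
 * addresses before canonicalization.
 */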
395
396 static void
 397 vma_free(struct iris_bufmgr *bufmgr,
398 uint64_t address,
399 uint64_t size)
400 {
401 simple_mtx_assert_locked(&bufmgr->lock);
402
403 if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
404 return;
405
406 /* Un-canonicalize the address. */
407 address = intel_48b_address(address);
408
409 if (address == 0ull)
410 return;
411
412 enum iris_memory_zone memzone = iris_memzone_for_address(address);
413
414 assert(memzone < ARRAY_SIZE(bufmgr->vma_allocator));
415
416 util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
417 }
418
419 /* Exports a BO's implicit synchronization state to a drm_syncobj, returning
 420 * its wrapping iris_syncobj. The drm_syncobj is newly created and must be
421 * destroyed by the caller after the execbuf ioctl.
422 */
423 struct iris_syncobj *
 424 iris_bo_export_sync_state(struct iris_bo *bo)
425 {
426 struct iris_bufmgr *bufmgr = bo->bufmgr;
427 int drm_fd = iris_bufmgr_get_fd(bufmgr);
428
429 struct iris_syncobj *iris_syncobj = iris_create_syncobj(bufmgr);
430
431 struct dma_buf_export_sync_file export_sync_file_ioctl = {
432 .flags = DMA_BUF_SYNC_RW, /* TODO */
433 .fd = -1,
434 };
435 if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
436 &export_sync_file_ioctl)) {
437 fprintf(stderr, "DMA_BUF_IOCTL_EXPORT_SYNC_FILE ioctl failed (%d)\n",
438 errno);
439 goto error_export;
440 }
441
442 int sync_file_fd = export_sync_file_ioctl.fd;
443 assert(sync_file_fd >= 0);
444
445 struct drm_syncobj_handle syncobj_import_ioctl = {
446 .handle = iris_syncobj->handle,
447 .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
448 .fd = sync_file_fd,
449 };
450 if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
451 &syncobj_import_ioctl)) {
452 fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE ioctl failed (%d)\n",
453 errno);
454 }
455
456 close(sync_file_fd);
457
458 return iris_syncobj;
459 error_export:
460 iris_syncobj_destroy(bufmgr, iris_syncobj);
461 return NULL;
462 }
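/* Usage sketch (added for illustration, mirroring iris_bo_wait_syncobj()
 * further below):
 *
 *    struct iris_syncobj *sobj = iris_bo_export_sync_state(bo);
 *    if (sobj) {
 *       // wait on sobj->handle via DRM_IOCTL_SYNCOBJ_WAIT, or add it to a
 *       // batch's wait list, then drop the reference when done:
 *       iris_syncobj_reference(bufmgr, &sobj, NULL);
 *    }
 */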
463
464 /* Import the state of a sync_file_fd (which we should have gotten from
465 * batch_syncobj_to_sync_file_fd) into a BO as its implicit synchronization
466 * state.
467 */
468 void
 469 iris_bo_import_sync_state(struct iris_bo *bo, int sync_file_fd)
470 {
471 struct dma_buf_import_sync_file import_sync_file_ioctl = {
472 .flags = DMA_BUF_SYNC_WRITE,
473 .fd = sync_file_fd,
474 };
475 if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
476 &import_sync_file_ioctl))
477 fprintf(stderr, "DMA_BUF_IOCTL_IMPORT_SYNC_FILE ioctl failed (%d)\n",
478 errno);
479 }
480
481 /* A timeout of 0 just checks for busyness. */
482 static int
 483 iris_bo_wait_syncobj(struct iris_bo *bo, int64_t timeout_ns)
484 {
485 int ret = 0;
486 struct iris_bufmgr *bufmgr = bo->bufmgr;
487 const bool is_external = iris_bo_is_real(bo) && bo->real.prime_fd != -1;
488 struct iris_syncobj *external_implicit_syncobj = NULL;
489
490 /* If we know it's idle, don't bother with the kernel round trip.
491 * Can't do that for Xe KMD with external BOs since we have to check the
492 * implicit synchronization information.
493 */
494 if (!is_external && bo->idle)
495 return 0;
496
497 simple_mtx_lock(&bufmgr->bo_deps_lock);
498
499 const int handles_len = bo->deps_size * IRIS_BATCH_COUNT * 2 + is_external;
500 uint32_t *handles = handles_len <= 32 ?
501 (uint32_t *)alloca(handles_len * sizeof(*handles)) :
502 (uint32_t *)malloc(handles_len * sizeof(*handles));
503 int handle_count = 0;
504
505 if (is_external) {
506 external_implicit_syncobj = iris_bo_export_sync_state(bo);
507 if (external_implicit_syncobj)
508 handles[handle_count++] = external_implicit_syncobj->handle;
509 }
510
511 for (int d = 0; d < bo->deps_size; d++) {
512 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
513 struct iris_syncobj *r = bo->deps[d].read_syncobjs[b];
514 struct iris_syncobj *w = bo->deps[d].write_syncobjs[b];
515 if (r)
516 handles[handle_count++] = r->handle;
517 if (w)
518 handles[handle_count++] = w->handle;
519 }
520 }
521
522 if (handle_count == 0)
523 goto out;
524
525 /* Unlike the gem wait, negative values are not infinite here. */
526 int64_t timeout_abs = os_time_get_absolute_timeout(timeout_ns);
527 if (timeout_abs < 0)
528 timeout_abs = INT64_MAX;
529
530 struct drm_syncobj_wait args = {
531 .handles = (uintptr_t) handles,
532 .timeout_nsec = timeout_abs,
533 .count_handles = handle_count,
534 .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
535 };
536
537 ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
538 if (ret != 0) {
539 ret = -errno;
540 goto out;
541 }
542
 543 /* We just waited on everything, so clear all the deps. */
544 for (int d = 0; d < bo->deps_size; d++) {
545 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
546 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
547 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
548 }
549 }
550
551 out:
552 if (handles_len > 32)
553 free(handles);
554 if (external_implicit_syncobj)
555 iris_syncobj_reference(bufmgr, &external_implicit_syncobj, NULL);
556
557 simple_mtx_unlock(&bufmgr->bo_deps_lock);
558 return ret;
559 }
560
561 static bool
 562 iris_bo_busy_syncobj(struct iris_bo *bo)
563 {
564 return iris_bo_wait_syncobj(bo, 0) == -ETIME;
565 }
566
567 bool
 568 iris_bo_busy(struct iris_bo *bo)
569 {
570 bool busy;
571
572 switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
573 case INTEL_KMD_TYPE_I915:
574 if (iris_bo_is_external(bo))
575 busy = iris_i915_bo_busy_gem(bo);
576 else
577 busy = iris_bo_busy_syncobj(bo);
578 break;
579 case INTEL_KMD_TYPE_XE:
580 busy = iris_bo_busy_syncobj(bo);
581 break;
582 default:
583 unreachable("missing");
584 busy = true;
585 }
586
587 bo->idle = !busy;
588
589 return busy;
590 }
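/* Usage note (added for illustration): iris_bo_busy() also refreshes
 * bo->idle, so callers can use it both as a query and as a cheap cache
 * update, e.g.:
 *
 *    if (!iris_bo_busy(bo)) {
 *       void *p = iris_bo_map(NULL, bo, MAP_WRITE | MAP_ASYNC);
 *       if (p)
 *          memcpy(p, data, size);   // safe: nothing on the GPU is using bo
 *    }
 */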
591
592 /**
593 * Specify the volatility of the buffer.
 594 * \param bo Buffer whose volatility is being set
595 * \param state The purgeable status
596 *
597 * Use IRIS_MADVICE_DONT_NEED to mark the buffer as purgeable, and it will be
598 * reclaimed under memory pressure. If you subsequently require the buffer,
599 * then you must pass IRIS_MADVICE_WILL_NEED to mark the buffer as required.
600 *
601 * Returns true if the buffer was retained, or false if it was discarded
602 * whilst marked as IRIS_MADVICE_DONT_NEED.
603 */
604 static inline bool
 605 iris_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
606 {
607 /* We can't madvise suballocated BOs. */
608 assert(iris_bo_is_real(bo));
609
610 return bo->bufmgr->kmd_backend->bo_madvise(bo, state);
611 }
612
613 static struct iris_bo *
 614 bo_calloc(void)
615 {
616 struct iris_bo *bo = calloc(1, sizeof(*bo));
617 if (!bo)
618 return NULL;
619
620 list_inithead(&bo->real.exports);
621
622 bo->hash = _mesa_hash_pointer(bo);
623
624 return bo;
625 }
626
627 static void
 628 bo_unmap(struct iris_bo *bo)
629 {
630 assert(iris_bo_is_real(bo));
631
632 VG_NOACCESS(bo->real.map, bo->size);
633 os_munmap(bo->real.map, bo->size);
634 bo->real.map = NULL;
635 }
636
637 static struct pb_slabs *
 638 get_slabs(struct iris_bufmgr *bufmgr, uint64_t size)
639 {
640 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
641 struct pb_slabs *slabs = &bufmgr->bo_slabs[i];
642
643 if (size <= 1ull << (slabs->min_order + slabs->num_orders - 1))
644 return slabs;
645 }
646
647 unreachable("should have found a valid slab for this size");
648 }
649
650 /* Return the power of two size of a slab entry matching the input size. */
651 static unsigned
 652 get_slab_pot_entry_size(struct iris_bufmgr *bufmgr, unsigned size)
653 {
654 unsigned entry_size = util_next_power_of_two(size);
655 unsigned min_entry_size = 1 << bufmgr->bo_slabs[0].min_order;
656
657 return MAX2(entry_size, min_entry_size);
658 }
659
660 /* Return the slab entry alignment. */
661 static unsigned
 662 get_slab_entry_alignment(struct iris_bufmgr *bufmgr, unsigned size)
663 {
664 unsigned entry_size = get_slab_pot_entry_size(bufmgr, size);
665
666 if (size <= entry_size * 3 / 4)
667 return entry_size / 4;
668
669 return entry_size;
670 }
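/* Worked example (added for illustration, assuming the smallest slab order
 * is well below these sizes):
 *
 *    size = 96 KB  -> pot entry = 128 KB; 96 KB <= 3/4 * 128 KB,
 *                     so the entry alignment is 128 KB / 4 = 32 KB.
 *    size = 100 KB -> pot entry = 128 KB; 100 KB > 3/4 * 128 KB,
 *                     so the full 128 KB alignment is required.
 */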
671
672 static bool
 673 iris_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
674 {
675 struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
676
677 return !iris_bo_busy(bo);
678 }
679
680 static void
 681 iris_slab_free(void *priv, struct pb_slab *pslab)
682 {
683 struct iris_bufmgr *bufmgr = priv;
684 struct iris_slab *slab = (void *) pslab;
685 struct intel_aux_map_context *aux_map_ctx = bufmgr->aux_map_ctx;
686
687 assert(!slab->bo->aux_map_address);
688
689 /* Since we're freeing the whole slab, all buffers allocated out of it
690 * must be reclaimable. We require buffers to be idle to be reclaimed
691 * (see iris_can_reclaim_slab()), so we know all entries must be idle.
692 * Therefore, we can safely unmap their aux table entries.
693 */
694 for (unsigned i = 0; i < pslab->num_entries; i++) {
695 struct iris_bo *bo = &slab->entries[i];
696 if (aux_map_ctx && bo->aux_map_address) {
697 intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size);
698 bo->aux_map_address = 0;
699 }
700
701 /* Unref read/write dependency syncobjs and free the array. */
702 for (int d = 0; d < bo->deps_size; d++) {
703 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
704 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
705 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
706 }
707 }
708 free(bo->deps);
709 }
710
711 iris_bo_unreference(slab->bo);
712
713 free(slab->entries);
714 free(slab);
715 }
716
717 static struct pb_slab *
 718 iris_slab_alloc(void *priv,
719 unsigned heap,
720 unsigned entry_size,
721 unsigned group_index)
722 {
723 struct iris_bufmgr *bufmgr = priv;
724 struct iris_slab *slab = calloc(1, sizeof(struct iris_slab));
725 uint32_t flags = BO_ALLOC_NO_SUBALLOC;
726 unsigned slab_size = 0;
727 /* We only support slab allocation for IRIS_MEMZONE_OTHER */
728 enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
729
730 if (!slab)
731 return NULL;
732
733 struct pb_slabs *slabs = bufmgr->bo_slabs;
734
735 /* Determine the slab buffer size. */
736 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
737 unsigned max_entry_size =
738 1 << (slabs[i].min_order + slabs[i].num_orders - 1);
739
740 if (entry_size <= max_entry_size) {
741 /* The slab size is twice the size of the largest possible entry. */
742 slab_size = max_entry_size * 2;
743
744 if (!util_is_power_of_two_nonzero(entry_size)) {
745 assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));
746
747 /* If the entry size is 3/4 of a power of two, we would waste
748 * space and not gain anything if we allocated only twice the
749 * power of two for the backing buffer:
750 *
751 * 2 * 3/4 = 1.5 usable with buffer size 2
752 *
753 * Allocating 5 times the entry size leads us to the next power
754 * of two and results in a much better memory utilization:
755 *
756 * 5 * 3/4 = 3.75 usable with buffer size 4
757 */
758 if (entry_size * 5 > slab_size)
759 slab_size = util_next_power_of_two(entry_size * 5);
760 }
761
762 /* The largest slab should have the same size as the PTE fragment
763 * size to get faster address translation.
764 *
765 * TODO: move this to intel_device_info?
766 */
767 const unsigned pte_size = 2 * 1024 * 1024;
768
769 if (i == NUM_SLAB_ALLOCATORS - 1 && slab_size < pte_size)
770 slab_size = pte_size;
771
772 break;
773 }
774 }
775 assert(slab_size != 0);
776
777 switch (heap) {
778 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
779 case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
780 flags |= BO_ALLOC_COMPRESSED;
781 break;
782 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
783 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
784 flags |= BO_ALLOC_SMEM;
785 break;
786 case IRIS_HEAP_DEVICE_LOCAL:
787 flags |= BO_ALLOC_LMEM;
788 break;
789 case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
790 flags |= BO_ALLOC_LMEM | BO_ALLOC_CPU_VISIBLE;
791 break;
792 default:
793 flags |= BO_ALLOC_PLAIN;
794 }
795
796 slab->bo =
797 iris_bo_alloc(bufmgr, "slab", slab_size, slab_size, memzone, flags);
798 if (!slab->bo)
799 goto fail;
800
801 slab_size = slab->bo->size;
802
803 slab->base.num_entries = slab_size / entry_size;
804 slab->base.num_free = slab->base.num_entries;
805 slab->base.group_index = group_index;
806 slab->base.entry_size = entry_size;
807 slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
808 if (!slab->entries)
809 goto fail_bo;
810
811 list_inithead(&slab->base.free);
812
813 for (unsigned i = 0; i < slab->base.num_entries; i++) {
814 struct iris_bo *bo = &slab->entries[i];
815
816 bo->size = entry_size;
817 bo->bufmgr = bufmgr;
818 bo->hash = _mesa_hash_pointer(bo);
819 bo->gem_handle = 0;
820 bo->address = intel_canonical_address(slab->bo->address + i * entry_size);
821 bo->aux_map_address = 0;
822 bo->index = -1;
823 bo->refcount = 0;
824 bo->idle = true;
825 bo->zeroed = slab->bo->zeroed;
826
827 bo->slab.entry.slab = &slab->base;
828
829 bo->slab.real = iris_get_backing_bo(slab->bo);
830
831 list_addtail(&bo->slab.entry.head, &slab->base.free);
832 }
833
834 return &slab->base;
835
836 fail_bo:
837 iris_bo_unreference(slab->bo);
838 fail:
839 free(slab);
840 return NULL;
841 }
842
843 /**
844 * Selects a heap for the given buffer allocation flags.
845 *
846 * This determines the cacheability, coherency, and mmap mode settings.
847 */
848 static enum iris_heap
 849 flags_to_heap(struct iris_bufmgr *bufmgr, unsigned flags)
850 {
851 const struct intel_device_info *devinfo = &bufmgr->devinfo;
852
853 if (bufmgr->vram.size > 0) {
854 if (flags & BO_ALLOC_COMPRESSED)
855 return IRIS_HEAP_DEVICE_LOCAL_COMPRESSED;
856
857 /* Discrete GPUs currently always snoop CPU caches. */
858 if ((flags & BO_ALLOC_SMEM) || (flags & BO_ALLOC_CACHED_COHERENT))
859 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
860
861 if ((flags & BO_ALLOC_LMEM) ||
862 ((flags & BO_ALLOC_SCANOUT) && !(flags & BO_ALLOC_SHARED))) {
863
864 if ((flags & BO_ALLOC_CPU_VISIBLE) && !intel_vram_all_mappable(devinfo))
865 return IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR;
866
867 return IRIS_HEAP_DEVICE_LOCAL;
868 }
869
870 return IRIS_HEAP_DEVICE_LOCAL_PREFERRED;
871 } else if (devinfo->has_llc) {
872 assert(!(flags & BO_ALLOC_LMEM));
873
874 if (flags & (BO_ALLOC_SCANOUT | BO_ALLOC_SHARED))
875 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
876
877 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
878 } else {
879 assert(!devinfo->has_llc);
880 assert(!(flags & BO_ALLOC_LMEM));
881
882 if (flags & BO_ALLOC_COMPRESSED)
883 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;
884
885 if (flags & BO_ALLOC_CACHED_COHERENT)
886 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
887
888 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
889 }
890 }
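/* Informal summary of the mapping above (added for readability, not
 * exhaustive):
 *
 *    discrete (vram.size > 0):
 *       BO_ALLOC_COMPRESSED                  -> DEVICE_LOCAL_COMPRESSED
 *       BO_ALLOC_SMEM / CACHED_COHERENT      -> SYSTEM_MEMORY_CACHED_COHERENT
 *       BO_ALLOC_LMEM, or SCANOUT w/o SHARED -> DEVICE_LOCAL (or the
 *                                               small-BAR CPU-visible heap)
 *       everything else                      -> DEVICE_LOCAL_PREFERRED
 *    integrated with LLC:
 *       SCANOUT or SHARED                    -> SYSTEM_MEMORY_UNCACHED
 *       everything else                      -> SYSTEM_MEMORY_CACHED_COHERENT
 *    integrated without LLC:
 *       COMPRESSED                           -> SYSTEM_MEMORY_UNCACHED_COMPRESSED
 *       CACHED_COHERENT                      -> SYSTEM_MEMORY_CACHED_COHERENT
 *       everything else                      -> SYSTEM_MEMORY_UNCACHED
 */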
891
892 static bool
 893 zero_bo(struct iris_bufmgr *bufmgr,
894 unsigned flags,
895 struct iris_bo *bo)
896 {
897 assert(flags & BO_ALLOC_ZEROED);
898
899 if (bo->zeroed)
900 return true;
901
902 if (bufmgr->devinfo.has_flat_ccs && (flags & BO_ALLOC_LMEM)) {
903 /* With flat CCS, all allocations in LMEM have memory ranges with
904 * corresponding CCS elements. These elements are only accessible
905 * through GPU commands, but we don't issue GPU commands here.
906 */
907 return false;
908 }
909
910 void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
911 if (!map)
912 return false;
913
914 memset(map, 0, bo->size);
915 bo->zeroed = true;
916 return true;
917 }
918
919 static struct iris_bo *
 920 alloc_bo_from_slabs(struct iris_bufmgr *bufmgr,
921 const char *name,
922 uint64_t size,
923 uint32_t alignment,
924 unsigned flags)
925 {
926 if (flags & BO_ALLOC_NO_SUBALLOC)
927 return NULL;
928
929 struct pb_slabs *last_slab = &bufmgr->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
930 unsigned max_slab_entry_size =
931 1 << (last_slab->min_order + last_slab->num_orders - 1);
932
933 if (size > max_slab_entry_size)
934 return NULL;
935
936 struct pb_slab_entry *entry;
937
938 enum iris_heap heap = flags_to_heap(bufmgr, flags);
939
940 unsigned alloc_size = size;
941
942 /* Always use slabs for sizes less than 4 KB because the kernel aligns
943 * everything to 4 KB.
944 */
945 if (size < alignment && alignment <= 4 * 1024)
946 alloc_size = alignment;
947
948 if (alignment > get_slab_entry_alignment(bufmgr, alloc_size)) {
949 /* 3/4 allocations can return too small alignment.
950 * Try again with a power of two allocation size.
951 */
952 unsigned pot_size = get_slab_pot_entry_size(bufmgr, alloc_size);
953
954 if (alignment <= pot_size) {
955 /* This size works but wastes some memory to fulfill the alignment. */
956 alloc_size = pot_size;
957 } else {
958 /* can't fulfill alignment requirements */
959 return NULL;
960 }
961 }
962
963 struct pb_slabs *slabs = get_slabs(bufmgr, alloc_size);
964 entry = pb_slab_alloc(slabs, alloc_size, heap);
965 if (!entry) {
966 /* Clean up and try again... */
967 pb_slabs_reclaim(slabs);
968
969 entry = pb_slab_alloc(slabs, alloc_size, heap);
970 }
971 if (!entry)
972 return NULL;
973
974 struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
975
976 if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
977 /* This buffer was associated with an aux-buffer range. We only allow
978 * slab allocated buffers to be reclaimed when idle (not in use by an
979 * executing batch). (See iris_can_reclaim_slab().) So we know that
980 * our previous aux mapping is no longer in use, and we can safely
981 * remove it.
982 */
983 intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
984 bo->size);
985 bo->aux_map_address = 0;
986 }
987
988 p_atomic_set(&bo->refcount, 1);
989 bo->name = name;
990 bo->size = size;
991
992 /* Zero the contents if necessary. If this fails, fall back to
993 * allocating a fresh BO, which will always be zeroed by the kernel.
994 */
995 if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
996 pb_slab_free(slabs, &bo->slab.entry);
997 return NULL;
998 }
999
1000 return bo;
1001 }
1002
1003 static struct iris_bo *
 1004 alloc_bo_from_cache(struct iris_bufmgr *bufmgr,
1005 struct bo_cache_bucket *bucket,
1006 uint32_t alignment,
1007 enum iris_memory_zone memzone,
1008 enum iris_mmap_mode mmap_mode,
1009 unsigned flags,
1010 bool match_zone)
1011 {
1012 if (!bucket)
1013 return NULL;
1014
1015 struct iris_bo *bo = NULL;
1016
1017 simple_mtx_assert_locked(&bufmgr->lock);
1018
1019 list_for_each_entry_safe(struct iris_bo, cur, &bucket->head, head) {
1020 assert(iris_bo_is_real(cur));
1021
1022 /* Find one that's got the right mapping type. We used to swap maps
1023 * around but the kernel doesn't allow this on discrete GPUs.
1024 */
1025 if (mmap_mode != cur->real.mmap_mode)
1026 continue;
1027
1028 /* Try a little harder to find one that's already in the right memzone */
1029 if (match_zone && memzone != iris_memzone_for_address(cur->address))
1030 continue;
1031
1032 if (cur->real.capture != !!(flags & BO_ALLOC_CAPTURE))
1033 continue;
1034
1035 /* If the last BO in the cache is busy, there are no idle BOs. Bail,
1036 * either falling back to a non-matching memzone, or if that fails,
1037 * allocating a fresh buffer.
1038 */
1039 if (iris_bo_busy(cur))
1040 return NULL;
1041
1042 list_del(&cur->head);
1043
 1044 /* Tell the kernel we need this BO and check whether it still exists */
1045 if (!iris_bo_madvise(cur, IRIS_MADVICE_WILL_NEED)) {
1046 /* This BO was purged, throw it out and keep looking. */
1047 bo_free(cur);
1048 continue;
1049 }
1050
1051 if (cur->aux_map_address) {
1052 /* This buffer was associated with an aux-buffer range. We make sure
1053 * that buffers are not reused from the cache while the buffer is (busy)
1054 * being used by an executing batch. Since we are here, the buffer is no
1055 * longer being used by a batch and the buffer was deleted (in order to
1056 * end up in the cache). Therefore its old aux-buffer range can be
1057 * removed from the aux-map.
1058 */
1059 if (cur->bufmgr->aux_map_ctx)
1060 intel_aux_map_unmap_range(cur->bufmgr->aux_map_ctx, cur->address,
1061 cur->size);
1062 cur->aux_map_address = 0;
1063 }
1064
1065 /* If the cached BO isn't in the right memory zone, or the alignment
1066 * isn't sufficient, free the old memory and assign it a new address.
1067 */
1068 if (memzone != iris_memzone_for_address(cur->address) ||
1069 cur->address % alignment != 0) {
1070 if (!bufmgr->kmd_backend->gem_vm_unbind(cur)) {
1071 DBG("Unable to unbind vm of buf %u\n", cur->gem_handle);
1072 bo_free(cur);
1073 continue;
1074 }
1075
1076 vma_free(bufmgr, cur->address, cur->size);
1077 cur->address = 0ull;
1078 }
1079
1080 bo = cur;
1081 break;
1082 }
1083
1084 if (!bo)
1085 return NULL;
1086
1087 /* Zero the contents if necessary. If this fails, fall back to
1088 * allocating a fresh BO, which will always be zeroed by the kernel.
1089 */
1090 assert(bo->zeroed == false);
1091 if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
1092 bo_free(bo);
1093 return NULL;
1094 }
1095
1096 return bo;
1097 }
1098
1099 static struct iris_bo *
 1100 alloc_fresh_bo(struct iris_bufmgr *bufmgr, uint64_t bo_size, unsigned flags)
1101 {
1102 struct iris_bo *bo = bo_calloc();
1103 if (!bo)
1104 return NULL;
1105
1106 /* Try to allocate memory in multiples of 2MB, as this allows us to use
1107 * 64K pages rather than the less-efficient 4K pages. Most BOs smaller
1108 * than 64MB should hit the BO cache or slab allocations anyway, so this
1109 * shouldn't waste too much memory. We do exclude small (< 1MB) sizes to
1110 * be defensive in case any of those bypass the caches and end up here.
1111 */
1112 if (bo_size >= 1024 * 1024)
1113 bo_size = align64(bo_size, 2 * 1024 * 1024);
1114
1115 bo->real.heap = flags_to_heap(bufmgr, flags);
1116
1117 const struct intel_memory_class_instance *regions[2];
1118 uint16_t num_regions = 0;
1119
1120 if (bufmgr->vram.size > 0) {
1121 switch (bo->real.heap) {
1122 case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1123 /* For vram allocations, still use system memory as a fallback. */
1124 regions[num_regions++] = bufmgr->vram.region;
1125 regions[num_regions++] = bufmgr->sys.region;
1126 break;
1127 case IRIS_HEAP_DEVICE_LOCAL:
1128 case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
1129 case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
1130 regions[num_regions++] = bufmgr->vram.region;
1131 break;
1132 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1133 regions[num_regions++] = bufmgr->sys.region;
1134 break;
1135 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
1136 /* not valid, compressed in discrete is always created with
1137 * IRIS_HEAP_DEVICE_LOCAL_PREFERRED_COMPRESSED
1138 */
1139 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1140 /* not valid; discrete cards always enable snooping */
1141 case IRIS_HEAP_MAX:
1142 unreachable("invalid heap for BO");
1143 }
1144 } else {
1145 regions[num_regions++] = bufmgr->sys.region;
1146 }
1147
1148 bo->gem_handle = bufmgr->kmd_backend->gem_create(bufmgr, regions,
1149 num_regions, bo_size,
1150 bo->real.heap, flags);
1151 if (bo->gem_handle == 0) {
1152 free(bo);
1153 return NULL;
1154 }
1155 bo->bufmgr = bufmgr;
1156 bo->size = bo_size;
1157 bo->idle = true;
1158 bo->zeroed = true;
1159 bo->real.capture = (flags & BO_ALLOC_CAPTURE) != 0;
1160
1161 return bo;
1162 }
1163
1164 const char *
1165 iris_heap_to_string[IRIS_HEAP_MAX] = {
1166 [IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT] = "system-cached-coherent",
1167 [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED] = "system-uncached",
1168 [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED] = "system-uncached-compressed",
1169 [IRIS_HEAP_DEVICE_LOCAL] = "local",
1170 [IRIS_HEAP_DEVICE_LOCAL_COMPRESSED] = "local-compressed",
1171 [IRIS_HEAP_DEVICE_LOCAL_PREFERRED] = "local-preferred",
1172 [IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR] = "local-cpu-visible-small-bar",
1173 };
1174
1175 static enum iris_mmap_mode
 1176 heap_to_mmap_mode(struct iris_bufmgr *bufmgr, enum iris_heap heap)
1177 {
1178 const struct intel_device_info *devinfo = &bufmgr->devinfo;
1179
1180 switch (heap) {
1181 case IRIS_HEAP_DEVICE_LOCAL:
1182 return intel_vram_all_mappable(devinfo) ? IRIS_MMAP_WC : IRIS_MMAP_NONE;
1183 case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
1184 case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1185 return IRIS_MMAP_WC;
1186 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1187 return IRIS_MMAP_WB;
1188 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1189 return IRIS_MMAP_WC;
1190 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
1191 case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
1192 /* compressed bos are not mmaped */
1193 return IRIS_MMAP_NONE;
1194 default:
1195 unreachable("invalid heap");
1196 }
1197 }
1198
1199 struct iris_bo *
 1200 iris_bo_alloc(struct iris_bufmgr *bufmgr,
1201 const char *name,
1202 uint64_t size,
1203 uint32_t alignment,
1204 enum iris_memory_zone memzone,
1205 unsigned flags)
1206 {
1207 struct iris_bo *bo;
1208 unsigned int page_size = getpagesize();
1209 enum iris_heap heap = flags_to_heap(bufmgr, flags);
1210 struct bo_cache_bucket *bucket =
1211 bucket_for_size(bufmgr, size, heap, flags);
1212
1213 if (memzone != IRIS_MEMZONE_OTHER || (flags & BO_ALLOC_CACHED_COHERENT))
1214 flags |= BO_ALLOC_NO_SUBALLOC;
1215
1216 /* By default, capture all driver-internal buffers like shader kernels,
1217 * surface states, dynamic states, border colors, and so on.
1218 */
1219 if (memzone < IRIS_MEMZONE_OTHER || INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1220 flags |= BO_ALLOC_CAPTURE;
1221
1222 bo = alloc_bo_from_slabs(bufmgr, name, size, alignment, flags);
1223
1224 if (bo)
1225 return bo;
1226
1227 /* Round the size up to the bucket size, or if we don't have caching
1228 * at this size, a multiple of the page size.
1229 */
1230 uint64_t bo_size =
1231 bucket ? bucket->size : MAX2(align64(size, page_size), page_size);
1232 enum iris_mmap_mode mmap_mode = heap_to_mmap_mode(bufmgr, heap);
1233
1234 simple_mtx_lock(&bufmgr->lock);
1235
1236 /* Get a buffer out of the cache if available. First, we try to find
1237 * one with a matching memory zone so we can avoid reallocating VMA.
1238 */
1239 bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1240 flags, true);
1241
1242 /* If that fails, we try for any cached BO, without matching memzone. */
1243 if (!bo) {
1244 bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1245 flags, false);
1246 }
1247
1248 simple_mtx_unlock(&bufmgr->lock);
1249
1250 if (!bo) {
1251 bo = alloc_fresh_bo(bufmgr, bo_size, flags);
1252 if (!bo)
1253 return NULL;
1254 }
1255
1256 if (bo->address == 0ull) {
1257 simple_mtx_lock(&bufmgr->lock);
1258 bo->address = vma_alloc(bufmgr, memzone, bo->size, alignment);
1259 simple_mtx_unlock(&bufmgr->lock);
1260
1261 if (bo->address == 0ull)
1262 goto err_free;
1263
1264 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1265 goto err_vm_alloc;
1266 }
1267
1268 bo->name = name;
1269 p_atomic_set(&bo->refcount, 1);
1270 bo->real.reusable = bucket && bufmgr->bo_reuse;
1271 bo->real.protected = flags & BO_ALLOC_PROTECTED;
1272 bo->index = -1;
1273 bo->real.prime_fd = -1;
1274
1275 assert(bo->real.map == NULL || bo->real.mmap_mode == mmap_mode);
1276 bo->real.mmap_mode = mmap_mode;
1277
1278 /* On integrated GPUs, enable snooping to ensure coherency if needed.
1279 * For discrete, we instead use SMEM and avoid WB maps for coherency.
1280 */
1281 if ((flags & BO_ALLOC_CACHED_COHERENT) &&
1282 !bufmgr->devinfo.has_llc && bufmgr->devinfo.has_caching_uapi) {
1283 if (bufmgr->kmd_backend->bo_set_caching(bo, true) != 0)
1284 goto err_free;
1285 }
1286
1287 DBG("bo_create: buf %d (%s) (%s memzone) (%s) %llub\n", bo->gem_handle,
1288 bo->name, memzone_name(memzone), iris_heap_to_string[bo->real.heap],
1289 (unsigned long long) size);
1290
1291 return bo;
1292
1293 err_vm_alloc:
1294 simple_mtx_lock(&bufmgr->lock);
1295 vma_free(bufmgr, bo->address, bo->size);
1296 simple_mtx_unlock(&bufmgr->lock);
1297 err_free:
1298 simple_mtx_lock(&bufmgr->lock);
1299 bo_free(bo);
1300 simple_mtx_unlock(&bufmgr->lock);
1301 return NULL;
1302 }
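/* Allocation path summary and usage sketch (added for illustration):
 *
 * iris_bo_alloc() tries, in order: a slab sub-allocation, a cached BO in
 * the matching memzone, a cached BO in any memzone, and finally a fresh
 * GEM allocation.  A new address is pulled from the memzone's VMA heap and
 * bound with gem_vm_bind() only when the BO does not already have one.
 *
 *    struct iris_bo *bo =
 *       iris_bo_alloc(bufmgr, "scratch data", 64 * 1024, 4096,
 *                     IRIS_MEMZONE_OTHER, BO_ALLOC_ZEROED);
 *    // ... use bo ...
 *    iris_bo_unreference(bo);
 */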
1303
1304 static int
 1305 iris_bo_close(int fd, uint32_t gem_handle)
1306 {
1307 struct drm_gem_close close = {
1308 .handle = gem_handle,
1309 };
1310 return intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
1311 }
1312
1313 struct iris_bo *
 1314 iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
1315 void *ptr, size_t size,
1316 enum iris_memory_zone memzone)
1317 {
1318 struct iris_bo *bo;
1319
1320 bo = bo_calloc();
1321 if (!bo)
1322 return NULL;
1323
1324 bo->gem_handle = bufmgr->kmd_backend->gem_create_userptr(bufmgr, ptr, size);
1325 if (bo->gem_handle == 0)
1326 goto err_free;
1327
1328 bo->name = name;
1329 bo->size = size;
1330 bo->real.map = ptr;
1331 bo->real.userptr = true;
1332
1333 bo->bufmgr = bufmgr;
1334
1335 if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1336 bo->real.capture = true;
1337
1338 simple_mtx_lock(&bufmgr->lock);
1339 bo->address = vma_alloc(bufmgr, memzone, size, 1);
1340 simple_mtx_unlock(&bufmgr->lock);
1341
1342 if (bo->address == 0ull)
1343 goto err_close;
1344
1345 p_atomic_set(&bo->refcount, 1);
1346 bo->index = -1;
1347 bo->idle = true;
1348 bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1349 bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
1350 bo->real.prime_fd = -1;
1351
1352 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1353 goto err_vma_free;
1354
1355 return bo;
1356
1357 err_vma_free:
1358 simple_mtx_lock(&bufmgr->lock);
1359 vma_free(bufmgr, bo->address, bo->size);
1360 simple_mtx_unlock(&bufmgr->lock);
1361 err_close:
1362 bufmgr->kmd_backend->gem_close(bufmgr, bo);
1363 err_free:
1364 free(bo);
1365 return NULL;
1366 }
1367
1368 static bool
 1369 needs_prime_fd(struct iris_bufmgr *bufmgr)
1370 {
1371 return bufmgr->devinfo.kmd_type == INTEL_KMD_TYPE_XE;
1372 }
1373
1374 static bool
 1375 iris_bo_set_prime_fd(struct iris_bo *bo)
1376 {
1377 struct iris_bufmgr *bufmgr = bo->bufmgr;
1378
1379 if (needs_prime_fd(bufmgr) && bo->real.prime_fd == -1) {
1380 if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1381 DRM_CLOEXEC | DRM_RDWR, &bo->real.prime_fd)) {
1382 fprintf(stderr, "Failed to get prime fd for bo %s/%u\n",
1383 bo->name, bo->gem_handle);
1384 return false;
1385 }
1386 }
1387
1388 return true;
1389 }
1390
1391 /**
1392 * Returns a iris_bo wrapping the given buffer object handle.
1393 *
1394 * This can be used when one application needs to pass a buffer object
1395 * to another.
1396 */
1397 struct iris_bo *
 1398 iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
1399 const char *name, unsigned int handle)
1400 {
1401 struct iris_bo *bo;
1402
 1403 /* At the moment most applications only have a few named BOs.
 1404 * For instance, in a DRI client only the render buffers passed
 1405 * between X and the client are named. And since X returns the
 1406 * alternating names for the front/back buffer, a linear search
 1407 * provides a sufficiently fast match.
1408 */
1409 simple_mtx_lock(&bufmgr->lock);
1410 bo = find_and_ref_external_bo(bufmgr->name_table, handle);
1411 if (bo)
1412 goto out;
1413
1414 struct drm_gem_open open_arg = { .name = handle };
1415 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
1416 if (ret != 0) {
1417 DBG("Couldn't reference %s handle 0x%08x: %s\n",
1418 name, handle, strerror(errno));
1419 bo = NULL;
1420 goto out;
1421 }
1422 /* Now see if someone has used a prime handle to get this
1423 * object from the kernel before by looking through the list
1424 * again for a matching gem_handle
1425 */
1426 bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle);
1427 if (bo)
1428 goto out;
1429
1430 bo = bo_calloc();
1431 if (!bo) {
1432 struct iris_bo close_bo = {
1433 .gem_handle = open_arg.handle,
1434 };
1435 bufmgr->kmd_backend->gem_close(bufmgr, &close_bo);
1436 goto out;
1437 }
1438
1439 p_atomic_set(&bo->refcount, 1);
1440
1441 bo->size = open_arg.size;
1442 bo->bufmgr = bufmgr;
1443 bo->gem_handle = open_arg.handle;
1444 bo->name = name;
1445 bo->index = -1;
1446 bo->real.global_name = handle;
1447 bo->real.prime_fd = -1;
1448 bo->real.reusable = false;
1449 bo->real.imported = true;
1450 /* Xe KMD expects at least 1-way coherency for imports */
1451 bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1452 bo->real.mmap_mode = IRIS_MMAP_NONE;
1453 if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1454 bo->real.capture = true;
1455 bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
1456 if (bo->address == 0ull)
1457 goto err_free;
1458
1459 if (!iris_bo_set_prime_fd(bo))
1460 goto err_vm_alloc;
1461
1462 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1463 goto err_vm_alloc;
1464
1465 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1466 _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
1467
1468 DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
1469
1470 out:
1471 simple_mtx_unlock(&bufmgr->lock);
1472 return bo;
1473
1474 err_vm_alloc:
1475 vma_free(bufmgr, bo->address, bo->size);
1476 err_free:
1477 bo_free(bo);
1478 simple_mtx_unlock(&bufmgr->lock);
1479 return NULL;
1480 }
1481
1482 static void
 1483 bo_close(struct iris_bo *bo)
1484 {
1485 struct iris_bufmgr *bufmgr = bo->bufmgr;
1486
1487 simple_mtx_assert_locked(&bufmgr->lock);
1488 assert(iris_bo_is_real(bo));
1489
1490 if (iris_bo_is_external(bo)) {
1491 struct hash_entry *entry;
1492
1493 if (bo->real.global_name) {
1494 entry = _mesa_hash_table_search(bufmgr->name_table,
1495 &bo->real.global_name);
1496 _mesa_hash_table_remove(bufmgr->name_table, entry);
1497 }
1498
1499 entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
1500 _mesa_hash_table_remove(bufmgr->handle_table, entry);
1501
1502 list_for_each_entry_safe(struct bo_export, export, &bo->real.exports, link) {
1503 iris_bo_close(export->drm_fd, export->gem_handle);
1504
1505 list_del(&export->link);
1506 free(export);
1507 }
1508 } else {
1509 assert(list_is_empty(&bo->real.exports));
1510 }
1511
1512 /* Unbind and return the VMA for reuse */
1513 if (bufmgr->kmd_backend->gem_vm_unbind(bo))
1514 vma_free(bo->bufmgr, bo->address, bo->size);
1515 else
1516 DBG("Unable to unbind vm of buf %u\n", bo->gem_handle);
1517
1518 if (bo->real.prime_fd != -1)
1519 close(bo->real.prime_fd);
1520
1521 /* Close this object */
1522 if (bufmgr->kmd_backend->gem_close(bufmgr, bo) != 0) {
1523 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1524 bo->gem_handle, bo->name, strerror(errno));
1525 }
1526
1527 if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
1528 intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
1529 bo->size);
1530 }
1531
1532 for (int d = 0; d < bo->deps_size; d++) {
1533 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
1534 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
1535 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
1536 }
1537 }
1538 free(bo->deps);
1539
1540 free(bo);
1541 }
1542
1543 static void
 1544 bo_free(struct iris_bo *bo)
1545 {
1546 struct iris_bufmgr *bufmgr = bo->bufmgr;
1547
1548 simple_mtx_assert_locked(&bufmgr->lock);
1549 assert(iris_bo_is_real(bo));
1550
1551 if (!bo->real.userptr && bo->real.map)
1552 bo_unmap(bo);
1553
1554 if (bo->idle || !iris_bo_busy(bo)) {
1555 bo_close(bo);
1556 } else {
1557 /* Defer closing the GEM BO and returning the VMA for reuse until the
1558 * BO is idle. Just move it to the dead list for now.
1559 */
1560 list_addtail(&bo->head, &bufmgr->zombie_list);
1561 }
1562 }
1563
1564 static enum iris_heap
 1565 iris_get_heap_max(struct iris_bufmgr *bufmgr)
1566 {
1567 if (bufmgr->vram.size) {
1568 return intel_vram_all_mappable(&bufmgr->devinfo) ?
1569 IRIS_HEAP_MAX_LARGE_BAR : IRIS_HEAP_MAX;
1570 }
1571
1572 return bufmgr->devinfo.ver >= 20 ? IRIS_HEAP_MAX_NO_VRAM :
1573 IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;
1574 }
1575
1576 /** Frees all cached buffers significantly older than @time. */
1577 static void
 1578 cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time)
1579 {
1580 simple_mtx_assert_locked(&bufmgr->lock);
1581
1582 if (bufmgr->time == time)
1583 return;
1584
1585 for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
1586 struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1587
1588 for (int i = 0; i < cache->num_buckets; i++) {
1589 struct bo_cache_bucket *bucket = &cache->bucket[i];
1590
1591 list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1592 if (time - bo->real.free_time <= 1)
1593 break;
1594
1595 list_del(&bo->head);
1596
1597 bo_free(bo);
1598 }
1599 }
1600 }
1601
1602 list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1603 /* Stop once we reach a busy BO - all others past this point were
1604 * freed more recently so are likely also busy.
1605 */
1606 if (!bo->idle && iris_bo_busy(bo))
1607 break;
1608
1609 list_del(&bo->head);
1610 bo_close(bo);
1611 }
1612
1613 bufmgr->time = time;
1614 }
1615
1616 static void
 1617 bo_unreference_final(struct iris_bo *bo, time_t time)
1618 {
1619 struct iris_bufmgr *bufmgr = bo->bufmgr;
1620
1621 DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
1622
1623 assert(iris_bo_is_real(bo));
1624
1625 struct bo_cache_bucket *bucket = !bo->real.reusable ? NULL :
1626 bucket_for_size(bufmgr, bo->size, bo->real.heap, 0);
1627
1628 /* Put the buffer into our internal cache for reuse if we can. */
1629 if (bucket && iris_bo_madvise(bo, IRIS_MADVICE_DONT_NEED)) {
1630 bo->real.free_time = time;
1631 bo->name = NULL;
1632
1633 list_addtail(&bo->head, &bucket->head);
1634 } else {
1635 bo_free(bo);
1636 }
1637 }
1638
1639 void
 1640 iris_bo_unreference(struct iris_bo *bo)
1641 {
1642 if (bo == NULL)
1643 return;
1644
1645 assert(p_atomic_read(&bo->refcount) > 0);
1646
1647 if (atomic_add_unless(&bo->refcount, -1, 1)) {
1648 struct iris_bufmgr *bufmgr = bo->bufmgr;
1649 struct timespec time;
1650
1651 clock_gettime(CLOCK_MONOTONIC, &time);
1652
1653 bo->zeroed = false;
1654 if (bo->gem_handle == 0) {
1655 pb_slab_free(get_slabs(bufmgr, bo->size), &bo->slab.entry);
1656 } else {
1657 simple_mtx_lock(&bufmgr->lock);
1658
1659 if (p_atomic_dec_zero(&bo->refcount)) {
1660 bo_unreference_final(bo, time.tv_sec);
1661 cleanup_bo_cache(bufmgr, time.tv_sec);
1662 }
1663
1664 simple_mtx_unlock(&bufmgr->lock);
1665 }
1666 }
1667 }
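/* Note (added for clarity): the decrement is split in two so the common
 * case avoids the bufmgr lock.  atomic_add_unless() handles refcounts > 1
 * locklessly; only when the count would hit zero do we take bufmgr->lock
 * and perform the final p_atomic_dec_zero(), so teardown cannot race with
 * code that resurrects BOs under the same lock (e.g.
 * find_and_ref_external_bo()).
 */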
1668
1669 static void
 1670 bo_wait_with_stall_warning(struct util_debug_callback *dbg,
1671 struct iris_bo *bo,
1672 const char *action)
1673 {
1674 bool busy = dbg && !bo->idle;
1675 double elapsed = unlikely(busy) ? -get_time() : 0.0;
1676
1677 iris_bo_wait_rendering(bo);
1678
1679 if (unlikely(busy)) {
1680 elapsed += get_time();
1681 if (elapsed > 1e-5) /* 0.01ms */ {
1682 perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
1683 action, bo->name, elapsed * 1000);
1684 }
1685 }
1686 }
1687
1688 static void
 1689 print_flags(unsigned flags)
1690 {
1691 if (flags & MAP_READ)
1692 DBG("READ ");
1693 if (flags & MAP_WRITE)
1694 DBG("WRITE ");
1695 if (flags & MAP_ASYNC)
1696 DBG("ASYNC ");
1697 if (flags & MAP_PERSISTENT)
1698 DBG("PERSISTENT ");
1699 if (flags & MAP_COHERENT)
1700 DBG("COHERENT ");
1701 if (flags & MAP_RAW)
1702 DBG("RAW ");
1703 DBG("\n");
1704 }
1705
1706 void *
 1707 iris_bo_map(struct util_debug_callback *dbg,
1708 struct iris_bo *bo, unsigned flags)
1709 {
1710 struct iris_bufmgr *bufmgr = bo->bufmgr;
1711 void *map = NULL;
1712
1713 if (bo->gem_handle == 0) {
1714 struct iris_bo *real = iris_get_backing_bo(bo);
1715 uint64_t offset = bo->address - real->address;
1716 map = iris_bo_map(dbg, real, flags | MAP_ASYNC) + offset;
1717 } else {
1718 assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
1719 if (bo->real.mmap_mode == IRIS_MMAP_NONE)
1720 return NULL;
1721
1722 if (!bo->real.map) {
1723 DBG("iris_bo_map: %d (%s)\n", bo->gem_handle, bo->name);
1724 map = bufmgr->kmd_backend->gem_mmap(bufmgr, bo);
1725 if (!map) {
1726 return NULL;
1727 }
1728
1729 VG_DEFINED(map, bo->size);
1730
1731 if (p_atomic_cmpxchg(&bo->real.map, NULL, map)) {
1732 VG_NOACCESS(map, bo->size);
1733 os_munmap(map, bo->size);
1734 }
1735 }
1736 assert(bo->real.map);
1737 map = bo->real.map;
1738 }
1739
1740 DBG("iris_bo_map: %d (%s) -> %p\n",
1741 bo->gem_handle, bo->name, bo->real.map);
1742 print_flags(flags);
1743
1744 if (!(flags & MAP_ASYNC)) {
1745 bo_wait_with_stall_warning(dbg, bo, "memory mapping");
1746 }
1747
1748 return map;
1749 }
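/* Usage sketch (added for illustration): mappings are cached on the BO and
 * torn down in bo_free(), so callers do not unmap explicitly.
 *
 *    void *p = iris_bo_map(dbg, bo, MAP_WRITE);   // stalls if the BO is busy
 *    if (p)
 *       memcpy(p, data, size);
 *    // pass MAP_ASYNC to skip the stall when the caller knows it is safe
 */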
1750
1751 /**
1752 * Waits on a BO for the given amount of time.
1753 *
1754 * @bo: buffer object to wait for
1755 * @timeout_ns: amount of time to wait in nanoseconds.
1756 * If value is less than 0, an infinite wait will occur.
1757 *
 1758 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1759 * object has completed within the allotted time. Otherwise some negative return
1760 * value describes the error. Of particular interest is -ETIME when the wait has
1761 * failed to yield the desired result.
1762 *
1763 * Similar to iris_bo_wait_rendering except a timeout parameter allows
1764 * the operation to give up after a certain amount of time. Another subtle
 1765 * difference is in the internal locking semantics (this variant does
1766 * not hold the lock for the duration of the wait). This makes the wait subject
1767 * to a larger userspace race window.
1768 *
1769 * The implementation shall wait until the object is no longer actively
1770 * referenced within a batch buffer at the time of the call. The wait will
 1771 * not guarantee that the buffer is re-issued via another thread, or a flinked
1772 * handle. Userspace must make sure this race does not occur if such precision
1773 * is important.
1774 *
 1775 * Note that some kernels have broken the promise of an infinite wait for
 1776 * negative values; upgrade to the latest stable kernel if this is the case.
1777 */
1778 static inline int
 1779 iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
1780 {
1781 int ret;
1782
1783 switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
1784 case INTEL_KMD_TYPE_I915:
1785 if (iris_bo_is_external(bo))
1786 ret = iris_i915_bo_wait_gem(bo, timeout_ns);
1787 else
1788 ret = iris_bo_wait_syncobj(bo, timeout_ns);
1789 break;
1790 case INTEL_KMD_TYPE_XE:
1791 ret = iris_bo_wait_syncobj(bo, timeout_ns);
1792 break;
1793 default:
1794 unreachable("missing");
1795 ret = -1;
1796 }
1797
1798 bo->idle = ret == 0;
1799
1800 return ret;
1801 }
1802
1803 /** Waits for all GPU rendering with the object to have completed. */
1804 void
 1805 iris_bo_wait_rendering(struct iris_bo *bo)
1806 {
1807 /* We require a kernel recent enough for WAIT_IOCTL support.
1808 * See intel_init_bufmgr()
1809 */
1810 iris_bo_wait(bo, -1);
1811 }
1812
1813 static void
 1814 iris_bufmgr_destroy_global_vm(struct iris_bufmgr *bufmgr)
1815 {
1816 switch (bufmgr->devinfo.kmd_type) {
1817 case INTEL_KMD_TYPE_I915:
1818 /* Nothing to do in i915 */
1819 break;
1820 case INTEL_KMD_TYPE_XE:
1821 intel_bind_timeline_finish(&bufmgr->bind_timeline, bufmgr->fd);
1822 iris_xe_destroy_global_vm(bufmgr);
1823 break;
1824 default:
1825 unreachable("missing");
1826 }
1827 }
1828
1829 static void
1830 iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
1831 {
1832 iris_bo_unreference(bufmgr->dummy_aux_bo);
1833 iris_bo_unreference(bufmgr->mem_fence_bo);
1834
1835 iris_destroy_border_color_pool(&bufmgr->border_color_pool);
1836
1837 /* Free aux-map buffers */
1838 intel_aux_map_finish(bufmgr->aux_map_ctx);
1839
1840 /* bufmgr will no longer try to free VMA entries in the aux-map */
1841 bufmgr->aux_map_ctx = NULL;
1842
1843 for (int i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
1844 if (bufmgr->bo_slabs[i].groups)
1845 pb_slabs_deinit(&bufmgr->bo_slabs[i]);
1846 }
1847
1848 simple_mtx_lock(&bufmgr->lock);
1849
1850 /* Free any cached buffer objects we were going to reuse */
1851 for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
1852 struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1853
1854 for (int i = 0; i < cache->num_buckets; i++) {
1855 struct bo_cache_bucket *bucket = &cache->bucket[i];
1856
1857 list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1858 list_del(&bo->head);
1859
1860 bo_free(bo);
1861 }
1862 }
1863 }
1864 free(bufmgr->bucket_cache);
1865
1866 /* Close any buffer objects on the dead list. */
1867 list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1868 list_del(&bo->head);
1869 bo_close(bo);
1870 }
1871
1872 _mesa_hash_table_destroy(bufmgr->name_table, NULL);
1873 _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
1874
1875 for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
1876 util_vma_heap_finish(&bufmgr->vma_allocator[z]);
1877
1878 iris_bufmgr_destroy_global_vm(bufmgr);
1879
1880 close(bufmgr->fd);
1881
1882 simple_mtx_unlock(&bufmgr->lock);
1883
1884 simple_mtx_destroy(&bufmgr->lock);
1885 simple_mtx_destroy(&bufmgr->bo_deps_lock);
1886
1887 free(bufmgr);
1888 }
1889
1890 int
1891 iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling)
1892 {
1893 struct iris_bufmgr *bufmgr = bo->bufmgr;
1894
1895 if (!bufmgr->devinfo.has_tiling_uapi) {
1896 *tiling = 0;
1897 return 0;
1898 }
1899
1900 assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
1901 return iris_i915_bo_get_tiling(bo, tiling);
1902 }
1903
1904 int
1905 iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf)
1906 {
1907 struct iris_bufmgr *bufmgr = bo->bufmgr;
1908
1909 /* If we can't do map_gtt, the set/get_tiling API isn't useful. And it's
1910 * actually not supported by the kernel in those cases.
1911 */
1912 if (!bufmgr->devinfo.has_tiling_uapi)
1913 return 0;
1914
1915 assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
1916 return iris_i915_bo_set_tiling(bo, surf);
1917 }
1918
1919 struct iris_bo *
1920 iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
1921 const uint64_t modifier)
1922 {
1923 uint32_t handle;
1924 struct iris_bo *bo;
1925
1926 simple_mtx_lock(&bufmgr->lock);
1927 int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1928 if (ret) {
1929 DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
1930 strerror(errno));
1931 simple_mtx_unlock(&bufmgr->lock);
1932 return NULL;
1933 }
1934
1935 /*
1936 * See if the kernel has already returned this buffer to us. Just as
1937  * for named buffers, we must not create two BOs pointing at the same
1938  * kernel object.
1939 */
1940 bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
1941 if (bo)
1942 goto out;
1943
1944 bo = bo_calloc();
1945 if (!bo)
1946 goto out;
1947
1948 p_atomic_set(&bo->refcount, 1);
1949
1950 /* Determine size of bo. The fd-to-handle ioctl really should
1951 * return the size, but it doesn't. If we have kernel 3.12 or
1952 * later, we can lseek on the prime fd to get the size. Older
1953 * kernels will just fail, in which case we fall back to the
1954  * provided (estimated or guessed) size. */
1955 ret = lseek(prime_fd, 0, SEEK_END);
1956 if (ret != -1)
1957 bo->size = ret;
1958
1959 bo->bufmgr = bufmgr;
1960 bo->name = "prime";
1961 bo->index = -1;
1962 bo->real.reusable = false;
1963 bo->real.imported = true;
1964 /* Xe KMD expects at least 1-way coherency for imports */
1965 bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1966 bo->real.mmap_mode = IRIS_MMAP_NONE;
1967 if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1968 bo->real.capture = true;
1969 bo->gem_handle = handle;
1970 bo->real.prime_fd = needs_prime_fd(bufmgr) ? dup(prime_fd) : -1;
1971
1972 uint64_t alignment = 1;
1973
1974 /* When an aux map will be used, there is an alignment requirement on the
1975 * main surface from the mapping granularity. Some planes of the image may
1976 * have smaller alignment requirements, but this one should work for all.
1977 */
1978 if (bufmgr->devinfo.has_aux_map && isl_drm_modifier_has_aux(modifier))
1979 alignment = intel_aux_map_get_alignment(bufmgr->aux_map_ctx);
1980
1981 bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, alignment);
1982 if (bo->address == 0ull)
1983 goto err_free;
1984
1985 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1986 goto err_vm_alloc;
1987
1988 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1989
1990 out:
1991 simple_mtx_unlock(&bufmgr->lock);
1992 return bo;
1993
1994 err_vm_alloc:
1995 vma_free(bufmgr, bo->address, bo->size);
1996 err_free:
1997 bo_free(bo);
1998 simple_mtx_unlock(&bufmgr->lock);
1999 return NULL;
2000 }
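
/* Usage sketch (illustrative only): importing a dma-buf fd received from
 * another process or API.  DRM_FORMAT_MOD_LINEAR is just an example
 * modifier; real callers pass whatever modifier was negotiated for the
 * image.
 *
 *    struct iris_bo *bo =
 *       iris_bo_import_dmabuf(bufmgr, prime_fd, DRM_FORMAT_MOD_LINEAR);
 *    if (!bo)
 *       return false;
 *    // The import does not take ownership of prime_fd (it dup()s it when
 *    // needed), so the caller may close prime_fd afterwards.  Drop the
 *    // reference with iris_bo_unreference(bo) when done.
 */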
2001
2002 static void
2003 iris_bo_mark_exported_locked(struct iris_bo *bo)
2004 {
2005 struct iris_bufmgr *bufmgr = bo->bufmgr;
2006
2007 /* We cannot export suballocated BOs. */
2008 assert(iris_bo_is_real(bo));
2009 simple_mtx_assert_locked(&bufmgr->lock);
2010
2011 if (!iris_bo_is_external(bo))
2012 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
2013
2014 if (!bo->real.exported) {
2015 /* If a BO is going to be used externally, it could be sent to the
2016 * display HW. So make sure our CPU mappings don't assume cache
2017 * coherency since display is outside that cache.
2018 */
2019 bo->real.exported = true;
2020 bo->real.reusable = false;
2021 }
2022 }
2023
2024 void
2025 iris_bo_mark_exported(struct iris_bo *bo)
2026 {
2027 struct iris_bufmgr *bufmgr = bo->bufmgr;
2028
2029 /* We cannot export suballocated BOs. */
2030 assert(iris_bo_is_real(bo));
2031
2032 if (bo->real.exported) {
2033 assert(!bo->real.reusable);
2034 return;
2035 }
2036
2037 simple_mtx_lock(&bufmgr->lock);
2038 iris_bo_mark_exported_locked(bo);
2039 simple_mtx_unlock(&bufmgr->lock);
2040
2041 iris_bo_set_prime_fd(bo);
2042 }
2043
2044 int
2045 iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
2046 {
2047 struct iris_bufmgr *bufmgr = bo->bufmgr;
2048
2049 /* We cannot export suballocated BOs. */
2050 assert(iris_bo_is_real(bo));
2051
2052 if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
2053 DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
2054 return -errno;
2055
2056 iris_bo_mark_exported(bo);
2057
2058 return 0;
2059 }
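
/* Usage sketch (illustrative only): exporting a BO so another process or
 * driver can import it.
 *
 *    int fd = -1;
 *    if (iris_bo_export_dmabuf(bo, &fd) != 0)
 *       return false;               // negative errno on failure
 *    // fd is a read/write dma-buf (DRM_CLOEXEC | DRM_RDWR) owned by the
 *    // caller, who must close() it when done.  The BO is now marked as
 *    // exported and will no longer be returned to the reuse cache.
 */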
2060
2061 static uint32_t
2062 iris_bo_export_gem_handle(struct iris_bo *bo)
2063 {
2064 /* We cannot export suballocated BOs. */
2065 assert(iris_bo_is_real(bo));
2066
2067 iris_bo_mark_exported(bo);
2068
2069 return bo->gem_handle;
2070 }
2071
2072 int
2073 iris_bo_flink(struct iris_bo *bo, uint32_t *name)
2074 {
2075 struct iris_bufmgr *bufmgr = bo->bufmgr;
2076
2077 /* We cannot export suballocated BOs. */
2078 assert(iris_bo_is_real(bo));
2079
2080 if (!bo->real.global_name) {
2081 struct drm_gem_flink flink = { .handle = bo->gem_handle };
2082
2083 if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
2084 return -errno;
2085
2086 simple_mtx_lock(&bufmgr->lock);
2087 if (!bo->real.global_name) {
2088 iris_bo_mark_exported_locked(bo);
2089 bo->real.global_name = flink.name;
2090 _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
2091 }
2092 simple_mtx_unlock(&bufmgr->lock);
2093
2094 iris_bo_set_prime_fd(bo);
2095 }
2096
2097 *name = bo->real.global_name;
2098 return 0;
2099 }
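
/* Usage sketch (illustrative only): legacy GEM flink export, for older
 * sharing paths that exchange global names rather than dma-buf fds.
 *
 *    uint32_t name;
 *    if (iris_bo_flink(bo, &name) != 0)
 *       return false;               // negative errno on failure
 *    // 'name' can be handed to another process and opened there with
 *    // DRM_IOCTL_GEM_OPEN; the BO is now treated as external and
 *    // non-reusable.
 */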
2100
2101 int
2102 iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
2103 uint32_t *out_handle)
2104 {
2105 /* We cannot export suballocated BOs. */
2106 assert(iris_bo_is_real(bo));
2107
2108    /* Only add the new GEM handle to the list of exports if it belongs to a
2109 * different GEM device. Otherwise we might close the same buffer multiple
2110 * times.
2111 */
2112 struct iris_bufmgr *bufmgr = bo->bufmgr;
2113 int ret = os_same_file_description(drm_fd, bufmgr->fd);
2114 WARN_ONCE(ret < 0,
2115 "Kernel has no file descriptor comparison support: %s\n",
2116 strerror(errno));
2117 if (ret == 0) {
2118 *out_handle = iris_bo_export_gem_handle(bo);
2119 return 0;
2120 }
2121
2122 struct bo_export *export = calloc(1, sizeof(*export));
2123 if (!export)
2124 return -ENOMEM;
2125
2126 export->drm_fd = drm_fd;
2127
2128 int dmabuf_fd = -1;
2129 int err = iris_bo_export_dmabuf(bo, &dmabuf_fd);
2130 if (err) {
2131 free(export);
2132 return err;
2133 }
2134
2135 simple_mtx_lock(&bufmgr->lock);
2136 err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
2137 close(dmabuf_fd);
2138 if (err) {
2139 simple_mtx_unlock(&bufmgr->lock);
2140 free(export);
2141 return err;
2142 }
2143
2144 bool found = false;
2145 list_for_each_entry(struct bo_export, iter, &bo->real.exports, link) {
2146 if (iter->drm_fd != drm_fd)
2147 continue;
2148 /* Here we assume that for a given DRM fd, we'll always get back the
2149 * same GEM handle for a given buffer.
2150 */
2151 assert(iter->gem_handle == export->gem_handle);
2152 free(export);
2153 export = iter;
2154 found = true;
2155 break;
2156 }
2157 if (!found)
2158 list_addtail(&export->link, &bo->real.exports);
2159
2160 simple_mtx_unlock(&bufmgr->lock);
2161
2162 *out_handle = export->gem_handle;
2163
2164 return 0;
2165 }
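
/* Usage sketch (illustrative only): obtaining a GEM handle that is valid on
 * a second DRM fd ('other_fd' here is a hypothetical display-only device).
 * If other_fd refers to the same file description as bufmgr->fd, the BO's
 * own handle is returned and nothing is added to the export list.
 *
 *    uint32_t handle;
 *    if (iris_bo_export_gem_handle_for_device(bo, other_fd, &handle) != 0)
 *       return false;
 *    // 'handle' is valid on other_fd; repeated calls with the same fd
 *    // return the same cached handle from bo->real.exports.
 */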
2166
2167 static void
2168 add_bucket(struct iris_bufmgr *bufmgr, int size, enum iris_heap heap)
2169 {
2170 struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
2171 unsigned int i = cache->num_buckets++;
2172
2173 assert(i < BUCKET_ARRAY_SIZE);
2174
2175 list_inithead(&cache->bucket[i].head);
2176 cache->bucket[i].size = size;
2177
2178 assert(bucket_for_size(bufmgr, size, heap, 0) == &cache->bucket[i]);
2179 assert(bucket_for_size(bufmgr, size - 2048, heap, 0) == &cache->bucket[i]);
2180 assert(bucket_for_size(bufmgr, size + 1, heap, 0) != &cache->bucket[i]);
2181 }
2182
2183 static void
2184 init_cache_buckets(struct iris_bufmgr *bufmgr, enum iris_heap heap)
2185 {
2186 const unsigned _6MB = 6 * 1024 * 1024;
2187 const unsigned _8MB = 8 * 1024 * 1024;
2188 const unsigned _64MB = 64 * 1024 * 1024;
2189
2190 /* power-of-two buckets from 4K to 4MB */
2191 for (uint64_t size = 4096; size < _8MB; size *= 2)
2192 add_bucket(bufmgr, size, heap);
2193
2194 /* 6MB */
2195 add_bucket(bufmgr, _6MB, heap);
2196
2197 /* 8MB+: three sizes between each power of two to reduce waste */
2198 for (uint64_t size = _8MB; size < _64MB; size *= 2) {
2199 add_bucket(bufmgr, size, heap);
2200 add_bucket(bufmgr, size + size * 1 / 4, heap);
2201 add_bucket(bufmgr, size + size * 2 / 4, heap);
2202 add_bucket(bufmgr, size + size * 3 / 4, heap);
2203 }
2204
2205 /* 64MB */
2206 add_bucket(bufmgr, _64MB, heap);
2207 }
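
/* For reference, the buckets created above are, per heap:
 * 4KB, 8KB, ..., 4MB (powers of two), then 6MB, then for each power-of-two
 * step from 8MB up to (but not including) 64MB the base size plus 1/4, 2/4
 * and 3/4 of it (8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48 and 56 MB), and
 * finally 64MB.
 */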
2208
2209 static struct intel_buffer *
2210 intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
2211 {
2212 struct intel_buffer *buf = malloc(sizeof(struct intel_buffer));
2213 if (!buf)
2214 return NULL;
2215
2216 struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;
2217
2218 unsigned int page_size = getpagesize();
2219 size = MAX2(ALIGN(size, page_size), page_size);
2220
2221 struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, BO_ALLOC_CAPTURE);
2222 if (!bo) {
2223 free(buf);
2224 return NULL;
2225 }
2226
2227 simple_mtx_lock(&bufmgr->lock);
2228
2229 bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024);
2230 if (bo->address == 0ull)
2231 goto err_free;
2232
2233 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
2234 goto err_vm_alloc;
2235
2236 simple_mtx_unlock(&bufmgr->lock);
2237
2238 bo->name = "aux-map";
2239 p_atomic_set(&bo->refcount, 1);
2240 bo->index = -1;
2241 bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
2242 bo->real.prime_fd = -1;
2243
2244 buf->driver_bo = bo;
2245 buf->gpu = bo->address;
2246 buf->gpu_end = buf->gpu + bo->size;
2247 buf->map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
2248 return buf;
2249
2250 err_vm_alloc:
2251 vma_free(bufmgr, bo->address, bo->size);
2252 err_free:
2253 free(buf);
2254 bo_free(bo);
2255 simple_mtx_unlock(&bufmgr->lock);
2256 return NULL;
2257 }
2258
2259 static void
2260 intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
2261 {
2262 iris_bo_unreference((struct iris_bo*)buffer->driver_bo);
2263 free(buffer);
2264 }
2265
2266 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
2267 .alloc = intel_aux_map_buffer_alloc,
2268 .free = intel_aux_map_buffer_free,
2269 };
2270
2271 static bool
2272 iris_bufmgr_get_meminfo(struct iris_bufmgr *bufmgr,
2273 struct intel_device_info *devinfo)
2274 {
2275 bufmgr->sys.region = &devinfo->mem.sram.mem;
2276 bufmgr->sys.size = devinfo->mem.sram.mappable.size;
2277
2278    /* When the resizable BAR feature is disabled,
2279     * vram.mappable.size is only 256MB.  The remainder of the
2280     * total size is reported in the vram.unmappable.size
2281     * variable.
2282 */
2283 bufmgr->vram.region = &devinfo->mem.vram.mem;
2284 bufmgr->vram.size = devinfo->mem.vram.mappable.size +
2285 devinfo->mem.vram.unmappable.size;
2286
2287 return true;
2288 }
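
/* Worked example (hypothetical numbers): on a discrete card with 8GB of
 * VRAM and resizable BAR disabled, devinfo might report
 * mem.vram.mappable.size = 256MB and mem.vram.unmappable.size = 7936MB,
 * giving bufmgr->vram.size = 8GB total, while bufmgr->sys.size covers only
 * the mappable portion of system RAM.
 */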
2289
2290 static bool
2291 iris_bufmgr_init_global_vm(struct iris_bufmgr *bufmgr)
2292 {
2293 switch (bufmgr->devinfo.kmd_type) {
2294 case INTEL_KMD_TYPE_I915:
2295 bufmgr->use_global_vm = iris_i915_init_global_vm(bufmgr, &bufmgr->global_vm_id);
2296       /* i915 doesn't require a VM, so return true even if use_global_vm is false */
2297 return true;
2298 case INTEL_KMD_TYPE_XE:
2299 if (!intel_bind_timeline_init(&bufmgr->bind_timeline, bufmgr->fd))
2300 return false;
2301
2302 bufmgr->use_global_vm = iris_xe_init_global_vm(bufmgr, &bufmgr->global_vm_id);
2303 /* Xe requires VM */
2304 return bufmgr->use_global_vm;
2305 default:
2306 unreachable("missing");
2307 return false;
2308 }
2309 }
2310
2311 /**
2312 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2313  * and manage buffer objects.
2314 *
2315 * \param fd File descriptor of the opened DRM device.
2316 */
2317 static struct iris_bufmgr *
2318 iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
2319 {
2320 if (devinfo->gtt_size <= IRIS_MEMZONE_OTHER_START)
2321 return NULL;
2322
2323 struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
2324 if (bufmgr == NULL)
2325 return NULL;
2326
2327 /* Handles to buffer objects belong to the device fd and are not
2328 * reference counted by the kernel. If the same fd is used by
2329 * multiple parties (threads sharing the same screen bufmgr, or
2330     * even worse, the same device fd passed to multiple libraries),
2331     * ownership of those handles is shared by those independent parties.
2332 *
2333 * Don't do this! Ensure that each library/bufmgr has its own device
2334 * fd so that its namespace does not clash with another.
2335 */
2336 bufmgr->fd = os_dupfd_cloexec(fd);
2337 if (bufmgr->fd == -1)
2338 goto error_dup;
2339
2340 p_atomic_set(&bufmgr->refcount, 1);
2341
2342 simple_mtx_init(&bufmgr->lock, mtx_plain);
2343 simple_mtx_init(&bufmgr->bo_deps_lock, mtx_plain);
2344
2345 list_inithead(&bufmgr->zombie_list);
2346
2347 bufmgr->devinfo = *devinfo;
2348 devinfo = &bufmgr->devinfo;
2349 bufmgr->bo_reuse = bo_reuse;
2350 iris_bufmgr_get_meminfo(bufmgr, devinfo);
2351 bufmgr->kmd_backend = iris_kmd_backend_get(devinfo->kmd_type);
2352
2353 intel_common_update_device_info(bufmgr->fd, devinfo);
2354
2355 if (!iris_bufmgr_init_global_vm(bufmgr))
2356 goto error_init_vm;
2357
2358 STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull);
2359 const uint64_t _4GB = 1ull << 32;
2360 const uint64_t _2GB = 1ul << 31;
2361
2362 /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
2363 const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
2364
2365 const struct {
2366 uint64_t start;
2367 uint64_t size;
2368 } vma[IRIS_MEMZONE_COUNT] = {
2369 [IRIS_MEMZONE_SHADER] = {
2370 .start = PAGE_SIZE,
2371 .size = _4GB_minus_1 - PAGE_SIZE
2372 },
2373 [IRIS_MEMZONE_BINDER] = {
2374 .start = IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE,
2375 .size = IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
2376 },
2377 [IRIS_MEMZONE_SCRATCH] = {
2378 .start = IRIS_MEMZONE_SCRATCH_START,
2379 .size = IRIS_SCRATCH_ZONE_SIZE
2380 },
2381 [IRIS_MEMZONE_SURFACE] = {
2382 .start = IRIS_MEMZONE_SURFACE_START,
2383 .size = _4GB_minus_1 - IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
2384 },
2385 [IRIS_MEMZONE_DYNAMIC] = {
2386 .start = IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,
2387
2388 /* Wa_2209859288: the Tigerlake PRM's workarounds volume says:
2389 *
2390 * "PSDunit is dropping MSB of the blend state pointer from SD
2391 * FIFO [...] Limit the Blend State Pointer to < 2G"
2392 *
2393 * We restrict the dynamic state pool to 2GB so that we don't ever
2394 * get a BLEND_STATE pointer with the MSB set. We aren't likely to
2395 * need the full 4GB for dynamic state anyway.
2396 */
2397 .size = (devinfo->ver >= 12 ? _2GB : _4GB_minus_1)
2398 - IRIS_BORDER_COLOR_POOL_SIZE
2399 },
2400 [IRIS_MEMZONE_OTHER] = {
2401 .start = IRIS_MEMZONE_OTHER_START,
2402
2403 /* Leave the last 4GB out of the high vma range, so that no state
2404 * base address + size can overflow 48 bits.
2405 */
2406 .size = (devinfo->gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START,
2407 },
2408 };
2409
2410 for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
2411 util_vma_heap_init(&bufmgr->vma_allocator[i],
2412 vma[i].start, vma[i].size);
2413 }
2414
2415 if (INTEL_DEBUG(DEBUG_HEAPS)) {
2416 for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
2417 fprintf(stderr, "%-11s | 0x%016" PRIx64 "-0x%016" PRIx64 "\n",
2418 memzone_name(i), vma[i].start,
2419 vma[i].start + vma[i].size - 1);
2420 }
2421 }
2422
2423 bufmgr->bucket_cache = calloc(iris_get_heap_max(bufmgr),
2424 sizeof(*bufmgr->bucket_cache));
2425 if (!bufmgr->bucket_cache)
2426 goto error_bucket_cache;
2427 for (int h = 0; h < iris_get_heap_max(bufmgr); h++)
2428 init_cache_buckets(bufmgr, h);
2429
2430 unsigned min_slab_order = 8; /* 256 bytes */
2431 unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
2432 unsigned num_slab_orders_per_allocator =
2433 (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS;
2434
2435 /* Divide the size order range among slab managers. */
2436 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
2437 unsigned min_order = min_slab_order;
2438 unsigned max_order =
2439 MIN2(min_order + num_slab_orders_per_allocator, max_slab_order);
2440
2441 if (!pb_slabs_init(&bufmgr->bo_slabs[i], min_order, max_order,
2442 iris_get_heap_max(bufmgr), true, bufmgr,
2443 iris_can_reclaim_slab,
2444 iris_slab_alloc,
2445 (void *) iris_slab_free)) {
2446 goto error_slabs_init;
2447 }
2448 min_slab_order = max_order + 1;
2449 }
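
/* Worked example: assuming NUM_SLAB_ALLOCATORS is 3 (defined elsewhere),
 * num_slab_orders_per_allocator is (20 - 8) / 3 = 4, so the allocators end
 * up covering size orders [8, 12], [13, 17] and [18, 20], i.e. roughly
 * 256B-4KB, 8KB-128KB and 256KB-1MB suballocations respectively.
 */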
2450
2451 bufmgr->name_table =
2452 _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2453 bufmgr->handle_table =
2454 _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2455
2456 if (devinfo->has_aux_map) {
2457 bufmgr->aux_map_ctx = intel_aux_map_init(bufmgr, &aux_map_allocator,
2458 devinfo);
2459 assert(bufmgr->aux_map_ctx);
2460 }
2461
2462 iris_init_border_color_pool(bufmgr, &bufmgr->border_color_pool);
2463
2464 if (intel_needs_workaround(devinfo, 14019708328)) {
2465 bufmgr->dummy_aux_bo = iris_bo_alloc(bufmgr, "dummy_aux", 4096, 4096,
2466 IRIS_MEMZONE_OTHER, BO_ALLOC_PLAIN);
2467 if (!bufmgr->dummy_aux_bo)
2468 goto error_alloc_bo;
2469 }
2470
2471 /* Programming note from MI_MEM_FENCE specification:
2472 *
2473 * Software must ensure STATE_SYSTEM_MEM_FENCE_ADDRESS command is
2474 * programmed prior to programming this command.
2475 *
2476 * HAS 1607240579 then provides the size information: 4K
2477 */
2478 if (devinfo->verx10 >= 200) {
2479 bufmgr->mem_fence_bo = iris_bo_alloc(bufmgr, "mem_fence", 4096, 4096,
2480 IRIS_MEMZONE_OTHER, BO_ALLOC_SMEM);
2481 if (!bufmgr->mem_fence_bo)
2482 goto error_alloc_bo;
2483 }
2484
2485 return bufmgr;
2486
2487 error_alloc_bo:
2488 iris_bo_unreference(bufmgr->dummy_aux_bo);
2489 iris_bo_unreference(bufmgr->mem_fence_bo);
2490 iris_destroy_border_color_pool(&bufmgr->border_color_pool);
2491 intel_aux_map_finish(bufmgr->aux_map_ctx);
2492 _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
2493 _mesa_hash_table_destroy(bufmgr->name_table, NULL);
2494 error_slabs_init:
2495 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
2496 if (!bufmgr->bo_slabs[i].groups)
2497 break;
2498
2499 pb_slabs_deinit(&bufmgr->bo_slabs[i]);
2500 }
2501 free(bufmgr->bucket_cache);
2502 error_bucket_cache:
2503 for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++)
2504 util_vma_heap_finish(&bufmgr->vma_allocator[i]);
2505 iris_bufmgr_destroy_global_vm(bufmgr);
2506 error_init_vm:
2507 close(bufmgr->fd);
2508 error_dup:
2509 free(bufmgr);
2510 return NULL;
2511 }
2512
2513 static struct iris_bufmgr *
2514 iris_bufmgr_ref(struct iris_bufmgr *bufmgr)
2515 {
2516 p_atomic_inc(&bufmgr->refcount);
2517 return bufmgr;
2518 }
2519
2520 void
2521 iris_bufmgr_unref(struct iris_bufmgr *bufmgr)
2522 {
2523 simple_mtx_lock(&global_bufmgr_list_mutex);
2524 if (p_atomic_dec_zero(&bufmgr->refcount)) {
2525 list_del(&bufmgr->link);
2526 iris_bufmgr_destroy(bufmgr);
2527 }
2528 simple_mtx_unlock(&global_bufmgr_list_mutex);
2529 }
2530
2531 /** Returns a new unique id, to be used by screens. */
2532 int
2533 iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr)
2534 {
2535 return p_atomic_inc_return(&bufmgr->next_screen_id) - 1;
2536 }
2537
2538 /**
2539  * Gets an already existing GEM buffer manager or creates a new one.
2540 *
2541 * \param fd File descriptor of the opened DRM device.
2542 */
2543 struct iris_bufmgr *
2544 iris_bufmgr_get_for_fd(int fd, bool bo_reuse)
2545 {
2546 struct intel_device_info devinfo;
2547 struct stat st;
2548
2549 if (fstat(fd, &st))
2550 return NULL;
2551
2552 struct iris_bufmgr *bufmgr = NULL;
2553
2554 simple_mtx_lock(&global_bufmgr_list_mutex);
2555 list_for_each_entry(struct iris_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
2556 struct stat iter_st;
2557 if (fstat(iter_bufmgr->fd, &iter_st))
2558 continue;
2559
2560 if (st.st_rdev == iter_st.st_rdev) {
2561 assert(iter_bufmgr->bo_reuse == bo_reuse);
2562 bufmgr = iris_bufmgr_ref(iter_bufmgr);
2563 goto unlock;
2564 }
2565 }
2566
2567    if (!intel_get_device_info_from_fd(fd, &devinfo, 8, -1))
2568       goto unlock;
2569 
2570    if (devinfo.ver < 8 || devinfo.platform == INTEL_PLATFORM_CHV)
2571       goto unlock;
2572
2573 bufmgr = iris_bufmgr_create(&devinfo, fd, bo_reuse);
2574 if (bufmgr)
2575 list_addtail(&bufmgr->link, &global_bufmgr_list);
2576
2577 unlock:
2578 simple_mtx_unlock(&global_bufmgr_list_mutex);
2579
2580 return bufmgr;
2581 }
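
/* Usage sketch (illustrative only): typical screen-creation flow, where all
 * screens for one DRM device share a single bufmgr.
 *
 *    struct iris_bufmgr *bufmgr = iris_bufmgr_get_for_fd(fd, bo_reuse);
 *    if (!bufmgr)
 *       return NULL;
 *    int screen_id = iris_bufmgr_create_screen_id(bufmgr);
 *    // ...use the bufmgr; on screen destruction:
 *    iris_bufmgr_unref(bufmgr);
 */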
2582
2583 int
2584 iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr)
2585 {
2586 return bufmgr->fd;
2587 }
2588
2589 void*
2590 iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr)
2591 {
2592 return bufmgr->aux_map_ctx;
2593 }
2594
2595 simple_mtx_t *
2596 iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr)
2597 {
2598 return &bufmgr->bo_deps_lock;
2599 }
2600
2601 struct iris_border_color_pool *
2602 iris_bufmgr_get_border_color_pool(struct iris_bufmgr *bufmgr)
2603 {
2604 return &bufmgr->border_color_pool;
2605 }
2606
2607 uint64_t
2608 iris_bufmgr_vram_size(struct iris_bufmgr *bufmgr)
2609 {
2610 return bufmgr->vram.size;
2611 }
2612
2613 uint64_t
2614 iris_bufmgr_sram_size(struct iris_bufmgr *bufmgr)
2615 {
2616 return bufmgr->sys.size;
2617 }
2618
2619 const struct intel_device_info *
2620 iris_bufmgr_get_device_info(struct iris_bufmgr *bufmgr)
2621 {
2622 return &bufmgr->devinfo;
2623 }
2624
2625 const struct iris_kmd_backend *
2626 iris_bufmgr_get_kernel_driver_backend(struct iris_bufmgr *bufmgr)
2627 {
2628 return bufmgr->kmd_backend;
2629 }
2630
2631 uint32_t
2632 iris_bufmgr_get_global_vm_id(struct iris_bufmgr *bufmgr)
2633 {
2634 return bufmgr->global_vm_id;
2635 }
2636
2637 bool
2638 iris_bufmgr_use_global_vm_id(struct iris_bufmgr *bufmgr)
2639 {
2640 return bufmgr->use_global_vm;
2641 }
2642
2643 bool
2644 iris_bufmgr_compute_engine_supported(struct iris_bufmgr *bufmgr)
2645 {
2646 return bufmgr->devinfo.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
2647 }
2648
2649 /**
2650  * Return the PAT entry based on the BO heap.
2651 */
2652 const struct intel_device_info_pat_entry *
2653 iris_heap_to_pat_entry(const struct intel_device_info *devinfo,
2654 enum iris_heap heap)
2655 {
2656 switch (heap) {
2657 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
2658 return &devinfo->pat.cached_coherent;
2659 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
2660 return &devinfo->pat.writecombining;
2661 case IRIS_HEAP_DEVICE_LOCAL:
2662 case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
2663 case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
2664 return &devinfo->pat.writecombining;
2665 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
2666 case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
2667 return &devinfo->pat.compressed;
2668 default:
2669 unreachable("invalid heap for platforms using PAT entries");
2670 }
2671 }
2672
2673 struct intel_bind_timeline *
2674 iris_bufmgr_get_bind_timeline(struct iris_bufmgr *bufmgr)
2675 {
2676 return &bufmgr->bind_timeline;
2677 }
2678
2679 uint64_t
2680 iris_bufmgr_get_dummy_aux_address(struct iris_bufmgr *bufmgr)
2681 {
2682 return bufmgr->dummy_aux_bo ? bufmgr->dummy_aux_bo->address : 0;
2683 }
2684
2685 struct iris_bo *
2686 iris_bufmgr_get_mem_fence_bo(struct iris_bufmgr *bufmgr)
2687 {
2688 return bufmgr->mem_fence_bo;
2689 }
2690