1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_bufmgr.c
25 *
26 * The Iris buffer manager.
27 *
28 * XXX: write better comments
29 * - BOs
30 * - Explain BO cache
31 * - main interface to GEM in the kernel
32 */
33
34 #include <util/u_atomic.h>
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <assert.h>
41 #include <sys/ioctl.h>
42 #include <sys/mman.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <stdbool.h>
46 #include <time.h>
47 #include <unistd.h>
48
49 #include "errno.h"
50 #include "common/intel_aux_map.h"
51 #include "common/intel_mem.h"
52 #include "c99_alloca.h"
53 #include "dev/intel_debug.h"
54 #include "common/intel_gem.h"
55 #include "dev/intel_device_info.h"
56 #include "drm-uapi/dma-buf.h"
57 #include "isl/isl.h"
58 #include "util/os_mman.h"
59 #include "util/u_debug.h"
60 #include "util/macros.h"
61 #include "util/hash_table.h"
62 #include "util/list.h"
63 #include "util/os_file.h"
64 #include "util/u_dynarray.h"
65 #include "util/vma.h"
66 #include "iris_bufmgr.h"
67 #include "iris_context.h"
68 #include "string.h"
69 #include "iris_kmd_backend.h"
70 #include "i915/iris_bufmgr.h"
71 #include "xe/iris_bufmgr.h"
72
73 #include <xf86drm.h>
74
75 #ifdef HAVE_VALGRIND
76 #include <valgrind.h>
77 #include <memcheck.h>
78 #define VG(x) x
79 #else
80 #define VG(x)
81 #endif
82
83 /* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
84 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
85 * leaked. All because it does not call VG(cli_free) from its
86 * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
87 * an allocation, we mark it available for use upon mmapping and remove
88 * it upon unmapping.
89 */
90 #define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
91 #define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
92
93 /* On FreeBSD PAGE_SIZE is already defined in
94 * /usr/include/machine/param.h that is indirectly
95 * included here.
96 */
97 #ifndef PAGE_SIZE
98 #define PAGE_SIZE 4096
99 #endif
100
101 #define WARN_ONCE(cond, fmt...) do { \
102 if (unlikely(cond)) { \
103 static bool _warned = false; \
104 if (!_warned) { \
105 fprintf(stderr, "WARNING: "); \
106 fprintf(stderr, fmt); \
107 _warned = true; \
108 } \
109 } \
110 } while (0)
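/* Illustrative usage (hypothetical condition and message, not a call made in
 * this file): warn at most once per call site, with printf-style formatting:
 *
 *    WARN_ONCE(size > MAX_EXPECTED_SIZE,
 *              "unexpectedly large allocation: %llu bytes\n",
 *              (unsigned long long) size);
 */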
111
112 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
113
114 /**
115 * For debugging purposes, this returns a time in seconds.
116 */
117 static double
118 get_time(void)
119 {
120 struct timespec tp;
121
122 clock_gettime(CLOCK_MONOTONIC, &tp);
123
124 return tp.tv_sec + tp.tv_nsec / 1000000000.0;
125 }
126
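/* Atomically add "add" to *v unless *v equals "unless"; returns true if the
 * value was "unless" and therefore no add was performed.  iris_bo_unreference()
 * below relies on this: it only drops the last reference (refcount == 1) while
 * holding the bufmgr lock.
 */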
127 static inline int
128 atomic_add_unless(int *v, int add, int unless)
129 {
130 int c, old;
131 c = p_atomic_read(v);
132 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
133 c = old;
134 return c == unless;
135 }
136
137 static const char *
138 memzone_name(enum iris_memory_zone memzone)
139 {
140 const char *names[] = {
141 [IRIS_MEMZONE_SHADER] = "shader",
142 [IRIS_MEMZONE_BINDER] = "binder",
143 [IRIS_MEMZONE_SCRATCH] = "scratchsurf",
144 [IRIS_MEMZONE_SURFACE] = "surface",
145 [IRIS_MEMZONE_DYNAMIC] = "dynamic",
146 [IRIS_MEMZONE_OTHER] = "other",
147 [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor",
148 };
149 assert(memzone < ARRAY_SIZE(names));
150 return names[memzone];
151 }
152
153 struct bo_cache_bucket {
154 /** List of cached BOs. */
155 struct list_head head;
156
157 /** Size of this bucket, in bytes. */
158 uint64_t size;
159 };
160
161 struct bo_export {
162 /** File descriptor associated with a handle export. */
163 int drm_fd;
164
165 /** GEM handle in drm_fd */
166 uint32_t gem_handle;
167
168 struct list_head link;
169 };
170
171 struct iris_memregion {
172 struct intel_memory_class_instance *region;
173 uint64_t size;
174 };
175
176 #define NUM_SLAB_ALLOCATORS 3
177
178 struct iris_slab {
179 struct pb_slab base;
180
181 /** The BO representing the entire slab */
182 struct iris_bo *bo;
183
184 /** Array of iris_bo structs representing BOs allocated out of this slab */
185 struct iris_bo *entries;
186 };
187
188 #define BUCKET_ARRAY_SIZE (14 * 4)
189
190 struct iris_bucket_cache {
191 struct bo_cache_bucket bucket[BUCKET_ARRAY_SIZE];
192 int num_buckets;
193 };
194
195 struct iris_bufmgr {
196 /**
197 * Link in the global list of buffer managers.
198 */
199 struct list_head link;
200
201 uint32_t refcount;
202
203 int fd;
204
205 simple_mtx_t lock;
206 simple_mtx_t bo_deps_lock;
207
208 /** Array of lists of cached gem objects of power-of-two sizes */
209 struct iris_bucket_cache bucket_cache[IRIS_HEAP_MAX];
210
211 time_t time;
212
213 struct hash_table *name_table;
214 struct hash_table *handle_table;
215
216 /**
217 * List of BOs which we've effectively freed, but are hanging on to
218 * until they're idle before closing and returning the VMA.
219 */
220 struct list_head zombie_list;
221
222 struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT];
223
224 struct iris_memregion vram, sys;
225
226 /* Used only when use_global_vm is true. */
227 uint32_t global_vm_id;
228
229 int next_screen_id;
230
231 struct intel_device_info devinfo;
232 const struct iris_kmd_backend *kmd_backend;
233 struct intel_bind_timeline bind_timeline; /* Xe only */
234 bool bo_reuse:1;
235 bool use_global_vm:1;
236 bool compute_engine_supported:1;
237
238 struct intel_aux_map_context *aux_map_ctx;
239
240 struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
241
242 struct iris_border_color_pool border_color_pool;
243 };
244
245 static simple_mtx_t global_bufmgr_list_mutex = SIMPLE_MTX_INITIALIZER;
246 static struct list_head global_bufmgr_list = {
247 .next = &global_bufmgr_list,
248 .prev = &global_bufmgr_list,
249 };
250
251 static void bo_free(struct iris_bo *bo);
252
253 static struct iris_bo *
254 find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
255 {
256 struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
257 struct iris_bo *bo = entry ? entry->data : NULL;
258
259 if (bo) {
260 assert(iris_bo_is_external(bo));
261 assert(iris_bo_is_real(bo));
262 assert(!bo->real.reusable);
263
264 /* Being non-reusable, the BO cannot be in the cache lists, but it
265 * may be in the zombie list if it had reached zero references, but
266 * we hadn't yet closed it...and then reimported the same BO. If it
267 * is, then remove it since it's now been resurrected.
268 */
269 if (list_is_linked(&bo->head))
270 list_del(&bo->head);
271
272 iris_bo_reference(bo);
273 }
274
275 return bo;
276 }
277
278 /**
279 * This function finds the bucket that best fits the input size.
280 * It works in O(1) by computing the bucket index directly from the
281 * requested size, instead of iterating over all the buckets.
282 */
283 static struct bo_cache_bucket *
284 bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size,
285 enum iris_heap heap, unsigned flags)
286 {
287 if (flags & BO_ALLOC_PROTECTED)
288 return NULL;
289
290 const struct intel_device_info *devinfo = &bufmgr->devinfo;
291 struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
292
293 if (devinfo->kmd_type == INTEL_KMD_TYPE_XE &&
294 (flags & (BO_ALLOC_SHARED | BO_ALLOC_SCANOUT)))
295 return NULL;
296
297 /* Calculating the pages and rounding up to the page size. */
298 const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
299
300 /* Row Bucket sizes clz((x-1) | 3) Row Column
301 * in pages stride size
302 * 0: 1 2 3 4 -> 30 30 30 30 4 1
303 * 1: 5 6 7 8 -> 29 29 29 29 4 1
304 * 2: 10 12 14 16 -> 28 28 28 28 8 2
305 * 3: 20 24 28 32 -> 27 27 27 27 16 4
306 */
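   /* Worked example (following the table above): a 24-page request lands in
    * row 3.  clz((24-1) | 3) = clz(23) = 27, so row = 30 - 27 = 3 and
    * row_max_pages = 32.  prev_row_max_pages = 16 and col_size_log2 = 2, so
    * col = (24 - 16 + 3) >> 2 = 2, giving index = 3 * 4 + (2 - 1) = 13,
    * i.e. the 24-page (96 KB) bucket.
    */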
307 const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
308 const unsigned row_max_pages = 4 << row;
309
310 /* The '& ~2' is the special case for row 1. In row 1, max pages /
311 * 2 is 2, but the previous row maximum is zero (because there is
312 * no previous row). All row maximum sizes are power of 2, so that
313 * is the only case where that bit will be set.
314 */
315 const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
316 int col_size_log2 = row - 1;
317 col_size_log2 += (col_size_log2 < 0);
318
319 const unsigned col = (pages - prev_row_max_pages +
320 ((1 << col_size_log2) - 1)) >> col_size_log2;
321
322 /* Calculating the index based on the row and column. */
323 const unsigned index = (row * 4) + (col - 1);
324
325 return (index < cache->num_buckets) ? &cache->bucket[index] : NULL;
326 }
327
328 enum iris_memory_zone
329 iris_memzone_for_address(uint64_t address)
330 {
331 STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START);
332 STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_SCRATCH_START);
333 STATIC_ASSERT(IRIS_MEMZONE_SCRATCH_START == IRIS_MEMZONE_BINDER_START);
334 STATIC_ASSERT(IRIS_MEMZONE_BINDER_START > IRIS_MEMZONE_SHADER_START);
335 STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START > IRIS_MEMZONE_SURFACE_START);
336 STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);
337
338 if (address >= IRIS_MEMZONE_OTHER_START)
339 return IRIS_MEMZONE_OTHER;
340
341 if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
342 return IRIS_MEMZONE_BORDER_COLOR_POOL;
343
344 if (address > IRIS_MEMZONE_DYNAMIC_START)
345 return IRIS_MEMZONE_DYNAMIC;
346
347 if (address >= IRIS_MEMZONE_SURFACE_START)
348 return IRIS_MEMZONE_SURFACE;
349
350 if (address >= (IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE))
351 return IRIS_MEMZONE_BINDER;
352
353 if (address >= IRIS_MEMZONE_SCRATCH_START)
354 return IRIS_MEMZONE_SCRATCH;
355
356 return IRIS_MEMZONE_SHADER;
357 }
358
359 /**
360 * Allocate a section of virtual memory for a buffer, assigning an address.
361 *
362 * This hands out an address from the util_vma heap for the buffer's memory
363 * zone; the border color pool is special-cased to a single fixed address.
364 */
365 static uint64_t
366 vma_alloc(struct iris_bufmgr *bufmgr,
367 enum iris_memory_zone memzone,
368 uint64_t size,
369 uint64_t alignment)
370 {
371 simple_mtx_assert_locked(&bufmgr->lock);
372
373 const unsigned _2mb = 2 * 1024 * 1024;
374
375 /* Force minimum alignment based on device requirements */
376 assert((alignment & (alignment - 1)) == 0);
377 alignment = MAX2(alignment, bufmgr->devinfo.mem_alignment);
378
379 /* If the allocation is a multiple of 2MB, ensure the virtual address is
380 * aligned to 2MB, so that it's possible for the kernel to use 64K pages.
381 */
382 if (size % _2mb == 0)
383 alignment = MAX2(alignment, _2mb);
384
385 if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
386 return IRIS_BORDER_COLOR_POOL_ADDRESS;
387
388 uint64_t addr =
389 util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment);
390
391 assert((addr >> 48ull) == 0);
392 assert((addr % alignment) == 0);
393
394 return intel_canonical_address(addr);
395 }
396
397 static void
398 vma_free(struct iris_bufmgr *bufmgr,
399 uint64_t address,
400 uint64_t size)
401 {
402 simple_mtx_assert_locked(&bufmgr->lock);
403
404 if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
405 return;
406
407 /* Un-canonicalize the address. */
408 address = intel_48b_address(address);
409
410 if (address == 0ull)
411 return;
412
413 enum iris_memory_zone memzone = iris_memzone_for_address(address);
414
415 assert(memzone < ARRAY_SIZE(bufmgr->vma_allocator));
416
417 util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
418 }
419
420 /* Exports a BO's implicit synchronization state to a drm_syncobj, returning
421 * its wrapping iris_syncobj. A new drm_syncobj is created, and the caller
422 * must destroy it after the execbuf ioctl.
423 */
424 struct iris_syncobj *
425 iris_bo_export_sync_state(struct iris_bo *bo)
426 {
427 struct iris_bufmgr *bufmgr = bo->bufmgr;
428 int drm_fd = iris_bufmgr_get_fd(bufmgr);
429
430 struct iris_syncobj *iris_syncobj = iris_create_syncobj(bufmgr);
431
432 struct dma_buf_export_sync_file export_sync_file_ioctl = {
433 .flags = DMA_BUF_SYNC_RW, /* TODO */
434 .fd = -1,
435 };
436 if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
437 &export_sync_file_ioctl)) {
438 fprintf(stderr, "DMA_BUF_IOCTL_EXPORT_SYNC_FILE ioctl failed (%d)\n",
439 errno);
440 goto error_export;
441 }
442
443 int sync_file_fd = export_sync_file_ioctl.fd;
444 assert(sync_file_fd >= 0);
445
446 struct drm_syncobj_handle syncobj_import_ioctl = {
447 .handle = iris_syncobj->handle,
448 .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
449 .fd = sync_file_fd,
450 };
451 if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
452 &syncobj_import_ioctl)) {
453 fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE ioctl failed (%d)\n",
454 errno);
455 }
456
457 close(sync_file_fd);
458
459 return iris_syncobj;
460 error_export:
461 iris_syncobj_destroy(bufmgr, iris_syncobj);
462 return NULL;
463 }
464
465 /* Import the state of a sync_file_fd (which we should have gotten from
466 * batch_syncobj_to_sync_file_fd) into a BO as its implicit synchronization
467 * state.
468 */
469 void
470 iris_bo_import_sync_state(struct iris_bo *bo, int sync_file_fd)
471 {
472 struct dma_buf_import_sync_file import_sync_file_ioctl = {
473 .flags = DMA_BUF_SYNC_WRITE,
474 .fd = sync_file_fd,
475 };
476 if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
477 &import_sync_file_ioctl))
478 fprintf(stderr, "DMA_BUF_IOCTL_IMPORT_SYNC_FILE ioctl failed (%d)\n",
479 errno);
480 }
481
482 /* A timeout of 0 just checks for busyness. */
483 static int
484 iris_bo_wait_syncobj(struct iris_bo *bo, int64_t timeout_ns)
485 {
486 int ret = 0;
487 struct iris_bufmgr *bufmgr = bo->bufmgr;
488 const bool is_external = iris_bo_is_real(bo) && bo->real.prime_fd != -1;
489 struct iris_syncobj *external_implicit_syncobj = NULL;
490
491 /* If we know it's idle, don't bother with the kernel round trip.
492 * Can't do that for Xe KMD with external BOs since we have to check the
493 * implicit synchronization information.
494 */
495 if (!is_external && bo->idle)
496 return 0;
497
498 simple_mtx_lock(&bufmgr->bo_deps_lock);
499
500 const int handles_len = bo->deps_size * IRIS_BATCH_COUNT * 2 + is_external;
501 uint32_t *handles = handles_len <= 32 ?
502 (uint32_t *)alloca(handles_len * sizeof(*handles)) :
503 (uint32_t *)malloc(handles_len * sizeof(*handles));
504 int handle_count = 0;
505
506 if (is_external) {
507 external_implicit_syncobj = iris_bo_export_sync_state(bo);
508 if (external_implicit_syncobj)
509 handles[handle_count++] = external_implicit_syncobj->handle;
510 }
511
512 for (int d = 0; d < bo->deps_size; d++) {
513 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
514 struct iris_syncobj *r = bo->deps[d].read_syncobjs[b];
515 struct iris_syncobj *w = bo->deps[d].write_syncobjs[b];
516 if (r)
517 handles[handle_count++] = r->handle;
518 if (w)
519 handles[handle_count++] = w->handle;
520 }
521 }
522
523 if (handle_count == 0)
524 goto out;
525
526 /* Unlike the gem wait, negative values are not infinite here. */
527 int64_t timeout_abs = os_time_get_absolute_timeout(timeout_ns);
528 if (timeout_abs < 0)
529 timeout_abs = INT64_MAX;
530
531 struct drm_syncobj_wait args = {
532 .handles = (uintptr_t) handles,
533 .timeout_nsec = timeout_abs,
534 .count_handles = handle_count,
535 .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
536 };
537
538 ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
539 if (ret != 0) {
540 ret = -errno;
541 goto out;
542 }
543
544 /* We've just waited on everything, so clear all the deps. */
545 for (int d = 0; d < bo->deps_size; d++) {
546 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
547 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
548 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
549 }
550 }
551
552 out:
553 if (handles_len > 32)
554 free(handles);
555 if (external_implicit_syncobj)
556 iris_syncobj_reference(bufmgr, &external_implicit_syncobj, NULL);
557
558 simple_mtx_unlock(&bufmgr->bo_deps_lock);
559 return ret;
560 }
561
562 static bool
563 iris_bo_busy_syncobj(struct iris_bo *bo)
564 {
565 return iris_bo_wait_syncobj(bo, 0) == -ETIME;
566 }
567
568 bool
569 iris_bo_busy(struct iris_bo *bo)
570 {
571 bool busy;
572
573 switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
574 case INTEL_KMD_TYPE_I915:
575 if (iris_bo_is_external(bo))
576 busy = iris_i915_bo_busy_gem(bo);
577 else
578 busy = iris_bo_busy_syncobj(bo);
579 break;
580 case INTEL_KMD_TYPE_XE:
581 busy = iris_bo_busy_syncobj(bo);
582 break;
583 default:
584 unreachable("missing");
585 busy = true;
586 }
587
588 bo->idle = !busy;
589
590 return busy;
591 }
592
593 /**
594 * Specify the volatility of the buffer.
595 * \param bo The buffer to mark
596 * \param state The purgeable status
597 *
598 * Use IRIS_MADVICE_DONT_NEED to mark the buffer as purgeable, and it will be
599 * reclaimed under memory pressure. If you subsequently require the buffer,
600 * then you must pass IRIS_MADVICE_WILL_NEED to mark the buffer as required.
601 *
602 * Returns true if the buffer was retained, or false if it was discarded
603 * whilst marked as IRIS_MADVICE_DONT_NEED.
604 */
605 static inline bool
606 iris_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
607 {
608 /* We can't madvise suballocated BOs. */
609 assert(iris_bo_is_real(bo));
610
611 return bo->bufmgr->kmd_backend->bo_madvise(bo, state);
612 }
613
614 static struct iris_bo *
615 bo_calloc(void)
616 {
617 struct iris_bo *bo = calloc(1, sizeof(*bo));
618 if (!bo)
619 return NULL;
620
621 list_inithead(&bo->real.exports);
622
623 bo->hash = _mesa_hash_pointer(bo);
624
625 return bo;
626 }
627
628 static void
629 bo_unmap(struct iris_bo *bo)
630 {
631 assert(iris_bo_is_real(bo));
632
633 VG_NOACCESS(bo->real.map, bo->size);
634 os_munmap(bo->real.map, bo->size);
635 bo->real.map = NULL;
636 }
637
638 static struct pb_slabs *
639 get_slabs(struct iris_bufmgr *bufmgr, uint64_t size)
640 {
641 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
642 struct pb_slabs *slabs = &bufmgr->bo_slabs[i];
643
644 if (size <= 1ull << (slabs->min_order + slabs->num_orders - 1))
645 return slabs;
646 }
647
648 unreachable("should have found a valid slab for this size");
649 }
650
651 /* Return the power of two size of a slab entry matching the input size. */
652 static unsigned
653 get_slab_pot_entry_size(struct iris_bufmgr *bufmgr, unsigned size)
654 {
655 unsigned entry_size = util_next_power_of_two(size);
656 unsigned min_entry_size = 1 << bufmgr->bo_slabs[0].min_order;
657
658 return MAX2(entry_size, min_entry_size);
659 }
660
661 /* Return the slab entry alignment. */
662 static unsigned
663 get_slab_entry_alignment(struct iris_bufmgr *bufmgr, unsigned size)
664 {
665 unsigned entry_size = get_slab_pot_entry_size(bufmgr, size);
666
667 if (size <= entry_size * 3 / 4)
668 return entry_size / 4;
669
670 return entry_size;
671 }
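/* For example: a 96 KB request rounds up to a 128 KB power-of-two entry in
 * get_slab_pot_entry_size() (assuming the minimum slab entry is smaller than
 * that).  Since 96 KB is exactly 3/4 of 128 KB, the helper above only
 * guarantees a 32 KB (entry_size / 4) alignment for it.
 */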
672
673 static bool
674 iris_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
675 {
676 struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
677
678 return !iris_bo_busy(bo);
679 }
680
681 static void
682 iris_slab_free(void *priv, struct pb_slab *pslab)
683 {
684 struct iris_bufmgr *bufmgr = priv;
685 struct iris_slab *slab = (void *) pslab;
686 struct intel_aux_map_context *aux_map_ctx = bufmgr->aux_map_ctx;
687
688 assert(!slab->bo->aux_map_address);
689
690 /* Since we're freeing the whole slab, all buffers allocated out of it
691 * must be reclaimable. We require buffers to be idle to be reclaimed
692 * (see iris_can_reclaim_slab()), so we know all entries must be idle.
693 * Therefore, we can safely unmap their aux table entries.
694 */
695 for (unsigned i = 0; i < pslab->num_entries; i++) {
696 struct iris_bo *bo = &slab->entries[i];
697 if (aux_map_ctx && bo->aux_map_address) {
698 intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size);
699 bo->aux_map_address = 0;
700 }
701
702 /* Unref read/write dependency syncobjs and free the array. */
703 for (int d = 0; d < bo->deps_size; d++) {
704 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
705 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
706 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
707 }
708 }
709 free(bo->deps);
710 }
711
712 iris_bo_unreference(slab->bo);
713
714 free(slab->entries);
715 free(slab);
716 }
717
718 static struct pb_slab *
719 iris_slab_alloc(void *priv,
720 unsigned heap,
721 unsigned entry_size,
722 unsigned group_index)
723 {
724 struct iris_bufmgr *bufmgr = priv;
725 struct iris_slab *slab = calloc(1, sizeof(struct iris_slab));
726 uint32_t flags;
727 unsigned slab_size = 0;
728 /* We only support slab allocation for IRIS_MEMZONE_OTHER */
729 enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
730
731 if (!slab)
732 return NULL;
733
734 struct pb_slabs *slabs = bufmgr->bo_slabs;
735
736 /* Determine the slab buffer size. */
737 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
738 unsigned max_entry_size =
739 1 << (slabs[i].min_order + slabs[i].num_orders - 1);
740
741 if (entry_size <= max_entry_size) {
742 /* The slab size is twice the size of the largest possible entry. */
743 slab_size = max_entry_size * 2;
744
745 if (!util_is_power_of_two_nonzero(entry_size)) {
746 assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));
747
748 /* If the entry size is 3/4 of a power of two, we would waste
749 * space and not gain anything if we allocated only twice the
750 * power of two for the backing buffer:
751 *
752 * 2 * 3/4 = 1.5 usable with buffer size 2
753 *
754 * Allocating 5 times the entry size leads us to the next power
755 * of two and results in a much better memory utilization:
756 *
757 * 5 * 3/4 = 3.75 usable with buffer size 4
758 */
759 if (entry_size * 5 > slab_size)
760 slab_size = util_next_power_of_two(entry_size * 5);
761 }
762
763 /* The largest slab should have the same size as the PTE fragment
764 * size to get faster address translation.
765 *
766 * TODO: move this to intel_device_info?
767 */
768 const unsigned pte_size = 2 * 1024 * 1024;
769
770 if (i == NUM_SLAB_ALLOCATORS - 1 && slab_size < pte_size)
771 slab_size = pte_size;
772
773 break;
774 }
775 }
776 assert(slab_size != 0);
777
778 if (heap == IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT ||
779 heap == IRIS_HEAP_SYSTEM_MEMORY_UNCACHED)
780 flags = BO_ALLOC_SMEM;
781 else if (heap == IRIS_HEAP_DEVICE_LOCAL)
782 flags = BO_ALLOC_LMEM;
783 else
784 flags = BO_ALLOC_PLAIN;
785
786 slab->bo =
787 iris_bo_alloc(bufmgr, "slab", slab_size, slab_size, memzone, flags);
788 if (!slab->bo)
789 goto fail;
790
791 slab_size = slab->bo->size;
792
793 slab->base.num_entries = slab_size / entry_size;
794 slab->base.num_free = slab->base.num_entries;
795 slab->base.group_index = group_index;
796 slab->base.entry_size = entry_size;
797 slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
798 if (!slab->entries)
799 goto fail_bo;
800
801 list_inithead(&slab->base.free);
802
803 for (unsigned i = 0; i < slab->base.num_entries; i++) {
804 struct iris_bo *bo = &slab->entries[i];
805
806 bo->size = entry_size;
807 bo->bufmgr = bufmgr;
808 bo->hash = _mesa_hash_pointer(bo);
809 bo->gem_handle = 0;
810 bo->address = intel_canonical_address(slab->bo->address + i * entry_size);
811 bo->aux_map_address = 0;
812 bo->index = -1;
813 bo->refcount = 0;
814 bo->idle = true;
815 bo->zeroed = slab->bo->zeroed;
816
817 bo->slab.entry.slab = &slab->base;
818
819 bo->slab.real = iris_get_backing_bo(slab->bo);
820
821 list_addtail(&bo->slab.entry.head, &slab->base.free);
822 }
823
824 return &slab->base;
825
826 fail_bo:
827 iris_bo_unreference(slab->bo);
828 fail:
829 free(slab);
830 return NULL;
831 }
832
833 /**
834 * Selects a heap for the given buffer allocation flags.
835 *
836 * This determines the cacheability, coherency, and mmap mode settings.
837 */
838 static enum iris_heap
839 flags_to_heap(struct iris_bufmgr *bufmgr, unsigned flags)
840 {
841 const struct intel_device_info *devinfo = &bufmgr->devinfo;
842
843 if (bufmgr->vram.size > 0) {
844 /* Discrete GPUs currently always snoop CPU caches. */
845 if ((flags & BO_ALLOC_SMEM) || (flags & BO_ALLOC_COHERENT))
846 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
847
848 if ((flags & BO_ALLOC_LMEM) ||
849 ((flags & BO_ALLOC_SCANOUT) && !(flags & BO_ALLOC_SHARED)))
850 return IRIS_HEAP_DEVICE_LOCAL;
851
852 return IRIS_HEAP_DEVICE_LOCAL_PREFERRED;
853 } else if (devinfo->has_llc) {
854 assert(!(flags & BO_ALLOC_LMEM));
855
856 if (flags & (BO_ALLOC_SCANOUT | BO_ALLOC_SHARED))
857 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
858
859 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
860 } else {
861 assert(!devinfo->has_llc);
862 assert(!(flags & BO_ALLOC_LMEM));
863
864 if (flags & BO_ALLOC_COHERENT)
865 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
866
867 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
868 }
869 }
870
871 static bool
872 zero_bo(struct iris_bufmgr *bufmgr,
873 unsigned flags,
874 struct iris_bo *bo)
875 {
876 assert(flags & BO_ALLOC_ZEROED);
877
878 if (bo->zeroed)
879 return true;
880
881 if (bufmgr->devinfo.has_flat_ccs && (flags & BO_ALLOC_LMEM)) {
882 /* With flat CCS, all allocations in LMEM have memory ranges with
883 * corresponding CCS elements. These elements are only accessible
884 * through GPU commands, but we don't issue GPU commands here.
885 */
886 return false;
887 }
888
889 void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
890 if (!map)
891 return false;
892
893 memset(map, 0, bo->size);
894 bo->zeroed = true;
895 return true;
896 }
897
898 static struct iris_bo *
899 alloc_bo_from_slabs(struct iris_bufmgr *bufmgr,
900 const char *name,
901 uint64_t size,
902 uint32_t alignment,
903 unsigned flags)
904 {
905 if (flags & BO_ALLOC_NO_SUBALLOC)
906 return NULL;
907
908 struct pb_slabs *last_slab = &bufmgr->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
909 unsigned max_slab_entry_size =
910 1 << (last_slab->min_order + last_slab->num_orders - 1);
911
912 if (size > max_slab_entry_size)
913 return NULL;
914
915 struct pb_slab_entry *entry;
916
917 enum iris_heap heap = flags_to_heap(bufmgr, flags);
918
919 unsigned alloc_size = size;
920
921 /* Always use slabs for sizes less than 4 KB because the kernel aligns
922 * everything to 4 KB.
923 */
924 if (size < alignment && alignment <= 4 * 1024)
925 alloc_size = alignment;
926
927 if (alignment > get_slab_entry_alignment(bufmgr, alloc_size)) {
928 /* 3/4 allocations can return too small an alignment.
929 * Try again with a power of two allocation size.
930 */
931 unsigned pot_size = get_slab_pot_entry_size(bufmgr, alloc_size);
932
933 if (alignment <= pot_size) {
934 /* This size works but wastes some memory to fulfill the alignment. */
935 alloc_size = pot_size;
936 } else {
937 /* can't fulfill alignment requirements */
938 return NULL;
939 }
940 }
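   /* e.g. a 96 KB request with 64 KB alignment: the 3/4-sized entry only
    * guarantees 32 KB alignment, so alloc_size is bumped to the 128 KB
    * power-of-two entry, which satisfies the 64 KB requirement.
    */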
941
942 struct pb_slabs *slabs = get_slabs(bufmgr, alloc_size);
943 entry = pb_slab_alloc(slabs, alloc_size, heap);
944 if (!entry) {
945 /* Clean up and try again... */
946 pb_slabs_reclaim(slabs);
947
948 entry = pb_slab_alloc(slabs, alloc_size, heap);
949 }
950 if (!entry)
951 return NULL;
952
953 struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
954
955 if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
956 /* This buffer was associated with an aux-buffer range. We only allow
957 * slab allocated buffers to be reclaimed when idle (not in use by an
958 * executing batch). (See iris_can_reclaim_slab().) So we know that
959 * our previous aux mapping is no longer in use, and we can safely
960 * remove it.
961 */
962 intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
963 bo->size);
964 bo->aux_map_address = 0;
965 }
966
967 p_atomic_set(&bo->refcount, 1);
968 bo->name = name;
969 bo->size = size;
970
971 /* Zero the contents if necessary. If this fails, fall back to
972 * allocating a fresh BO, which will always be zeroed by the kernel.
973 */
974 if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
975 pb_slab_free(slabs, &bo->slab.entry);
976 return NULL;
977 }
978
979 return bo;
980 }
981
982 static struct iris_bo *
983 alloc_bo_from_cache(struct iris_bufmgr *bufmgr,
984 struct bo_cache_bucket *bucket,
985 uint32_t alignment,
986 enum iris_memory_zone memzone,
987 enum iris_mmap_mode mmap_mode,
988 unsigned flags,
989 bool match_zone)
990 {
991 if (!bucket)
992 return NULL;
993
994 struct iris_bo *bo = NULL;
995
996 simple_mtx_assert_locked(&bufmgr->lock);
997
998 list_for_each_entry_safe(struct iris_bo, cur, &bucket->head, head) {
999 assert(iris_bo_is_real(cur));
1000
1001 /* Find one that's got the right mapping type. We used to swap maps
1002 * around but the kernel doesn't allow this on discrete GPUs.
1003 */
1004 if (mmap_mode != cur->real.mmap_mode)
1005 continue;
1006
1007 /* Try a little harder to find one that's already in the right memzone */
1008 if (match_zone && memzone != iris_memzone_for_address(cur->address))
1009 continue;
1010
1011 if (cur->real.capture != !!(flags & BO_ALLOC_CAPTURE))
1012 continue;
1013
1014 /* If the last BO in the cache is busy, there are no idle BOs. Bail,
1015 * either falling back to a non-matching memzone, or if that fails,
1016 * allocating a fresh buffer.
1017 */
1018 if (iris_bo_busy(cur))
1019 return NULL;
1020
1021 list_del(&cur->head);
1022
1023 /* Tell the kernel we need this BO, and check that it still exists. */
1024 if (!iris_bo_madvise(cur, IRIS_MADVICE_WILL_NEED)) {
1025 /* This BO was purged, throw it out and keep looking. */
1026 bo_free(cur);
1027 continue;
1028 }
1029
1030 if (cur->aux_map_address) {
1031 /* This buffer was associated with an aux-buffer range. We make sure
1032 * that buffers are not reused from the cache while the buffer is (busy)
1033 * being used by an executing batch. Since we are here, the buffer is no
1034 * longer being used by a batch and the buffer was deleted (in order to
1035 * end up in the cache). Therefore its old aux-buffer range can be
1036 * removed from the aux-map.
1037 */
1038 if (cur->bufmgr->aux_map_ctx)
1039 intel_aux_map_unmap_range(cur->bufmgr->aux_map_ctx, cur->address,
1040 cur->size);
1041 cur->aux_map_address = 0;
1042 }
1043
1044 /* If the cached BO isn't in the right memory zone, or the alignment
1045 * isn't sufficient, free the old memory and assign it a new address.
1046 */
1047 if (memzone != iris_memzone_for_address(cur->address) ||
1048 cur->address % alignment != 0) {
1049 if (!bufmgr->kmd_backend->gem_vm_unbind(cur)) {
1050 DBG("Unable to unbind vm of buf %u\n", cur->gem_handle);
1051 bo_free(cur);
1052 continue;
1053 }
1054
1055 vma_free(bufmgr, cur->address, cur->size);
1056 cur->address = 0ull;
1057 }
1058
1059 bo = cur;
1060 break;
1061 }
1062
1063 if (!bo)
1064 return NULL;
1065
1066 /* Zero the contents if necessary. If this fails, fall back to
1067 * allocating a fresh BO, which will always be zeroed by the kernel.
1068 */
1069 assert(bo->zeroed == false);
1070 if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
1071 bo_free(bo);
1072 return NULL;
1073 }
1074
1075 return bo;
1076 }
1077
1078 static struct iris_bo *
1079 alloc_fresh_bo(struct iris_bufmgr *bufmgr, uint64_t bo_size, unsigned flags)
1080 {
1081 struct iris_bo *bo = bo_calloc();
1082 if (!bo)
1083 return NULL;
1084
1085 /* Try to allocate memory in multiples of 2MB, as this allows us to use
1086 * 64K pages rather than the less-efficient 4K pages. Most BOs smaller
1087 * than 64MB should hit the BO cache or slab allocations anyway, so this
1088 * shouldn't waste too much memory. We do exclude small (< 1MB) sizes to
1089 * be defensive in case any of those bypass the caches and end up here.
1090 */
1091 if (bo_size >= 1024 * 1024)
1092 bo_size = align64(bo_size, 2 * 1024 * 1024);
1093
1094 bo->real.heap = flags_to_heap(bufmgr, flags);
1095
1096 const struct intel_memory_class_instance *regions[2];
1097 uint16_t num_regions = 0;
1098
1099 if (bufmgr->vram.size > 0) {
1100 switch (bo->real.heap) {
1101 case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1102 /* For vram allocations, still use system memory as a fallback. */
1103 regions[num_regions++] = bufmgr->vram.region;
1104 regions[num_regions++] = bufmgr->sys.region;
1105 break;
1106 case IRIS_HEAP_DEVICE_LOCAL:
1107 regions[num_regions++] = bufmgr->vram.region;
1108 break;
1109 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1110 regions[num_regions++] = bufmgr->sys.region;
1111 break;
1112 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1113 /* not valid; discrete cards always enable snooping */
1114 case IRIS_HEAP_MAX:
1115 unreachable("invalid heap for BO");
1116 }
1117 } else {
1118 regions[num_regions++] = bufmgr->sys.region;
1119 }
1120
1121 bo->gem_handle = bufmgr->kmd_backend->gem_create(bufmgr, regions,
1122 num_regions, bo_size,
1123 bo->real.heap, flags);
1124 if (bo->gem_handle == 0) {
1125 free(bo);
1126 return NULL;
1127 }
1128 bo->bufmgr = bufmgr;
1129 bo->size = bo_size;
1130 bo->idle = true;
1131 bo->zeroed = true;
1132 bo->real.capture = (flags & BO_ALLOC_CAPTURE) != 0;
1133
1134 return bo;
1135 }
1136
1137 const char *
1138 iris_heap_to_string[IRIS_HEAP_MAX] = {
1139 [IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT] = "system-cached-coherent",
1140 [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED] = "system-uncached",
1141 [IRIS_HEAP_DEVICE_LOCAL] = "local",
1142 [IRIS_HEAP_DEVICE_LOCAL_PREFERRED] = "local-preferred",
1143 };
1144
1145 static enum iris_mmap_mode
1146 heap_to_mmap_mode(struct iris_bufmgr *bufmgr, enum iris_heap heap)
1147 {
1148 const struct intel_device_info *devinfo = &bufmgr->devinfo;
1149
1150 switch (heap) {
1151 case IRIS_HEAP_DEVICE_LOCAL:
1152 return intel_vram_all_mappable(devinfo) ? IRIS_MMAP_WC : IRIS_MMAP_NONE;
1153 case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1154 return IRIS_MMAP_WC;
1155 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1156 return IRIS_MMAP_WB;
1157 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1158 return IRIS_MMAP_WC;
1159 default:
1160 unreachable("invalid heap");
1161 }
1162 }
1163
1164 struct iris_bo *
1165 iris_bo_alloc(struct iris_bufmgr *bufmgr,
1166 const char *name,
1167 uint64_t size,
1168 uint32_t alignment,
1169 enum iris_memory_zone memzone,
1170 unsigned flags)
1171 {
1172 struct iris_bo *bo;
1173 unsigned int page_size = getpagesize();
1174 enum iris_heap heap = flags_to_heap(bufmgr, flags);
1175 struct bo_cache_bucket *bucket =
1176 bucket_for_size(bufmgr, size, heap, flags);
1177
1178 if (memzone != IRIS_MEMZONE_OTHER || (flags & BO_ALLOC_COHERENT))
1179 flags |= BO_ALLOC_NO_SUBALLOC;
1180
1181 /* By default, capture all driver-internal buffers like shader kernels,
1182 * surface states, dynamic states, border colors, and so on.
1183 */
1184 if (memzone < IRIS_MEMZONE_OTHER || INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1185 flags |= BO_ALLOC_CAPTURE;
1186
1187 bo = alloc_bo_from_slabs(bufmgr, name, size, alignment, flags);
1188
1189 if (bo)
1190 return bo;
1191
1192 /* Round the size up to the bucket size, or if we don't have caching
1193 * at this size, a multiple of the page size.
1194 */
1195 uint64_t bo_size =
1196 bucket ? bucket->size : MAX2(align64(size, page_size), page_size);
1197 enum iris_mmap_mode mmap_mode = heap_to_mmap_mode(bufmgr, heap);
1198
1199 simple_mtx_lock(&bufmgr->lock);
1200
1201 /* Get a buffer out of the cache if available. First, we try to find
1202 * one with a matching memory zone so we can avoid reallocating VMA.
1203 */
1204 bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1205 flags, true);
1206
1207 /* If that fails, we try for any cached BO, without matching memzone. */
1208 if (!bo) {
1209 bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1210 flags, false);
1211 }
1212
1213 simple_mtx_unlock(&bufmgr->lock);
1214
1215 if (!bo) {
1216 bo = alloc_fresh_bo(bufmgr, bo_size, flags);
1217 if (!bo)
1218 return NULL;
1219 }
1220
1221 if (bo->address == 0ull) {
1222 simple_mtx_lock(&bufmgr->lock);
1223 bo->address = vma_alloc(bufmgr, memzone, bo->size, alignment);
1224 simple_mtx_unlock(&bufmgr->lock);
1225
1226 if (bo->address == 0ull)
1227 goto err_free;
1228
1229 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1230 goto err_vm_alloc;
1231 }
1232
1233 bo->name = name;
1234 p_atomic_set(&bo->refcount, 1);
1235 bo->real.reusable = bucket && bufmgr->bo_reuse;
1236 bo->real.protected = flags & BO_ALLOC_PROTECTED;
1237 bo->index = -1;
1238 bo->real.prime_fd = -1;
1239
1240 assert(bo->real.map == NULL || bo->real.mmap_mode == mmap_mode);
1241 bo->real.mmap_mode = mmap_mode;
1242
1243 /* On integrated GPUs, enable snooping to ensure coherency if needed.
1244 * For discrete, we instead use SMEM and avoid WB maps for coherency.
1245 */
1246 if ((flags & BO_ALLOC_COHERENT) &&
1247 !bufmgr->devinfo.has_llc && bufmgr->devinfo.has_caching_uapi) {
1248 if (bufmgr->kmd_backend->bo_set_caching(bo, true) != 0)
1249 goto err_free;
1250 }
1251
1252 DBG("bo_create: buf %d (%s) (%s memzone) (%s) %llub\n", bo->gem_handle,
1253 bo->name, memzone_name(memzone), iris_heap_to_string[bo->real.heap],
1254 (unsigned long long) size);
1255
1256 return bo;
1257
1258 err_vm_alloc:
1259 simple_mtx_lock(&bufmgr->lock);
1260 vma_free(bufmgr, bo->address, bo->size);
1261 simple_mtx_unlock(&bufmgr->lock);
1262 err_free:
1263 simple_mtx_lock(&bufmgr->lock);
1264 bo_free(bo);
1265 simple_mtx_unlock(&bufmgr->lock);
1266 return NULL;
1267 }
1268
1269 static int
1270 iris_bo_close(int fd, uint32_t gem_handle)
1271 {
1272 struct drm_gem_close close = {
1273 .handle = gem_handle,
1274 };
1275 return intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
1276 }
1277
1278 struct iris_bo *
1279 iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
1280 void *ptr, size_t size,
1281 enum iris_memory_zone memzone)
1282 {
1283 struct iris_bo *bo;
1284
1285 bo = bo_calloc();
1286 if (!bo)
1287 return NULL;
1288
1289 bo->gem_handle = bufmgr->kmd_backend->gem_create_userptr(bufmgr, ptr, size);
1290 if (bo->gem_handle == 0)
1291 goto err_free;
1292
1293 bo->name = name;
1294 bo->size = size;
1295 bo->real.map = ptr;
1296 bo->real.userptr = true;
1297
1298 bo->bufmgr = bufmgr;
1299
1300 if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1301 bo->real.capture = true;
1302
1303 simple_mtx_lock(&bufmgr->lock);
1304 bo->address = vma_alloc(bufmgr, memzone, size, 1);
1305 simple_mtx_unlock(&bufmgr->lock);
1306
1307 if (bo->address == 0ull)
1308 goto err_close;
1309
1310 p_atomic_set(&bo->refcount, 1);
1311 bo->index = -1;
1312 bo->idle = true;
1313 bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1314 bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
1315 bo->real.prime_fd = -1;
1316
1317 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1318 goto err_vma_free;
1319
1320 return bo;
1321
1322 err_vma_free:
1323 simple_mtx_lock(&bufmgr->lock);
1324 vma_free(bufmgr, bo->address, bo->size);
1325 simple_mtx_unlock(&bufmgr->lock);
1326 err_close:
1327 bufmgr->kmd_backend->gem_close(bufmgr, bo);
1328 err_free:
1329 free(bo);
1330 return NULL;
1331 }
1332
1333 static bool
1334 needs_prime_fd(struct iris_bufmgr *bufmgr)
1335 {
1336 return bufmgr->devinfo.kmd_type == INTEL_KMD_TYPE_XE;
1337 }
1338
1339 static bool
1340 iris_bo_set_prime_fd(struct iris_bo *bo)
1341 {
1342 struct iris_bufmgr *bufmgr = bo->bufmgr;
1343
1344 if (needs_prime_fd(bufmgr) && bo->real.prime_fd == -1) {
1345 if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1346 DRM_CLOEXEC | DRM_RDWR, &bo->real.prime_fd)) {
1347 fprintf(stderr, "Failed to get prime fd for bo %s/%u\n",
1348 bo->name, bo->gem_handle);
1349 return false;
1350 }
1351 }
1352
1353 return true;
1354 }
1355
1356 /**
1357 * Returns a iris_bo wrapping the given buffer object handle.
1358 *
1359 * This can be used when one application needs to pass a buffer object
1360 * to another.
1361 */
1362 struct iris_bo *
1363 iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
1364 const char *name, unsigned int handle)
1365 {
1366 struct iris_bo *bo;
1367
1368 /* At the moment most applications only have a few named BOs.
1369 * For instance, in a DRI client only the render buffers passed
1370 * between X and the client are named. And since X returns the
1371 * alternating names for the front/back buffer, a linear search
1372 * provides a sufficiently fast match.
1373 */
1374 simple_mtx_lock(&bufmgr->lock);
1375 bo = find_and_ref_external_bo(bufmgr->name_table, handle);
1376 if (bo)
1377 goto out;
1378
1379 struct drm_gem_open open_arg = { .name = handle };
1380 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
1381 if (ret != 0) {
1382 DBG("Couldn't reference %s handle 0x%08x: %s\n",
1383 name, handle, strerror(errno));
1384 bo = NULL;
1385 goto out;
1386 }
1387 /* Now see if someone has used a prime handle to get this
1388 * object from the kernel before by looking through the list
1389 * again for a matching gem_handle
1390 */
1391 bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle);
1392 if (bo)
1393 goto out;
1394
1395 bo = bo_calloc();
1396 if (!bo) {
1397 struct iris_bo close_bo = {
1398 .gem_handle = open_arg.handle,
1399 };
1400 bufmgr->kmd_backend->gem_close(bufmgr, &close_bo);
1401 goto out;
1402 }
1403
1404 p_atomic_set(&bo->refcount, 1);
1405
1406 bo->size = open_arg.size;
1407 bo->bufmgr = bufmgr;
1408 bo->gem_handle = open_arg.handle;
1409 bo->name = name;
1410 bo->index = -1;
1411 bo->real.global_name = handle;
1412 bo->real.prime_fd = -1;
1413 bo->real.reusable = false;
1414 bo->real.imported = true;
1415 /* Xe KMD expects at least 1-way coherency for imports */
1416 bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1417 bo->real.mmap_mode = IRIS_MMAP_NONE;
1418 if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1419 bo->real.capture = true;
1420 bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
1421 if (bo->address == 0ull)
1422 goto err_free;
1423
1424 if (!iris_bo_set_prime_fd(bo))
1425 goto err_vm_alloc;
1426
1427 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1428 goto err_vm_alloc;
1429
1430 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1431 _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
1432
1433 DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
1434
1435 out:
1436 simple_mtx_unlock(&bufmgr->lock);
1437 return bo;
1438
1439 err_vm_alloc:
1440 vma_free(bufmgr, bo->address, bo->size);
1441 err_free:
1442 bo_free(bo);
1443 simple_mtx_unlock(&bufmgr->lock);
1444 return NULL;
1445 }
1446
1447 static void
1448 bo_close(struct iris_bo *bo)
1449 {
1450 struct iris_bufmgr *bufmgr = bo->bufmgr;
1451
1452 simple_mtx_assert_locked(&bufmgr->lock);
1453 assert(iris_bo_is_real(bo));
1454
1455 if (iris_bo_is_external(bo)) {
1456 struct hash_entry *entry;
1457
1458 if (bo->real.global_name) {
1459 entry = _mesa_hash_table_search(bufmgr->name_table,
1460 &bo->real.global_name);
1461 _mesa_hash_table_remove(bufmgr->name_table, entry);
1462 }
1463
1464 entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
1465 _mesa_hash_table_remove(bufmgr->handle_table, entry);
1466
1467 list_for_each_entry_safe(struct bo_export, export, &bo->real.exports, link) {
1468 iris_bo_close(export->drm_fd, export->gem_handle);
1469
1470 list_del(&export->link);
1471 free(export);
1472 }
1473 } else {
1474 assert(list_is_empty(&bo->real.exports));
1475 }
1476
1477 /* Unbind and return the VMA for reuse */
1478 if (bufmgr->kmd_backend->gem_vm_unbind(bo))
1479 vma_free(bo->bufmgr, bo->address, bo->size);
1480 else
1481 DBG("Unable to unbind vm of buf %u\n", bo->gem_handle);
1482
1483 if (bo->real.prime_fd != -1)
1484 close(bo->real.prime_fd);
1485
1486 /* Close this object */
1487 if (bufmgr->kmd_backend->gem_close(bufmgr, bo) != 0) {
1488 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1489 bo->gem_handle, bo->name, strerror(errno));
1490 }
1491
1492 if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
1493 intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
1494 bo->size);
1495 }
1496
1497 for (int d = 0; d < bo->deps_size; d++) {
1498 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
1499 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
1500 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
1501 }
1502 }
1503 free(bo->deps);
1504
1505 free(bo);
1506 }
1507
1508 static void
1509 bo_free(struct iris_bo *bo)
1510 {
1511 struct iris_bufmgr *bufmgr = bo->bufmgr;
1512
1513 simple_mtx_assert_locked(&bufmgr->lock);
1514 assert(iris_bo_is_real(bo));
1515
1516 if (!bo->real.userptr && bo->real.map)
1517 bo_unmap(bo);
1518
1519 if (bo->idle || !iris_bo_busy(bo)) {
1520 bo_close(bo);
1521 } else {
1522 /* Defer closing the GEM BO and returning the VMA for reuse until the
1523 * BO is idle. Just move it to the dead list for now.
1524 */
1525 list_addtail(&bo->head, &bufmgr->zombie_list);
1526 }
1527 }
1528
1529 /** Frees all cached buffers significantly older than @time. */
1530 static void
1531 cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time)
1532 {
1533 simple_mtx_assert_locked(&bufmgr->lock);
1534
1535 if (bufmgr->time == time)
1536 return;
1537
1538 for (int h = 0; h < IRIS_HEAP_MAX; h++) {
1539 struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1540
1541 for (int i = 0; i < cache->num_buckets; i++) {
1542 struct bo_cache_bucket *bucket = &cache->bucket[i];
1543
1544 list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1545 if (time - bo->real.free_time <= 1)
1546 break;
1547
1548 list_del(&bo->head);
1549
1550 bo_free(bo);
1551 }
1552 }
1553 }
1554
1555 list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1556 /* Stop once we reach a busy BO - all others past this point were
1557 * freed more recently so are likely also busy.
1558 */
1559 if (!bo->idle && iris_bo_busy(bo))
1560 break;
1561
1562 list_del(&bo->head);
1563 bo_close(bo);
1564 }
1565
1566 bufmgr->time = time;
1567 }
1568
1569 static void
1570 bo_unreference_final(struct iris_bo *bo, time_t time)
1571 {
1572 struct iris_bufmgr *bufmgr = bo->bufmgr;
1573
1574 DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
1575
1576 assert(iris_bo_is_real(bo));
1577
1578 struct bo_cache_bucket *bucket = !bo->real.reusable ? NULL :
1579 bucket_for_size(bufmgr, bo->size, bo->real.heap, 0);
1580
1581 /* Put the buffer into our internal cache for reuse if we can. */
1582 if (bucket && iris_bo_madvise(bo, IRIS_MADVICE_DONT_NEED)) {
1583 bo->real.free_time = time;
1584 bo->name = NULL;
1585
1586 list_addtail(&bo->head, &bucket->head);
1587 } else {
1588 bo_free(bo);
1589 }
1590 }
1591
1592 void
1593 iris_bo_unreference(struct iris_bo *bo)
1594 {
1595 if (bo == NULL)
1596 return;
1597
1598 assert(p_atomic_read(&bo->refcount) > 0);
1599
1600 if (atomic_add_unless(&bo->refcount, -1, 1)) {
1601 struct iris_bufmgr *bufmgr = bo->bufmgr;
1602 struct timespec time;
1603
1604 clock_gettime(CLOCK_MONOTONIC, &time);
1605
1606 bo->zeroed = false;
1607 if (bo->gem_handle == 0) {
1608 pb_slab_free(get_slabs(bufmgr, bo->size), &bo->slab.entry);
1609 } else {
1610 simple_mtx_lock(&bufmgr->lock);
1611
1612 if (p_atomic_dec_zero(&bo->refcount)) {
1613 bo_unreference_final(bo, time.tv_sec);
1614 cleanup_bo_cache(bufmgr, time.tv_sec);
1615 }
1616
1617 simple_mtx_unlock(&bufmgr->lock);
1618 }
1619 }
1620 }
1621
1622 static void
1623 bo_wait_with_stall_warning(struct util_debug_callback *dbg,
1624 struct iris_bo *bo,
1625 const char *action)
1626 {
1627 bool busy = dbg && !bo->idle;
1628 double elapsed = unlikely(busy) ? -get_time() : 0.0;
1629
1630 iris_bo_wait_rendering(bo);
1631
1632 if (unlikely(busy)) {
1633 elapsed += get_time();
1634 if (elapsed > 1e-5) /* 0.01ms */ {
1635 perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
1636 action, bo->name, elapsed * 1000);
1637 }
1638 }
1639 }
1640
1641 static void
1642 print_flags(unsigned flags)
1643 {
1644 if (flags & MAP_READ)
1645 DBG("READ ");
1646 if (flags & MAP_WRITE)
1647 DBG("WRITE ");
1648 if (flags & MAP_ASYNC)
1649 DBG("ASYNC ");
1650 if (flags & MAP_PERSISTENT)
1651 DBG("PERSISTENT ");
1652 if (flags & MAP_COHERENT)
1653 DBG("COHERENT ");
1654 if (flags & MAP_RAW)
1655 DBG("RAW ");
1656 DBG("\n");
1657 }
1658
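/**
 * Returns a CPU mapping of the buffer's contents.
 *
 * Suballocated (slab) BOs are mapped through their backing BO at the proper
 * offset.  Real BOs are mmapped lazily, once, and the mapping is cached in
 * bo->real.map for later calls; BOs with IRIS_MMAP_NONE cannot be mapped and
 * yield NULL.  Unless MAP_ASYNC is passed, this waits for any GPU work still
 * referencing the BO (warning about the stall when a debug callback is given).
 */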
1659 void *
1660 iris_bo_map(struct util_debug_callback *dbg,
1661 struct iris_bo *bo, unsigned flags)
1662 {
1663 struct iris_bufmgr *bufmgr = bo->bufmgr;
1664 void *map = NULL;
1665
1666 if (bo->gem_handle == 0) {
1667 struct iris_bo *real = iris_get_backing_bo(bo);
1668 uint64_t offset = bo->address - real->address;
1669 map = iris_bo_map(dbg, real, flags | MAP_ASYNC) + offset;
1670 } else {
1671 assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
1672 if (bo->real.mmap_mode == IRIS_MMAP_NONE)
1673 return NULL;
1674
1675 if (!bo->real.map) {
1676 DBG("iris_bo_map: %d (%s)\n", bo->gem_handle, bo->name);
1677 map = bufmgr->kmd_backend->gem_mmap(bufmgr, bo);
1678 if (!map) {
1679 return NULL;
1680 }
1681
1682 VG_DEFINED(map, bo->size);
1683
1684 if (p_atomic_cmpxchg(&bo->real.map, NULL, map)) {
1685 VG_NOACCESS(map, bo->size);
1686 os_munmap(map, bo->size);
1687 }
1688 }
1689 assert(bo->real.map);
1690 map = bo->real.map;
1691 }
1692
1693 DBG("iris_bo_map: %d (%s) -> %p\n",
1694 bo->gem_handle, bo->name, bo->real.map);
1695 print_flags(flags);
1696
1697 if (!(flags & MAP_ASYNC)) {
1698 bo_wait_with_stall_warning(dbg, bo, "memory mapping");
1699 }
1700
1701 return map;
1702 }
1703
1704 /**
1705 * Waits on a BO for the given amount of time.
1706 *
1707 * @bo: buffer object to wait for
1708 * @timeout_ns: amount of time to wait in nanoseconds.
1709 * If value is less than 0, an infinite wait will occur.
1710 *
1711 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1712 * object has completed within the allotted time. Otherwise some negative return
1713 * value describes the error. Of particular interest is -ETIME when the wait has
1714 * failed to yield the desired result.
1715 *
1716 * Similar to iris_bo_wait_rendering except a timeout parameter allows
1717 * the operation to give up after a certain amount of time. Another subtle
1718 * difference is the internal locking semantics are different (this variant does
1719 * not hold the lock for the duration of the wait). This makes the wait subject
1720 * to a larger userspace race window.
1721 *
1722 * The implementation shall wait until the object is no longer actively
1723 * referenced within a batch buffer at the time of the call. The wait will
1724 * not guarantee that the buffer is re-issued via another thread, or a flinked
1725 * handle. Userspace must make sure this race does not occur if such precision
1726 * is important.
1727 *
1728 * Note that some kernels have broken the infinite wait for negative values
1729 * promise; upgrade to the latest stable kernel if this is the case.
1730 */
1731 static inline int
1732 iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
1733 {
1734 int ret;
1735
1736 switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
1737 case INTEL_KMD_TYPE_I915:
1738 if (iris_bo_is_external(bo))
1739 ret = iris_i915_bo_wait_gem(bo, timeout_ns);
1740 else
1741 ret = iris_bo_wait_syncobj(bo, timeout_ns);
1742 break;
1743 case INTEL_KMD_TYPE_XE:
1744 ret = iris_bo_wait_syncobj(bo, timeout_ns);
1745 break;
1746 default:
1747 unreachable("missing");
1748 ret = -1;
1749 }
1750
1751 bo->idle = ret == 0;
1752
1753 return ret;
1754 }
1755
1756 /** Waits for all GPU rendering with the object to have completed. */
1757 void
1758 iris_bo_wait_rendering(struct iris_bo *bo)
1759 {
1760 /* We require a kernel recent enough for WAIT_IOCTL support.
1761 * See intel_init_bufmgr()
1762 */
1763 iris_bo_wait(bo, -1);
1764 }
1765
1766 static void
1767 iris_bufmgr_destroy_global_vm(struct iris_bufmgr *bufmgr)
1768 {
1769 switch (bufmgr->devinfo.kmd_type) {
1770 case INTEL_KMD_TYPE_I915:
1771 /* Nothing to do in i915 */
1772 break;
1773 case INTEL_KMD_TYPE_XE:
1774 intel_bind_timeline_finish(&bufmgr->bind_timeline, bufmgr->fd);
1775 iris_xe_destroy_global_vm(bufmgr);
1776 break;
1777 default:
1778 unreachable("missing");
1779 }
1780 }
1781
1782 static void
1783 iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
1784 {
1785 iris_destroy_border_color_pool(&bufmgr->border_color_pool);
1786
1787 /* Free aux-map buffers */
1788 intel_aux_map_finish(bufmgr->aux_map_ctx);
1789
1790 /* bufmgr will no longer try to free VMA entries in the aux-map */
1791 bufmgr->aux_map_ctx = NULL;
1792
1793 for (int i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
1794 if (bufmgr->bo_slabs[i].groups)
1795 pb_slabs_deinit(&bufmgr->bo_slabs[i]);
1796 }
1797
1798 simple_mtx_lock(&bufmgr->lock);
1799
1800 /* Free any cached buffer objects we were going to reuse */
1801 for (int h = 0; h < IRIS_HEAP_MAX; h++) {
1802 struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1803
1804 for (int i = 0; i < cache->num_buckets; i++) {
1805 struct bo_cache_bucket *bucket = &cache->bucket[i];
1806
1807 list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1808 list_del(&bo->head);
1809
1810 bo_free(bo);
1811 }
1812 }
1813 }
1814
1815 /* Close any buffer objects on the dead list. */
1816 list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1817 list_del(&bo->head);
1818 bo_close(bo);
1819 }
1820
1821 _mesa_hash_table_destroy(bufmgr->name_table, NULL);
1822 _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
1823
1824 for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
1825 util_vma_heap_finish(&bufmgr->vma_allocator[z]);
1826
1827 iris_bufmgr_destroy_global_vm(bufmgr);
1828
1829 close(bufmgr->fd);
1830
1831 simple_mtx_unlock(&bufmgr->lock);
1832
1833 simple_mtx_destroy(&bufmgr->lock);
1834 simple_mtx_destroy(&bufmgr->bo_deps_lock);
1835
1836 free(bufmgr);
1837 }
1838
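/**
 * Queries the tiling mode of a BO via the i915 tiling uAPI.
 *
 * When the kernel lacks the tiling uAPI, this reports 0 (no tiling).
 */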
int
iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   if (!bufmgr->devinfo.has_tiling_uapi) {
      *tiling = 0;
      return 0;
   }

   assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
   return iris_i915_bo_get_tiling(bo, tiling);
}

int
iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* If we can't do map_gtt, the set/get_tiling API isn't useful. And it's
    * actually not supported by the kernel in those cases.
    */
   if (!bufmgr->devinfo.has_tiling_uapi)
      return 0;

   assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
   return iris_i915_bo_set_tiling(bo, surf);
}

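/**
 * Imports a dma-buf fd as an iris_bo.
 *
 * If the kernel has already handed us this buffer (same GEM handle), the
 * existing BO is referenced and returned instead of creating a duplicate.
 * Otherwise a new BO is created, sized via lseek() on the fd, bound into
 * the global VM, and registered in the handle table.
 */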
struct iris_bo *
iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
                      const uint64_t modifier)
{
   uint32_t handle;
   struct iris_bo *bo;

   simple_mtx_lock(&bufmgr->lock);
   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
   if (ret) {
      DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
          strerror(errno));
      simple_mtx_unlock(&bufmgr->lock);
      return NULL;
   }

   /*
    * See if the kernel has already returned this buffer to us. Just as
    * for named buffers, we must not create two BOs pointing at the same
    * kernel object.
    */
   bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
   if (bo)
      goto out;

   bo = bo_calloc();
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);

   /* Determine the size of the BO. The fd-to-handle ioctl really should
    * return the size, but it doesn't. If we have kernel 3.12 or later, we
    * can lseek on the prime fd to get the size. Older kernels will just
    * fail, in which case we fall back to the provided (estimated or
    * guessed) size.
    */
   ret = lseek(prime_fd, 0, SEEK_END);
   if (ret != -1)
      bo->size = ret;

   bo->bufmgr = bufmgr;
   bo->name = "prime";
   bo->index = -1;
   bo->real.reusable = false;
   bo->real.imported = true;
   /* Xe KMD expects at least 1-way coherency for imports */
   bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
   bo->real.mmap_mode = IRIS_MMAP_NONE;
   if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
      bo->real.capture = true;
   bo->gem_handle = handle;
   bo->real.prime_fd = needs_prime_fd(bufmgr) ? dup(prime_fd) : -1;

   uint64_t alignment = 1;

   /* When an aux map will be used, there is an alignment requirement on the
    * main surface from the mapping granularity. Some planes of the image may
    * have smaller alignment requirements, but this one should work for all.
    */
   if (bufmgr->devinfo.has_aux_map && isl_drm_modifier_has_aux(modifier))
      alignment = intel_aux_map_get_alignment(bufmgr->aux_map_ctx);

   bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, alignment);
   if (bo->address == 0ull)
      goto err_free;

   if (!bufmgr->kmd_backend->gem_vm_bind(bo))
      goto err_vm_alloc;

   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

out:
   simple_mtx_unlock(&bufmgr->lock);
   return bo;

err_vm_alloc:
   vma_free(bufmgr, bo->address, bo->size);
err_free:
   bo_free(bo);
   simple_mtx_unlock(&bufmgr->lock);
   return NULL;
}

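/**
 * Marks a BO as shared with an external consumer (bufmgr->lock must be held).
 *
 * External BOs are added to the handle table and are no longer eligible for
 * reuse from the BO cache.
 */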
static void
iris_bo_mark_exported_locked(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));
   simple_mtx_assert_locked(&bufmgr->lock);

   if (!iris_bo_is_external(bo))
      _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

   if (!bo->real.exported) {
      /* If a BO is going to be used externally, it could be sent to the
       * display HW. So make sure our CPU mappings don't assume cache
       * coherency since display is outside that cache.
       */
      bo->real.exported = true;
      bo->real.reusable = false;
   }
}

void
iris_bo_mark_exported(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   if (bo->real.exported) {
      assert(!bo->real.reusable);
      return;
   }

   simple_mtx_lock(&bufmgr->lock);
   iris_bo_mark_exported_locked(bo);
   simple_mtx_unlock(&bufmgr->lock);

   iris_bo_set_prime_fd(bo);
}

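/**
 * Exports a BO as a dma-buf (PRIME) fd and marks it as exported.
 *
 * Returns 0 on success or -errno on failure.
 */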
int
iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
                          DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
      return -errno;

   iris_bo_mark_exported(bo);

   return 0;
}

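/** Marks a BO as exported and returns its GEM handle on the bufmgr's fd. */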
static uint32_t
iris_bo_export_gem_handle(struct iris_bo *bo)
{
   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   iris_bo_mark_exported(bo);

   return bo->gem_handle;
}

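/**
 * Returns a global "flink" name for the BO, creating one if necessary.
 *
 * Flinked BOs are marked as exported and become non-reusable.
 */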
int
iris_bo_flink(struct iris_bo *bo, uint32_t *name)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   if (!bo->real.global_name) {
      struct drm_gem_flink flink = { .handle = bo->gem_handle };

      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
         return -errno;

      simple_mtx_lock(&bufmgr->lock);
      if (!bo->real.global_name) {
         iris_bo_mark_exported_locked(bo);
         bo->real.global_name = flink.name;
         _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
      }
      simple_mtx_unlock(&bufmgr->lock);

      iris_bo_set_prime_fd(bo);
   }

   *name = bo->real.global_name;
   return 0;
}

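/**
 * Exports a GEM handle for the BO that is valid on another DRM fd.
 *
 * If drm_fd refers to the same file description as the bufmgr's fd, the
 * BO's own GEM handle is returned. Otherwise the BO is exported as a
 * dma-buf, re-imported on drm_fd, and the resulting handle is cached in
 * the BO's export list so that repeated calls return the same handle.
 */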
int
iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
                                     uint32_t *out_handle)
{
   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   /* Only add the new GEM handle to the list of exports if it belongs to a
    * different GEM device. Otherwise we might close the same buffer multiple
    * times.
    */
   struct iris_bufmgr *bufmgr = bo->bufmgr;
   int ret = os_same_file_description(drm_fd, bufmgr->fd);
   WARN_ONCE(ret < 0,
             "Kernel has no file descriptor comparison support: %s\n",
             strerror(errno));
   if (ret == 0) {
      *out_handle = iris_bo_export_gem_handle(bo);
      return 0;
   }

   struct bo_export *export = calloc(1, sizeof(*export));
   if (!export)
      return -ENOMEM;

   export->drm_fd = drm_fd;

   int dmabuf_fd = -1;
   int err = iris_bo_export_dmabuf(bo, &dmabuf_fd);
   if (err) {
      free(export);
      return err;
   }

   simple_mtx_lock(&bufmgr->lock);
   err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
   close(dmabuf_fd);
   if (err) {
      simple_mtx_unlock(&bufmgr->lock);
      free(export);
      return err;
   }

   bool found = false;
   list_for_each_entry(struct bo_export, iter, &bo->real.exports, link) {
      if (iter->drm_fd != drm_fd)
         continue;
      /* Here we assume that for a given DRM fd, we'll always get back the
       * same GEM handle for a given buffer.
       */
      assert(iter->gem_handle == export->gem_handle);
      free(export);
      export = iter;
      found = true;
      break;
   }
   if (!found)
      list_addtail(&export->link, &bo->real.exports);

   simple_mtx_unlock(&bufmgr->lock);

   *out_handle = export->gem_handle;

   return 0;
}

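/**
 * Appends a cache bucket of the given size to the per-heap bucket cache.
 *
 * The asserts sanity-check that bucket_for_size() maps the new size back to
 * this bucket and that the next larger size does not.
 */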
static void
add_bucket(struct iris_bufmgr *bufmgr, int size, enum iris_heap heap)
{
   struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
   unsigned int i = cache->num_buckets++;

   list_inithead(&cache->bucket[i].head);
   cache->bucket[i].size = size;

   assert(bucket_for_size(bufmgr, size, heap, 0) == &cache->bucket[i]);
   assert(bucket_for_size(bufmgr, size - 2048, heap, 0) == &cache->bucket[i]);
   assert(bucket_for_size(bufmgr, size + 1, heap, 0) != &cache->bucket[i]);
}

static void
init_cache_buckets(struct iris_bufmgr *bufmgr, enum iris_heap heap)
{
   uint64_t size, cache_max_size = 64 * 1024 * 1024;

   /* OK, so power of two buckets was too wasteful of memory.
    * Give 3 other sizes between each power of two, to hopefully
    * cover things accurately enough. (The alternative is
    * probably to just go for exact matching of sizes, and assume
    * that for things like composited window resize the tiled
    * width/height alignment and rounding of sizes to pages will
    * get us useful cache hit rates anyway)
    */
   add_bucket(bufmgr, PAGE_SIZE, heap);
   add_bucket(bufmgr, PAGE_SIZE * 2, heap);
   add_bucket(bufmgr, PAGE_SIZE * 3, heap);

   /* Initialize the linked lists for BO reuse cache. */
   for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
      add_bucket(bufmgr, size, heap);

      add_bucket(bufmgr, size + size * 1 / 4, heap);
      add_bucket(bufmgr, size + size * 2 / 4, heap);
      add_bucket(bufmgr, size + size * 3 / 4, heap);
   }
}

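/**
 * Allocates a pinned buffer for the aux-map tables on behalf of the common
 * intel_aux_map code.
 *
 * The BO is placed in the IRIS_MEMZONE_OTHER VMA zone with 64KB alignment,
 * bound into the global VM, and mapped for CPU writes.
 */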
static struct intel_buffer *
intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
{
   struct intel_buffer *buf = malloc(sizeof(struct intel_buffer));
   if (!buf)
      return NULL;

   struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;

   unsigned int page_size = getpagesize();
   size = MAX2(ALIGN(size, page_size), page_size);

   struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, BO_ALLOC_CAPTURE);
   if (!bo) {
      free(buf);
      return NULL;
   }

   simple_mtx_lock(&bufmgr->lock);

   bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024);
   if (bo->address == 0ull)
      goto err_free;

   if (!bufmgr->kmd_backend->gem_vm_bind(bo))
      goto err_vm_alloc;

   simple_mtx_unlock(&bufmgr->lock);

   bo->name = "aux-map";
   p_atomic_set(&bo->refcount, 1);
   bo->index = -1;
   bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
   bo->real.prime_fd = -1;

   buf->driver_bo = bo;
   buf->gpu = bo->address;
   buf->gpu_end = buf->gpu + bo->size;
   buf->map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
   return buf;

err_vm_alloc:
   vma_free(bufmgr, bo->address, bo->size);
err_free:
   free(buf);
   bo_free(bo);
   simple_mtx_unlock(&bufmgr->lock);
   return NULL;
}

static void
intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
{
   iris_bo_unreference((struct iris_bo*)buffer->driver_bo);
   free(buffer);
}

static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
   .alloc = intel_aux_map_buffer_alloc,
   .free = intel_aux_map_buffer_free,
};

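/**
 * Caches the system and device-local memory regions and sizes reported by
 * intel_device_info in the bufmgr.
 */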
static bool
iris_bufmgr_get_meminfo(struct iris_bufmgr *bufmgr,
                        struct intel_device_info *devinfo)
{
   bufmgr->sys.region = &devinfo->mem.sram.mem;
   bufmgr->sys.size = devinfo->mem.sram.mappable.size;

   /* When the resizable BAR feature is disabled, vram.mappable.size is only
    * 256MB; the rest of the total size is reported in vram.unmappable.size.
    */
   bufmgr->vram.region = &devinfo->mem.vram.mem;
   bufmgr->vram.size = devinfo->mem.vram.mappable.size +
                       devinfo->mem.vram.unmappable.size;

   return true;
}

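/**
 * Creates the KMD-specific global VM used by this buffer manager.
 *
 * i915 can run without a VM, so initialization succeeds either way; Xe
 * requires both a bind timeline and a VM, so failure here is fatal.
 */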
static bool
iris_bufmgr_init_global_vm(struct iris_bufmgr *bufmgr)
{
   switch (bufmgr->devinfo.kmd_type) {
   case INTEL_KMD_TYPE_I915:
      bufmgr->use_global_vm = iris_i915_init_global_vm(bufmgr, &bufmgr->global_vm_id);
      /* i915 doesn't require a VM, so return true even if use_global_vm is false */
      return true;
   case INTEL_KMD_TYPE_XE:
      if (!intel_bind_timeline_init(&bufmgr->bind_timeline, bufmgr->fd))
         return false;

      bufmgr->use_global_vm = iris_xe_init_global_vm(bufmgr, &bufmgr->global_vm_id);
      /* Xe requires a VM */
      return bufmgr->use_global_vm;
   default:
      unreachable("missing");
      return false;
   }
}

/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate,
 * map, and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
static struct iris_bufmgr *
iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
{
   if (devinfo->gtt_size <= IRIS_MEMZONE_OTHER_START)
      return NULL;

   struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
   if (bufmgr == NULL)
      return NULL;

   /* Handles to buffer objects belong to the device fd and are not
    * reference counted by the kernel. If the same fd is used by
    * multiple parties (threads sharing the same screen bufmgr, or
    * even worse the same device fd passed to multiple libraries)
    * ownership of those handles is shared by those independent parties.
    *
    * Don't do this! Ensure that each library/bufmgr has its own device
    * fd so that its namespace does not clash with another.
    */
   bufmgr->fd = os_dupfd_cloexec(fd);
   if (bufmgr->fd == -1)
      goto error_dup;

   p_atomic_set(&bufmgr->refcount, 1);

   simple_mtx_init(&bufmgr->lock, mtx_plain);
   simple_mtx_init(&bufmgr->bo_deps_lock, mtx_plain);

   list_inithead(&bufmgr->zombie_list);

   bufmgr->devinfo = *devinfo;
   devinfo = &bufmgr->devinfo;
   bufmgr->bo_reuse = bo_reuse;
   iris_bufmgr_get_meminfo(bufmgr, devinfo);
   bufmgr->kmd_backend = iris_kmd_backend_get(devinfo->kmd_type);

   struct intel_query_engine_info *engine_info;
   engine_info = intel_engine_get_info(bufmgr->fd, bufmgr->devinfo.kmd_type);
   bufmgr->devinfo.has_compute_engine = engine_info &&
                                        intel_engines_count(engine_info,
                                                            INTEL_ENGINE_CLASS_COMPUTE);
   bufmgr->compute_engine_supported = bufmgr->devinfo.has_compute_engine &&
                                      intel_engines_supported_count(bufmgr->fd,
                                                                    &bufmgr->devinfo,
                                                                    engine_info,
                                                                    INTEL_ENGINE_CLASS_COMPUTE);
   free(engine_info);

   if (!iris_bufmgr_init_global_vm(bufmgr))
      goto error_init_vm;

   STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull);
   const uint64_t _4GB = 1ull << 32;
   const uint64_t _2GB = 1ul << 31;

   /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
   const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;

   const struct {
      uint64_t start;
      uint64_t size;
   } vma[IRIS_MEMZONE_COUNT] = {
      [IRIS_MEMZONE_SHADER] = {
         .start = PAGE_SIZE,
         .size = _4GB_minus_1 - PAGE_SIZE
      },
      [IRIS_MEMZONE_BINDER] = {
         .start = IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE,
         .size = IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
      },
      [IRIS_MEMZONE_SCRATCH] = {
         .start = IRIS_MEMZONE_SCRATCH_START,
         .size = IRIS_SCRATCH_ZONE_SIZE
      },
      [IRIS_MEMZONE_SURFACE] = {
         .start = IRIS_MEMZONE_SURFACE_START,
         .size = _4GB_minus_1 - IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
      },
      [IRIS_MEMZONE_DYNAMIC] = {
         .start = IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,

         /* Wa_2209859288: the Tigerlake PRM's workarounds volume says:
          *
          *    "PSDunit is dropping MSB of the blend state pointer from SD
          *     FIFO [...] Limit the Blend State Pointer to < 2G"
          *
          * We restrict the dynamic state pool to 2GB so that we don't ever
          * get a BLEND_STATE pointer with the MSB set. We aren't likely to
          * need the full 4GB for dynamic state anyway.
          */
         .size = (devinfo->ver >= 12 ? _2GB : _4GB_minus_1)
                 - IRIS_BORDER_COLOR_POOL_SIZE
      },
      [IRIS_MEMZONE_OTHER] = {
         .start = IRIS_MEMZONE_OTHER_START,

         /* Leave the last 4GB out of the high vma range, so that no state
          * base address + size can overflow 48 bits.
          */
         .size = (devinfo->gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START,
      },
   };

   for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
      util_vma_heap_init(&bufmgr->vma_allocator[i],
                         vma[i].start, vma[i].size);
   }

   if (INTEL_DEBUG(DEBUG_HEAPS)) {
      for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
         fprintf(stderr, "%-11s | 0x%016" PRIx64 "-0x%016" PRIx64 "\n",
                 memzone_name(i), vma[i].start,
                 vma[i].start + vma[i].size - 1);
      }
   }

   for (int h = 0; h < IRIS_HEAP_MAX; h++)
      init_cache_buckets(bufmgr, h);

   unsigned min_slab_order = 8; /* 256 bytes */
   unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
   unsigned num_slab_orders_per_allocator =
      (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS;

   /* Divide the size order range among slab managers. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned min_order = min_slab_order;
      unsigned max_order =
         MIN2(min_order + num_slab_orders_per_allocator, max_slab_order);

      if (!pb_slabs_init(&bufmgr->bo_slabs[i], min_order, max_order,
                         IRIS_HEAP_MAX, true, bufmgr,
                         iris_can_reclaim_slab,
                         iris_slab_alloc,
                         (void *) iris_slab_free)) {
         goto error_slabs_init;
      }
      min_slab_order = max_order + 1;
   }

   bufmgr->name_table =
      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
   bufmgr->handle_table =
      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);

   if (devinfo->has_aux_map) {
      bufmgr->aux_map_ctx = intel_aux_map_init(bufmgr, &aux_map_allocator,
                                               devinfo);
      assert(bufmgr->aux_map_ctx);
   }

   iris_init_border_color_pool(bufmgr, &bufmgr->border_color_pool);

   return bufmgr;

error_slabs_init:
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      if (!bufmgr->bo_slabs[i].groups)
         break;

      pb_slabs_deinit(&bufmgr->bo_slabs[i]);
   }
   iris_bufmgr_destroy_global_vm(bufmgr);
error_init_vm:
   close(bufmgr->fd);
error_dup:
   free(bufmgr);
   return NULL;
}

static struct iris_bufmgr *
iris_bufmgr_ref(struct iris_bufmgr *bufmgr)
{
   p_atomic_inc(&bufmgr->refcount);
   return bufmgr;
}

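/**
 * Drops a reference to the buffer manager, destroying it (and removing it
 * from the global bufmgr list) when the last reference goes away.
 */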
void
iris_bufmgr_unref(struct iris_bufmgr *bufmgr)
{
   simple_mtx_lock(&global_bufmgr_list_mutex);
   if (p_atomic_dec_zero(&bufmgr->refcount)) {
      list_del(&bufmgr->link);
      iris_bufmgr_destroy(bufmgr);
   }
   simple_mtx_unlock(&global_bufmgr_list_mutex);
}

/** Returns a new unique id, to be used by screens. */
int
iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr)
{
   return p_atomic_inc_return(&bufmgr->next_screen_id) - 1;
}

/**
 * Gets an already existing GEM buffer manager or creates a new one.
 *
 * \param fd File descriptor of the opened DRM device.
 */
struct iris_bufmgr *
iris_bufmgr_get_for_fd(int fd, bool bo_reuse)
{
   struct intel_device_info devinfo;
   struct stat st;

   if (fstat(fd, &st))
      return NULL;

   struct iris_bufmgr *bufmgr = NULL;

   simple_mtx_lock(&global_bufmgr_list_mutex);
   list_for_each_entry(struct iris_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
      struct stat iter_st;
      if (fstat(iter_bufmgr->fd, &iter_st))
         continue;

      if (st.st_rdev == iter_st.st_rdev) {
         assert(iter_bufmgr->bo_reuse == bo_reuse);
         bufmgr = iris_bufmgr_ref(iter_bufmgr);
         goto unlock;
      }
   }

   /* Bail through the unlock path so we don't leak the list mutex. */
   if (!intel_get_device_info_from_fd(fd, &devinfo, 8, -1))
      goto unlock;

   if (devinfo.ver < 8 || devinfo.platform == INTEL_PLATFORM_CHV)
      goto unlock;

   bufmgr = iris_bufmgr_create(&devinfo, fd, bo_reuse);
   if (bufmgr)
      list_addtail(&bufmgr->link, &global_bufmgr_list);

unlock:
   simple_mtx_unlock(&global_bufmgr_list_mutex);

   return bufmgr;
}


int
iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr)
{
   return bufmgr->fd;
}

void *
iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr)
{
   return bufmgr->aux_map_ctx;
}

simple_mtx_t *
iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->bo_deps_lock;
}

struct iris_border_color_pool *
iris_bufmgr_get_border_color_pool(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->border_color_pool;
}

uint64_t
iris_bufmgr_vram_size(struct iris_bufmgr *bufmgr)
{
   return bufmgr->vram.size;
}

uint64_t
iris_bufmgr_sram_size(struct iris_bufmgr *bufmgr)
{
   return bufmgr->sys.size;
}

const struct intel_device_info *
iris_bufmgr_get_device_info(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->devinfo;
}

const struct iris_kmd_backend *
iris_bufmgr_get_kernel_driver_backend(struct iris_bufmgr *bufmgr)
{
   return bufmgr->kmd_backend;
}

uint32_t
iris_bufmgr_get_global_vm_id(struct iris_bufmgr *bufmgr)
{
   return bufmgr->global_vm_id;
}

bool
iris_bufmgr_use_global_vm_id(struct iris_bufmgr *bufmgr)
{
   return bufmgr->use_global_vm;
}

bool
iris_bufmgr_compute_engine_supported(struct iris_bufmgr *bufmgr)
{
   return bufmgr->compute_engine_supported;
}

/**
 * Returns the PAT entry to use for allocations from the given BO heap.
 */
const struct intel_device_info_pat_entry *
iris_heap_to_pat_entry(const struct intel_device_info *devinfo,
                       enum iris_heap heap)
{
   switch (heap) {
   case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
      return &devinfo->pat.cached_coherent;
   case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
      return &devinfo->pat.writecombining;
   case IRIS_HEAP_DEVICE_LOCAL:
   case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
      return &devinfo->pat.writecombining;
   default:
      unreachable("invalid heap for platforms using PAT entries");
   }
}

struct intel_bind_timeline *
iris_bufmgr_get_bind_timeline(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->bind_timeline;
}