1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_bufmgr.c
25 *
26 * The Iris buffer manager.
27 *
28 * XXX: write better comments
29 * - BOs
30 * - Explain BO cache
31 * - main interface to GEM in the kernel
32 */
33
34 #include <util/u_atomic.h>
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <assert.h>
41 #include <sys/ioctl.h>
42 #include <sys/mman.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <stdbool.h>
46 #include <time.h>
47 #include <unistd.h>
48
49 #include "errno.h"
50 #include "common/intel_aux_map.h"
51 #include "common/intel_mem.h"
52 #include "c99_alloca.h"
53 #include "dev/intel_debug.h"
54 #include "common/intel_common.h"
55 #include "common/intel_gem.h"
56 #include "dev/intel_device_info.h"
57 #include "drm-uapi/dma-buf.h"
58 #include "isl/isl.h"
59 #include "util/os_mman.h"
60 #include "util/u_debug.h"
61 #include "util/macros.h"
62 #include "util/hash_table.h"
63 #include "util/list.h"
64 #include "util/os_file.h"
65 #include "util/u_dynarray.h"
66 #include "util/vma.h"
67 #include "iris_bufmgr.h"
68 #include "iris_context.h"
69 #include "string.h"
70 #include "iris_kmd_backend.h"
71 #include "i915/iris_bufmgr.h"
72 #include "xe/iris_bufmgr.h"
73
74 #include <xf86drm.h>
75
76 #ifdef HAVE_VALGRIND
77 #include <valgrind.h>
78 #include <memcheck.h>
79 #define VG(x) x
80 #else
81 #define VG(x)
82 #endif
83
84 /* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
85 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
86 * leaked. All because it does not call VG(cli_free) from its
87 * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
88 * and allocation, we mark it available for use upon mmapping and remove
89 * it upon unmapping.
90 */
91 #define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
92 #define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
93
94 /* On FreeBSD PAGE_SIZE is already defined in
95 * /usr/include/machine/param.h that is indirectly
96 * included here.
97 */
98 #ifndef PAGE_SIZE
99 #define PAGE_SIZE 4096
100 #endif
101
102 #define WARN_ONCE(cond, fmt...) do { \
103 if (unlikely(cond)) { \
104 static bool _warned = false; \
105 if (!_warned) { \
106 fprintf(stderr, "WARNING: "); \
107 fprintf(stderr, fmt); \
108 _warned = true; \
109 } \
110 } \
111 } while (0)
112
113 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
114
115 /**
116 * For debugging purposes, this returns a time in seconds.
117 */
118 static double
get_time(void)119 get_time(void)
120 {
121 struct timespec tp;
122
123 clock_gettime(CLOCK_MONOTONIC, &tp);
124
125 return tp.tv_sec + tp.tv_nsec / 1000000000.0;
126 }
127
128 static inline int
atomic_add_unless(int * v,int add,int unless)129 atomic_add_unless(int *v, int add, int unless)
130 {
131 int c, old;
132 c = p_atomic_read(v);
133 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
134 c = old;
135 return c == unless;
136 }
137
138 static const char *
memzone_name(enum iris_memory_zone memzone)139 memzone_name(enum iris_memory_zone memzone)
140 {
141 const char *names[] = {
142 [IRIS_MEMZONE_SHADER] = "shader",
143 [IRIS_MEMZONE_BINDER] = "binder",
144 [IRIS_MEMZONE_SCRATCH] = "scratchsurf",
145 [IRIS_MEMZONE_SURFACE] = "surface",
146 [IRIS_MEMZONE_DYNAMIC] = "dynamic",
147 [IRIS_MEMZONE_OTHER] = "other",
148 [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor",
149 };
150 assert(memzone < ARRAY_SIZE(names));
151 return names[memzone];
152 }
153
154 struct bo_cache_bucket {
155 /** List of cached BOs. */
156 struct list_head head;
157
158 /** Size of this bucket, in bytes. */
159 uint64_t size;
160 };
161
162 struct bo_export {
163 /** File descriptor associated with a handle export. */
164 int drm_fd;
165
166 /** GEM handle in drm_fd */
167 uint32_t gem_handle;
168
169 struct list_head link;
170 };
171
172 struct iris_memregion {
173 struct intel_memory_class_instance *region;
174 uint64_t size;
175 };
176
177 #define NUM_SLAB_ALLOCATORS 3
178
179 struct iris_slab {
180 struct pb_slab base;
181
182 /** The BO representing the entire slab */
183 struct iris_bo *bo;
184
185 /** Array of iris_bo structs representing BOs allocated out of this slab */
186 struct iris_bo *entries;
187 };
188
189 #define BUCKET_ARRAY_SIZE 25
190
191 struct iris_bucket_cache {
192 struct bo_cache_bucket bucket[BUCKET_ARRAY_SIZE];
193 int num_buckets;
194 };
195
196 struct iris_bufmgr {
197 /**
198 * List into the list of bufmgr.
199 */
200 struct list_head link;
201
202 uint32_t refcount;
203
204 int fd;
205
206 simple_mtx_t lock;
207 simple_mtx_t bo_deps_lock;
208
209 /** Array of lists of cached gem objects of power-of-two sizes */
210 struct iris_bucket_cache *bucket_cache;
211
212 time_t time;
213
214 struct hash_table *name_table;
215 struct hash_table *handle_table;
216
217 /**
218 * List of BOs which we've effectively freed, but are hanging on to
219 * until they're idle before closing and returning the VMA.
220 */
221 struct list_head zombie_list;
222
223 struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT];
224
225 struct iris_memregion vram, sys;
226
227 /* Used only when use_global_vm is true. */
228 uint32_t global_vm_id;
229
230 int next_screen_id;
231
232 struct intel_device_info devinfo;
233 const struct iris_kmd_backend *kmd_backend;
234 struct intel_bind_timeline bind_timeline; /* Xe only */
235 bool bo_reuse:1;
236 bool use_global_vm:1;
237
238 struct intel_aux_map_context *aux_map_ctx;
239
240 struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
241
242 struct iris_border_color_pool border_color_pool;
243
244 struct iris_bo *dummy_aux_bo;
245 struct iris_bo *mem_fence_bo;
246 };
247
248 static simple_mtx_t global_bufmgr_list_mutex = SIMPLE_MTX_INITIALIZER;
249 static struct list_head global_bufmgr_list = {
250 .next = &global_bufmgr_list,
251 .prev = &global_bufmgr_list,
252 };
253
254 static void bo_free(struct iris_bo *bo);
255
256 static struct iris_bo *
find_and_ref_external_bo(struct hash_table * ht,unsigned int key)257 find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
258 {
259 struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
260 struct iris_bo *bo = entry ? entry->data : NULL;
261
262 if (bo) {
263 assert(iris_bo_is_external(bo));
264 assert(iris_bo_is_real(bo));
265 assert(!bo->real.reusable);
266
267 /* Being non-reusable, the BO cannot be in the cache lists, but it
268 * may be in the zombie list if it had reached zero references, but
269 * we hadn't yet closed it...and then reimported the same BO. If it
270 * is, then remove it since it's now been resurrected.
271 */
272 if (list_is_linked(&bo->head))
273 list_del(&bo->head);
274
275 iris_bo_reference(bo);
276 }
277
278 return bo;
279 }
280
281 /**
282 * This function finds the correct bucket fit for the input size.
283 * The function works with O(1) complexity when the requested size
284 * was queried instead of iterating the size through all the buckets.
285 */
286 static struct bo_cache_bucket *
bucket_for_size(struct iris_bufmgr * bufmgr,uint64_t size,enum iris_heap heap,unsigned flags)287 bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size,
288 enum iris_heap heap, unsigned flags)
289 {
290 if (flags & BO_ALLOC_PROTECTED)
291 return NULL;
292
293 const struct intel_device_info *devinfo = &bufmgr->devinfo;
294 struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
295
296 if (devinfo->kmd_type == INTEL_KMD_TYPE_XE &&
297 (flags & (BO_ALLOC_SHARED | BO_ALLOC_SCANOUT)))
298 return NULL;
299
300 const unsigned _4MB = 4 * 1024 * 1024;
301 const unsigned _6MB = 6 * 1024 * 1024;
302 const unsigned _8MB = 8 * 1024 * 1024;
303 const unsigned _64MB = 64 * 1024 * 1024;
304 unsigned index;
305
306 if (size <= 4096) {
307 index = 0;
308 } else if (size <= _4MB) {
309 index = util_logbase2_ceil(size) - 12;
310 } else if (size <= _6MB) {
311 index = 11;
312 } else if (size <= _8MB) {
313 index = 12;
314 } else if (size <= _64MB) {
315 const unsigned power = util_logbase2(size);
316 const unsigned base_size = 1u << power;
317 const unsigned quarter_size = base_size / 4;
318 const unsigned quarter = DIV_ROUND_UP(size - base_size, quarter_size);
319 index = 12 + (power - 23) * 4 + quarter;
320 } else {
321 return NULL;
322 }
323
324 return (index < cache->num_buckets) ? &cache->bucket[index] : NULL;
325 }
326
327 enum iris_memory_zone
iris_memzone_for_address(uint64_t address)328 iris_memzone_for_address(uint64_t address)
329 {
330 STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START);
331 STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_SCRATCH_START);
332 STATIC_ASSERT(IRIS_MEMZONE_SCRATCH_START == IRIS_MEMZONE_BINDER_START);
333 STATIC_ASSERT(IRIS_MEMZONE_BINDER_START > IRIS_MEMZONE_SHADER_START);
334 STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START > IRIS_MEMZONE_SURFACE_START);
335 STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);
336
337 if (address >= IRIS_MEMZONE_OTHER_START)
338 return IRIS_MEMZONE_OTHER;
339
340 if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
341 return IRIS_MEMZONE_BORDER_COLOR_POOL;
342
343 if (address > IRIS_MEMZONE_DYNAMIC_START)
344 return IRIS_MEMZONE_DYNAMIC;
345
346 if (address >= IRIS_MEMZONE_SURFACE_START)
347 return IRIS_MEMZONE_SURFACE;
348
349 if (address >= (IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE))
350 return IRIS_MEMZONE_BINDER;
351
352 if (address >= IRIS_MEMZONE_SCRATCH_START)
353 return IRIS_MEMZONE_SCRATCH;
354
355 return IRIS_MEMZONE_SHADER;
356 }
357
358 /**
359 * Allocate a section of virtual memory for a buffer, assigning an address.
360 *
361 * This uses either the bucket allocator for the given size, or the large
362 * object allocator (util_vma).
363 */
364 static uint64_t
vma_alloc(struct iris_bufmgr * bufmgr,enum iris_memory_zone memzone,uint64_t size,uint64_t alignment)365 vma_alloc(struct iris_bufmgr *bufmgr,
366 enum iris_memory_zone memzone,
367 uint64_t size,
368 uint64_t alignment)
369 {
370 simple_mtx_assert_locked(&bufmgr->lock);
371
372 const unsigned _2mb = 2 * 1024 * 1024;
373
374 /* Force minimum alignment based on device requirements */
375 assert((alignment & (alignment - 1)) == 0);
376 alignment = MAX2(alignment, bufmgr->devinfo.mem_alignment);
377
378 /* If the allocation is a multiple of 2MB, ensure the virtual address is
379 * aligned to 2MB, so that it's possible for the kernel to use 64K pages.
380 */
381 if (size % _2mb == 0)
382 alignment = MAX2(alignment, _2mb);
383
384 if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
385 return IRIS_BORDER_COLOR_POOL_ADDRESS;
386
387 uint64_t addr =
388 util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment);
389
390 assert((addr >> 48ull) == 0);
391 assert((addr % alignment) == 0);
392
393 return intel_canonical_address(addr);
394 }
395
396 static void
vma_free(struct iris_bufmgr * bufmgr,uint64_t address,uint64_t size)397 vma_free(struct iris_bufmgr *bufmgr,
398 uint64_t address,
399 uint64_t size)
400 {
401 simple_mtx_assert_locked(&bufmgr->lock);
402
403 if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
404 return;
405
406 /* Un-canonicalize the address. */
407 address = intel_48b_address(address);
408
409 if (address == 0ull)
410 return;
411
412 enum iris_memory_zone memzone = iris_memzone_for_address(address);
413
414 assert(memzone < ARRAY_SIZE(bufmgr->vma_allocator));
415
416 util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
417 }
418
419 /* Exports a BO's implicit synchronization state to a drm_syncobj, returning
420 * its wrapping iris_syncobj. The drm_syncobj is created new and has to be
421 * destroyed by the caller after the execbuf ioctl.
422 */
423 struct iris_syncobj *
iris_bo_export_sync_state(struct iris_bo * bo)424 iris_bo_export_sync_state(struct iris_bo *bo)
425 {
426 struct iris_bufmgr *bufmgr = bo->bufmgr;
427 int drm_fd = iris_bufmgr_get_fd(bufmgr);
428
429 struct iris_syncobj *iris_syncobj = iris_create_syncobj(bufmgr);
430
431 struct dma_buf_export_sync_file export_sync_file_ioctl = {
432 .flags = DMA_BUF_SYNC_RW, /* TODO */
433 .fd = -1,
434 };
435 if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
436 &export_sync_file_ioctl)) {
437 fprintf(stderr, "DMA_BUF_IOCTL_EXPORT_SYNC_FILE ioctl failed (%d)\n",
438 errno);
439 goto error_export;
440 }
441
442 int sync_file_fd = export_sync_file_ioctl.fd;
443 assert(sync_file_fd >= 0);
444
445 struct drm_syncobj_handle syncobj_import_ioctl = {
446 .handle = iris_syncobj->handle,
447 .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
448 .fd = sync_file_fd,
449 };
450 if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
451 &syncobj_import_ioctl)) {
452 fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE ioctl failed (%d)\n",
453 errno);
454 }
455
456 close(sync_file_fd);
457
458 return iris_syncobj;
459 error_export:
460 iris_syncobj_destroy(bufmgr, iris_syncobj);
461 return NULL;
462 }
463
464 /* Import the state of a sync_file_fd (which we should have gotten from
465 * batch_syncobj_to_sync_file_fd) into a BO as its implicit synchronization
466 * state.
467 */
468 void
iris_bo_import_sync_state(struct iris_bo * bo,int sync_file_fd)469 iris_bo_import_sync_state(struct iris_bo *bo, int sync_file_fd)
470 {
471 struct dma_buf_import_sync_file import_sync_file_ioctl = {
472 .flags = DMA_BUF_SYNC_WRITE,
473 .fd = sync_file_fd,
474 };
475 if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
476 &import_sync_file_ioctl))
477 fprintf(stderr, "DMA_BUF_IOCTL_IMPORT_SYNC_FILE ioctl failed (%d)\n",
478 errno);
479 }
480
481 /* A timeout of 0 just checks for busyness. */
482 static int
iris_bo_wait_syncobj(struct iris_bo * bo,int64_t timeout_ns)483 iris_bo_wait_syncobj(struct iris_bo *bo, int64_t timeout_ns)
484 {
485 int ret = 0;
486 struct iris_bufmgr *bufmgr = bo->bufmgr;
487 const bool is_external = iris_bo_is_real(bo) && bo->real.prime_fd != -1;
488 struct iris_syncobj *external_implicit_syncobj = NULL;
489
490 /* If we know it's idle, don't bother with the kernel round trip.
491 * Can't do that for Xe KMD with external BOs since we have to check the
492 * implicit synchronization information.
493 */
494 if (!is_external && bo->idle)
495 return 0;
496
497 simple_mtx_lock(&bufmgr->bo_deps_lock);
498
499 const int handles_len = bo->deps_size * IRIS_BATCH_COUNT * 2 + is_external;
500 uint32_t *handles = handles_len <= 32 ?
501 (uint32_t *)alloca(handles_len * sizeof(*handles)) :
502 (uint32_t *)malloc(handles_len * sizeof(*handles));
503 int handle_count = 0;
504
505 if (is_external) {
506 external_implicit_syncobj = iris_bo_export_sync_state(bo);
507 if (external_implicit_syncobj)
508 handles[handle_count++] = external_implicit_syncobj->handle;
509 }
510
511 for (int d = 0; d < bo->deps_size; d++) {
512 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
513 struct iris_syncobj *r = bo->deps[d].read_syncobjs[b];
514 struct iris_syncobj *w = bo->deps[d].write_syncobjs[b];
515 if (r)
516 handles[handle_count++] = r->handle;
517 if (w)
518 handles[handle_count++] = w->handle;
519 }
520 }
521
522 if (handle_count == 0)
523 goto out;
524
525 /* Unlike the gem wait, negative values are not infinite here. */
526 int64_t timeout_abs = os_time_get_absolute_timeout(timeout_ns);
527 if (timeout_abs < 0)
528 timeout_abs = INT64_MAX;
529
530 struct drm_syncobj_wait args = {
531 .handles = (uintptr_t) handles,
532 .timeout_nsec = timeout_abs,
533 .count_handles = handle_count,
534 .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
535 };
536
537 ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
538 if (ret != 0) {
539 ret = -errno;
540 goto out;
541 }
542
543 /* We just waited everything, so clean all the deps. */
544 for (int d = 0; d < bo->deps_size; d++) {
545 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
546 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
547 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
548 }
549 }
550
551 out:
552 if (handles_len > 32)
553 free(handles);
554 if (external_implicit_syncobj)
555 iris_syncobj_reference(bufmgr, &external_implicit_syncobj, NULL);
556
557 simple_mtx_unlock(&bufmgr->bo_deps_lock);
558 return ret;
559 }
560
561 static bool
iris_bo_busy_syncobj(struct iris_bo * bo)562 iris_bo_busy_syncobj(struct iris_bo *bo)
563 {
564 return iris_bo_wait_syncobj(bo, 0) == -ETIME;
565 }
566
567 bool
iris_bo_busy(struct iris_bo * bo)568 iris_bo_busy(struct iris_bo *bo)
569 {
570 bool busy;
571
572 switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
573 case INTEL_KMD_TYPE_I915:
574 if (iris_bo_is_external(bo))
575 busy = iris_i915_bo_busy_gem(bo);
576 else
577 busy = iris_bo_busy_syncobj(bo);
578 break;
579 case INTEL_KMD_TYPE_XE:
580 busy = iris_bo_busy_syncobj(bo);
581 break;
582 default:
583 unreachable("missing");
584 busy = true;
585 }
586
587 bo->idle = !busy;
588
589 return busy;
590 }
591
592 /**
593 * Specify the volatility of the buffer.
594 * \param bo Buffer to create a name for
595 * \param state The purgeable status
596 *
597 * Use IRIS_MADVICE_DONT_NEED to mark the buffer as purgeable, and it will be
598 * reclaimed under memory pressure. If you subsequently require the buffer,
599 * then you must pass IRIS_MADVICE_WILL_NEED to mark the buffer as required.
600 *
601 * Returns true if the buffer was retained, or false if it was discarded
602 * whilst marked as IRIS_MADVICE_DONT_NEED.
603 */
604 static inline bool
iris_bo_madvise(struct iris_bo * bo,enum iris_madvice state)605 iris_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
606 {
607 /* We can't madvise suballocated BOs. */
608 assert(iris_bo_is_real(bo));
609
610 return bo->bufmgr->kmd_backend->bo_madvise(bo, state);
611 }
612
613 static struct iris_bo *
bo_calloc(void)614 bo_calloc(void)
615 {
616 struct iris_bo *bo = calloc(1, sizeof(*bo));
617 if (!bo)
618 return NULL;
619
620 list_inithead(&bo->real.exports);
621
622 bo->hash = _mesa_hash_pointer(bo);
623
624 return bo;
625 }
626
627 static void
bo_unmap(struct iris_bo * bo)628 bo_unmap(struct iris_bo *bo)
629 {
630 assert(iris_bo_is_real(bo));
631
632 VG_NOACCESS(bo->real.map, bo->size);
633 os_munmap(bo->real.map, bo->size);
634 bo->real.map = NULL;
635 }
636
637 static struct pb_slabs *
get_slabs(struct iris_bufmgr * bufmgr,uint64_t size)638 get_slabs(struct iris_bufmgr *bufmgr, uint64_t size)
639 {
640 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
641 struct pb_slabs *slabs = &bufmgr->bo_slabs[i];
642
643 if (size <= 1ull << (slabs->min_order + slabs->num_orders - 1))
644 return slabs;
645 }
646
647 unreachable("should have found a valid slab for this size");
648 }
649
650 /* Return the power of two size of a slab entry matching the input size. */
651 static unsigned
get_slab_pot_entry_size(struct iris_bufmgr * bufmgr,unsigned size)652 get_slab_pot_entry_size(struct iris_bufmgr *bufmgr, unsigned size)
653 {
654 unsigned entry_size = util_next_power_of_two(size);
655 unsigned min_entry_size = 1 << bufmgr->bo_slabs[0].min_order;
656
657 return MAX2(entry_size, min_entry_size);
658 }
659
660 /* Return the slab entry alignment. */
661 static unsigned
get_slab_entry_alignment(struct iris_bufmgr * bufmgr,unsigned size)662 get_slab_entry_alignment(struct iris_bufmgr *bufmgr, unsigned size)
663 {
664 unsigned entry_size = get_slab_pot_entry_size(bufmgr, size);
665
666 if (size <= entry_size * 3 / 4)
667 return entry_size / 4;
668
669 return entry_size;
670 }
671
672 static bool
iris_can_reclaim_slab(void * priv,struct pb_slab_entry * entry)673 iris_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
674 {
675 struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
676
677 return !iris_bo_busy(bo);
678 }
679
680 static void
iris_slab_free(void * priv,struct pb_slab * pslab)681 iris_slab_free(void *priv, struct pb_slab *pslab)
682 {
683 struct iris_bufmgr *bufmgr = priv;
684 struct iris_slab *slab = (void *) pslab;
685 struct intel_aux_map_context *aux_map_ctx = bufmgr->aux_map_ctx;
686
687 assert(!slab->bo->aux_map_address);
688
689 /* Since we're freeing the whole slab, all buffers allocated out of it
690 * must be reclaimable. We require buffers to be idle to be reclaimed
691 * (see iris_can_reclaim_slab()), so we know all entries must be idle.
692 * Therefore, we can safely unmap their aux table entries.
693 */
694 for (unsigned i = 0; i < pslab->num_entries; i++) {
695 struct iris_bo *bo = &slab->entries[i];
696 if (aux_map_ctx && bo->aux_map_address) {
697 intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size);
698 bo->aux_map_address = 0;
699 }
700
701 /* Unref read/write dependency syncobjs and free the array. */
702 for (int d = 0; d < bo->deps_size; d++) {
703 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
704 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
705 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
706 }
707 }
708 free(bo->deps);
709 }
710
711 iris_bo_unreference(slab->bo);
712
713 free(slab->entries);
714 free(slab);
715 }
716
717 static struct pb_slab *
iris_slab_alloc(void * priv,unsigned heap,unsigned entry_size,unsigned group_index)718 iris_slab_alloc(void *priv,
719 unsigned heap,
720 unsigned entry_size,
721 unsigned group_index)
722 {
723 struct iris_bufmgr *bufmgr = priv;
724 struct iris_slab *slab = calloc(1, sizeof(struct iris_slab));
725 uint32_t flags = BO_ALLOC_NO_SUBALLOC;
726 unsigned slab_size = 0;
727 /* We only support slab allocation for IRIS_MEMZONE_OTHER */
728 enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
729
730 if (!slab)
731 return NULL;
732
733 struct pb_slabs *slabs = bufmgr->bo_slabs;
734
735 /* Determine the slab buffer size. */
736 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
737 unsigned max_entry_size =
738 1 << (slabs[i].min_order + slabs[i].num_orders - 1);
739
740 if (entry_size <= max_entry_size) {
741 /* The slab size is twice the size of the largest possible entry. */
742 slab_size = max_entry_size * 2;
743
744 if (!util_is_power_of_two_nonzero(entry_size)) {
745 assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));
746
747 /* If the entry size is 3/4 of a power of two, we would waste
748 * space and not gain anything if we allocated only twice the
749 * power of two for the backing buffer:
750 *
751 * 2 * 3/4 = 1.5 usable with buffer size 2
752 *
753 * Allocating 5 times the entry size leads us to the next power
754 * of two and results in a much better memory utilization:
755 *
756 * 5 * 3/4 = 3.75 usable with buffer size 4
757 */
758 if (entry_size * 5 > slab_size)
759 slab_size = util_next_power_of_two(entry_size * 5);
760 }
761
762 /* The largest slab should have the same size as the PTE fragment
763 * size to get faster address translation.
764 *
765 * TODO: move this to intel_device_info?
766 */
767 const unsigned pte_size = 2 * 1024 * 1024;
768
769 if (i == NUM_SLAB_ALLOCATORS - 1 && slab_size < pte_size)
770 slab_size = pte_size;
771
772 break;
773 }
774 }
775 assert(slab_size != 0);
776
777 switch (heap) {
778 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
779 case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
780 flags |= BO_ALLOC_COMPRESSED;
781 break;
782 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
783 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
784 flags |= BO_ALLOC_SMEM;
785 break;
786 case IRIS_HEAP_DEVICE_LOCAL:
787 flags |= BO_ALLOC_LMEM;
788 break;
789 case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
790 flags |= BO_ALLOC_LMEM | BO_ALLOC_CPU_VISIBLE;
791 break;
792 default:
793 flags |= BO_ALLOC_PLAIN;
794 }
795
796 slab->bo =
797 iris_bo_alloc(bufmgr, "slab", slab_size, slab_size, memzone, flags);
798 if (!slab->bo)
799 goto fail;
800
801 slab_size = slab->bo->size;
802
803 slab->base.num_entries = slab_size / entry_size;
804 slab->base.num_free = slab->base.num_entries;
805 slab->base.group_index = group_index;
806 slab->base.entry_size = entry_size;
807 slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
808 if (!slab->entries)
809 goto fail_bo;
810
811 list_inithead(&slab->base.free);
812
813 for (unsigned i = 0; i < slab->base.num_entries; i++) {
814 struct iris_bo *bo = &slab->entries[i];
815
816 bo->size = entry_size;
817 bo->bufmgr = bufmgr;
818 bo->hash = _mesa_hash_pointer(bo);
819 bo->gem_handle = 0;
820 bo->address = intel_canonical_address(slab->bo->address + i * entry_size);
821 bo->aux_map_address = 0;
822 bo->index = -1;
823 bo->refcount = 0;
824 bo->idle = true;
825 bo->zeroed = slab->bo->zeroed;
826
827 bo->slab.entry.slab = &slab->base;
828
829 bo->slab.real = iris_get_backing_bo(slab->bo);
830
831 list_addtail(&bo->slab.entry.head, &slab->base.free);
832 }
833
834 return &slab->base;
835
836 fail_bo:
837 iris_bo_unreference(slab->bo);
838 fail:
839 free(slab);
840 return NULL;
841 }
842
843 /**
844 * Selects a heap for the given buffer allocation flags.
845 *
846 * This determines the cacheability, coherency, and mmap mode settings.
847 */
848 static enum iris_heap
flags_to_heap(struct iris_bufmgr * bufmgr,unsigned flags)849 flags_to_heap(struct iris_bufmgr *bufmgr, unsigned flags)
850 {
851 const struct intel_device_info *devinfo = &bufmgr->devinfo;
852
853 if (bufmgr->vram.size > 0) {
854 if (flags & BO_ALLOC_COMPRESSED)
855 return IRIS_HEAP_DEVICE_LOCAL_COMPRESSED;
856
857 /* Discrete GPUs currently always snoop CPU caches. */
858 if ((flags & BO_ALLOC_SMEM) || (flags & BO_ALLOC_CACHED_COHERENT))
859 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
860
861 if ((flags & BO_ALLOC_LMEM) ||
862 ((flags & BO_ALLOC_SCANOUT) && !(flags & BO_ALLOC_SHARED))) {
863
864 if ((flags & BO_ALLOC_CPU_VISIBLE) && !intel_vram_all_mappable(devinfo))
865 return IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR;
866
867 return IRIS_HEAP_DEVICE_LOCAL;
868 }
869
870 return IRIS_HEAP_DEVICE_LOCAL_PREFERRED;
871 } else if (devinfo->has_llc) {
872 assert(!(flags & BO_ALLOC_LMEM));
873
874 if (flags & (BO_ALLOC_SCANOUT | BO_ALLOC_SHARED))
875 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
876
877 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
878 } else {
879 assert(!devinfo->has_llc);
880 assert(!(flags & BO_ALLOC_LMEM));
881
882 if (flags & BO_ALLOC_COMPRESSED)
883 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;
884
885 if (flags & (BO_ALLOC_SCANOUT | BO_ALLOC_SHARED))
886 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
887
888 if (flags & BO_ALLOC_CACHED_COHERENT)
889 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
890
891 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
892 }
893 }
894
895 static bool
zero_bo(struct iris_bufmgr * bufmgr,unsigned flags,struct iris_bo * bo)896 zero_bo(struct iris_bufmgr *bufmgr,
897 unsigned flags,
898 struct iris_bo *bo)
899 {
900 assert(flags & BO_ALLOC_ZEROED);
901
902 if (bo->zeroed)
903 return true;
904
905 if (bufmgr->devinfo.has_flat_ccs && (flags & BO_ALLOC_LMEM)) {
906 /* With flat CCS, all allocations in LMEM have memory ranges with
907 * corresponding CCS elements. These elements are only accessible
908 * through GPU commands, but we don't issue GPU commands here.
909 */
910 return false;
911 }
912
913 void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
914 if (!map)
915 return false;
916
917 memset(map, 0, bo->size);
918 bo->zeroed = true;
919 return true;
920 }
921
922 static struct iris_bo *
alloc_bo_from_slabs(struct iris_bufmgr * bufmgr,const char * name,uint64_t size,uint32_t alignment,unsigned flags)923 alloc_bo_from_slabs(struct iris_bufmgr *bufmgr,
924 const char *name,
925 uint64_t size,
926 uint32_t alignment,
927 unsigned flags)
928 {
929 if (flags & BO_ALLOC_NO_SUBALLOC)
930 return NULL;
931
932 struct pb_slabs *last_slab = &bufmgr->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
933 unsigned max_slab_entry_size =
934 1 << (last_slab->min_order + last_slab->num_orders - 1);
935
936 if (size > max_slab_entry_size)
937 return NULL;
938
939 struct pb_slab_entry *entry;
940
941 enum iris_heap heap = flags_to_heap(bufmgr, flags);
942
943 unsigned alloc_size = size;
944
945 /* Always use slabs for sizes less than 4 KB because the kernel aligns
946 * everything to 4 KB.
947 */
948 if (size < alignment && alignment <= 4 * 1024)
949 alloc_size = alignment;
950
951 if (alignment > get_slab_entry_alignment(bufmgr, alloc_size)) {
952 /* 3/4 allocations can return too small alignment.
953 * Try again with a power of two allocation size.
954 */
955 unsigned pot_size = get_slab_pot_entry_size(bufmgr, alloc_size);
956
957 if (alignment <= pot_size) {
958 /* This size works but wastes some memory to fulfill the alignment. */
959 alloc_size = pot_size;
960 } else {
961 /* can't fulfill alignment requirements */
962 return NULL;
963 }
964 }
965
966 struct pb_slabs *slabs = get_slabs(bufmgr, alloc_size);
967 entry = pb_slab_alloc(slabs, alloc_size, heap);
968 if (!entry) {
969 /* Clean up and try again... */
970 pb_slabs_reclaim(slabs);
971
972 entry = pb_slab_alloc(slabs, alloc_size, heap);
973 }
974 if (!entry)
975 return NULL;
976
977 struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
978
979 if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
980 /* This buffer was associated with an aux-buffer range. We only allow
981 * slab allocated buffers to be reclaimed when idle (not in use by an
982 * executing batch). (See iris_can_reclaim_slab().) So we know that
983 * our previous aux mapping is no longer in use, and we can safely
984 * remove it.
985 */
986 intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
987 bo->size);
988 bo->aux_map_address = 0;
989 }
990
991 p_atomic_set(&bo->refcount, 1);
992 bo->name = name;
993 bo->size = size;
994
995 /* Zero the contents if necessary. If this fails, fall back to
996 * allocating a fresh BO, which will always be zeroed by the kernel.
997 */
998 if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
999 pb_slab_free(slabs, &bo->slab.entry);
1000 return NULL;
1001 }
1002
1003 return bo;
1004 }
1005
1006 static struct iris_bo *
alloc_bo_from_cache(struct iris_bufmgr * bufmgr,struct bo_cache_bucket * bucket,uint32_t alignment,enum iris_memory_zone memzone,enum iris_mmap_mode mmap_mode,unsigned flags,bool match_zone)1007 alloc_bo_from_cache(struct iris_bufmgr *bufmgr,
1008 struct bo_cache_bucket *bucket,
1009 uint32_t alignment,
1010 enum iris_memory_zone memzone,
1011 enum iris_mmap_mode mmap_mode,
1012 unsigned flags,
1013 bool match_zone)
1014 {
1015 if (!bucket)
1016 return NULL;
1017
1018 struct iris_bo *bo = NULL;
1019
1020 simple_mtx_assert_locked(&bufmgr->lock);
1021
1022 list_for_each_entry_safe(struct iris_bo, cur, &bucket->head, head) {
1023 assert(iris_bo_is_real(cur));
1024
1025 /* Find one that's got the right mapping type. We used to swap maps
1026 * around but the kernel doesn't allow this on discrete GPUs.
1027 */
1028 if (mmap_mode != cur->real.mmap_mode)
1029 continue;
1030
1031 /* Try a little harder to find one that's already in the right memzone */
1032 if (match_zone && memzone != iris_memzone_for_address(cur->address))
1033 continue;
1034
1035 if (cur->real.capture != !!(flags & BO_ALLOC_CAPTURE))
1036 continue;
1037
1038 /* If the last BO in the cache is busy, there are no idle BOs. Bail,
1039 * either falling back to a non-matching memzone, or if that fails,
1040 * allocating a fresh buffer.
1041 */
1042 if (iris_bo_busy(cur))
1043 return NULL;
1044
1045 list_del(&cur->head);
1046
1047 /* Tell the kernel we need this BO and check if it still exist */
1048 if (!iris_bo_madvise(cur, IRIS_MADVICE_WILL_NEED)) {
1049 /* This BO was purged, throw it out and keep looking. */
1050 bo_free(cur);
1051 continue;
1052 }
1053
1054 if (cur->aux_map_address) {
1055 /* This buffer was associated with an aux-buffer range. We make sure
1056 * that buffers are not reused from the cache while the buffer is (busy)
1057 * being used by an executing batch. Since we are here, the buffer is no
1058 * longer being used by a batch and the buffer was deleted (in order to
1059 * end up in the cache). Therefore its old aux-buffer range can be
1060 * removed from the aux-map.
1061 */
1062 if (cur->bufmgr->aux_map_ctx)
1063 intel_aux_map_unmap_range(cur->bufmgr->aux_map_ctx, cur->address,
1064 cur->size);
1065 cur->aux_map_address = 0;
1066 }
1067
1068 /* If the cached BO isn't in the right memory zone, or the alignment
1069 * isn't sufficient, free the old memory and assign it a new address.
1070 */
1071 if (memzone != iris_memzone_for_address(cur->address) ||
1072 cur->address % alignment != 0) {
1073 if (!bufmgr->kmd_backend->gem_vm_unbind(cur)) {
1074 DBG("Unable to unbind vm of buf %u\n", cur->gem_handle);
1075 bo_free(cur);
1076 continue;
1077 }
1078
1079 vma_free(bufmgr, cur->address, cur->size);
1080 cur->address = 0ull;
1081 }
1082
1083 bo = cur;
1084 break;
1085 }
1086
1087 if (!bo)
1088 return NULL;
1089
1090 /* Zero the contents if necessary. If this fails, fall back to
1091 * allocating a fresh BO, which will always be zeroed by the kernel.
1092 */
1093 assert(bo->zeroed == false);
1094 if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
1095 bo_free(bo);
1096 return NULL;
1097 }
1098
1099 return bo;
1100 }
1101
1102 static struct iris_bo *
alloc_fresh_bo(struct iris_bufmgr * bufmgr,uint64_t bo_size,unsigned flags)1103 alloc_fresh_bo(struct iris_bufmgr *bufmgr, uint64_t bo_size, unsigned flags)
1104 {
1105 struct iris_bo *bo = bo_calloc();
1106 if (!bo)
1107 return NULL;
1108
1109 /* Try to allocate memory in multiples of 2MB, as this allows us to use
1110 * 64K pages rather than the less-efficient 4K pages. Most BOs smaller
1111 * than 64MB should hit the BO cache or slab allocations anyway, so this
1112 * shouldn't waste too much memory. We do exclude small (< 1MB) sizes to
1113 * be defensive in case any of those bypass the caches and end up here.
1114 */
1115 if (bo_size >= 1024 * 1024)
1116 bo_size = align64(bo_size, 2 * 1024 * 1024);
1117
1118 bo->real.heap = flags_to_heap(bufmgr, flags);
1119
1120 const struct intel_memory_class_instance *regions[2];
1121 uint16_t num_regions = 0;
1122
1123 if (bufmgr->vram.size > 0) {
1124 switch (bo->real.heap) {
1125 case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1126 /* For vram allocations, still use system memory as a fallback. */
1127 regions[num_regions++] = bufmgr->vram.region;
1128 regions[num_regions++] = bufmgr->sys.region;
1129 break;
1130 case IRIS_HEAP_DEVICE_LOCAL:
1131 case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
1132 case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
1133 regions[num_regions++] = bufmgr->vram.region;
1134 break;
1135 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1136 regions[num_regions++] = bufmgr->sys.region;
1137 break;
1138 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
1139 /* not valid, compressed in discrete is always created with
1140 * IRIS_HEAP_DEVICE_LOCAL_PREFERRED_COMPRESSED
1141 */
1142 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1143 /* not valid; discrete cards always enable snooping */
1144 case IRIS_HEAP_MAX:
1145 unreachable("invalid heap for BO");
1146 }
1147 } else {
1148 regions[num_regions++] = bufmgr->sys.region;
1149 }
1150
1151 bo->gem_handle = bufmgr->kmd_backend->gem_create(bufmgr, regions,
1152 num_regions, bo_size,
1153 bo->real.heap, flags);
1154 if (bo->gem_handle == 0) {
1155 free(bo);
1156 return NULL;
1157 }
1158 bo->bufmgr = bufmgr;
1159 bo->size = bo_size;
1160 bo->idle = true;
1161 bo->zeroed = true;
1162 bo->real.capture = (flags & BO_ALLOC_CAPTURE) != 0;
1163 bo->real.scanout = (flags & BO_ALLOC_SCANOUT) != 0;
1164
1165 return bo;
1166 }
1167
1168 const char *
1169 iris_heap_to_string[IRIS_HEAP_MAX] = {
1170 [IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT] = "system-cached-coherent",
1171 [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED] = "system-uncached",
1172 [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED] = "system-uncached-compressed",
1173 [IRIS_HEAP_DEVICE_LOCAL] = "local",
1174 [IRIS_HEAP_DEVICE_LOCAL_COMPRESSED] = "local-compressed",
1175 [IRIS_HEAP_DEVICE_LOCAL_PREFERRED] = "local-preferred",
1176 [IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR] = "local-cpu-visible-small-bar",
1177 };
1178
1179 static enum iris_mmap_mode
heap_to_mmap_mode(struct iris_bufmgr * bufmgr,enum iris_heap heap)1180 heap_to_mmap_mode(struct iris_bufmgr *bufmgr, enum iris_heap heap)
1181 {
1182 const struct intel_device_info *devinfo = &bufmgr->devinfo;
1183
1184 switch (heap) {
1185 case IRIS_HEAP_DEVICE_LOCAL:
1186 return intel_vram_all_mappable(devinfo) ? IRIS_MMAP_WC : IRIS_MMAP_NONE;
1187 case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
1188 case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1189 return IRIS_MMAP_WC;
1190 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1191 return IRIS_MMAP_WB;
1192 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1193 return IRIS_MMAP_WC;
1194 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
1195 case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
1196 /* compressed bos are not mmaped */
1197 return IRIS_MMAP_NONE;
1198 default:
1199 unreachable("invalid heap");
1200 }
1201 }
1202
1203 struct iris_bo *
iris_bo_alloc(struct iris_bufmgr * bufmgr,const char * name,uint64_t size,uint32_t alignment,enum iris_memory_zone memzone,unsigned flags)1204 iris_bo_alloc(struct iris_bufmgr *bufmgr,
1205 const char *name,
1206 uint64_t size,
1207 uint32_t alignment,
1208 enum iris_memory_zone memzone,
1209 unsigned flags)
1210 {
1211 struct iris_bo *bo;
1212 unsigned int page_size = getpagesize();
1213 enum iris_heap heap = flags_to_heap(bufmgr, flags);
1214 struct bo_cache_bucket *bucket =
1215 bucket_for_size(bufmgr, size, heap, flags);
1216
1217 if (memzone != IRIS_MEMZONE_OTHER || (flags & BO_ALLOC_CACHED_COHERENT))
1218 flags |= BO_ALLOC_NO_SUBALLOC;
1219
1220 /* By default, capture all driver-internal buffers like shader kernels,
1221 * surface states, dynamic states, border colors, and so on.
1222 */
1223 if (memzone < IRIS_MEMZONE_OTHER || INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1224 flags |= BO_ALLOC_CAPTURE;
1225
1226 bo = alloc_bo_from_slabs(bufmgr, name, size, alignment, flags);
1227
1228 if (bo)
1229 return bo;
1230
1231 /* Round the size up to the bucket size, or if we don't have caching
1232 * at this size, a multiple of the page size.
1233 */
1234 uint64_t bo_size =
1235 bucket ? bucket->size : MAX2(align64(size, page_size), page_size);
1236 enum iris_mmap_mode mmap_mode = heap_to_mmap_mode(bufmgr, heap);
1237
1238 simple_mtx_lock(&bufmgr->lock);
1239
1240 /* Get a buffer out of the cache if available. First, we try to find
1241 * one with a matching memory zone so we can avoid reallocating VMA.
1242 */
1243 bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1244 flags, true);
1245
1246 /* If that fails, we try for any cached BO, without matching memzone. */
1247 if (!bo) {
1248 bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1249 flags, false);
1250 }
1251
1252 simple_mtx_unlock(&bufmgr->lock);
1253
1254 if (!bo) {
1255 bo = alloc_fresh_bo(bufmgr, bo_size, flags);
1256 if (!bo)
1257 return NULL;
1258 }
1259
1260 if (bo->address == 0ull) {
1261 simple_mtx_lock(&bufmgr->lock);
1262 bo->address = vma_alloc(bufmgr, memzone, bo->size, alignment);
1263 simple_mtx_unlock(&bufmgr->lock);
1264
1265 if (bo->address == 0ull)
1266 goto err_free;
1267
1268 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1269 goto err_vm_alloc;
1270 }
1271
1272 bo->name = name;
1273 p_atomic_set(&bo->refcount, 1);
1274 bo->real.reusable = bucket && bufmgr->bo_reuse;
1275 bo->real.protected = flags & BO_ALLOC_PROTECTED;
1276 bo->index = -1;
1277 bo->real.prime_fd = -1;
1278
1279 assert(bo->real.map == NULL || bo->real.mmap_mode == mmap_mode);
1280 bo->real.mmap_mode = mmap_mode;
1281
1282 /* On integrated GPUs, enable snooping to ensure coherency if needed.
1283 * For discrete, we instead use SMEM and avoid WB maps for coherency.
1284 */
1285 if ((flags & BO_ALLOC_CACHED_COHERENT) &&
1286 !bufmgr->devinfo.has_llc && bufmgr->devinfo.has_caching_uapi) {
1287 if (bufmgr->kmd_backend->bo_set_caching(bo, true) != 0)
1288 goto err_free;
1289 }
1290
1291 DBG("bo_create: buf %d (%s) (%s memzone) (%s) %llub\n", bo->gem_handle,
1292 bo->name, memzone_name(memzone), iris_heap_to_string[bo->real.heap],
1293 (unsigned long long) size);
1294
1295 return bo;
1296
1297 err_vm_alloc:
1298 simple_mtx_lock(&bufmgr->lock);
1299 vma_free(bufmgr, bo->address, bo->size);
1300 simple_mtx_unlock(&bufmgr->lock);
1301 err_free:
1302 simple_mtx_lock(&bufmgr->lock);
1303 bo_free(bo);
1304 simple_mtx_unlock(&bufmgr->lock);
1305 return NULL;
1306 }
1307
1308 static int
iris_bo_close(int fd,uint32_t gem_handle)1309 iris_bo_close(int fd, uint32_t gem_handle)
1310 {
1311 struct drm_gem_close close = {
1312 .handle = gem_handle,
1313 };
1314 return intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
1315 }
1316
1317 struct iris_bo *
iris_bo_create_userptr(struct iris_bufmgr * bufmgr,const char * name,void * ptr,size_t size,enum iris_memory_zone memzone)1318 iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
1319 void *ptr, size_t size,
1320 enum iris_memory_zone memzone)
1321 {
1322 struct iris_bo *bo;
1323
1324 bo = bo_calloc();
1325 if (!bo)
1326 return NULL;
1327
1328 bo->gem_handle = bufmgr->kmd_backend->gem_create_userptr(bufmgr, ptr, size);
1329 if (bo->gem_handle == 0)
1330 goto err_free;
1331
1332 bo->name = name;
1333 bo->size = size;
1334 bo->real.map = ptr;
1335 bo->real.userptr = true;
1336
1337 bo->bufmgr = bufmgr;
1338
1339 if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1340 bo->real.capture = true;
1341
1342 simple_mtx_lock(&bufmgr->lock);
1343 bo->address = vma_alloc(bufmgr, memzone, size, 1);
1344 simple_mtx_unlock(&bufmgr->lock);
1345
1346 if (bo->address == 0ull)
1347 goto err_close;
1348
1349 p_atomic_set(&bo->refcount, 1);
1350 bo->index = -1;
1351 bo->idle = true;
1352 bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1353 bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
1354 bo->real.prime_fd = -1;
1355
1356 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1357 goto err_vma_free;
1358
1359 return bo;
1360
1361 err_vma_free:
1362 simple_mtx_lock(&bufmgr->lock);
1363 vma_free(bufmgr, bo->address, bo->size);
1364 simple_mtx_unlock(&bufmgr->lock);
1365 err_close:
1366 bufmgr->kmd_backend->gem_close(bufmgr, bo);
1367 err_free:
1368 free(bo);
1369 return NULL;
1370 }
1371
1372 static bool
needs_prime_fd(struct iris_bufmgr * bufmgr)1373 needs_prime_fd(struct iris_bufmgr *bufmgr)
1374 {
1375 return bufmgr->devinfo.kmd_type == INTEL_KMD_TYPE_XE;
1376 }
1377
1378 static bool
iris_bo_set_prime_fd(struct iris_bo * bo)1379 iris_bo_set_prime_fd(struct iris_bo *bo)
1380 {
1381 struct iris_bufmgr *bufmgr = bo->bufmgr;
1382
1383 if (needs_prime_fd(bufmgr) && bo->real.prime_fd == -1) {
1384 if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1385 DRM_CLOEXEC | DRM_RDWR, &bo->real.prime_fd)) {
1386 fprintf(stderr, "Failed to get prime fd for bo %s/%u\n",
1387 bo->name, bo->gem_handle);
1388 return false;
1389 }
1390 }
1391
1392 return true;
1393 }
1394
1395 /**
1396 * Returns a iris_bo wrapping the given buffer object handle.
1397 *
1398 * This can be used when one application needs to pass a buffer object
1399 * to another.
1400 */
1401 struct iris_bo *
iris_bo_gem_create_from_name(struct iris_bufmgr * bufmgr,const char * name,unsigned int handle)1402 iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
1403 const char *name, unsigned int handle)
1404 {
1405 struct iris_bo *bo;
1406
1407 /* At the moment most applications only have a few named bo.
1408 * For instance, in a DRI client only the render buffers passed
1409 * between X and the client are named. And since X returns the
1410 * alternating names for the front/back buffer a linear search
1411 * provides a sufficiently fast match.
1412 */
1413 simple_mtx_lock(&bufmgr->lock);
1414 bo = find_and_ref_external_bo(bufmgr->name_table, handle);
1415 if (bo)
1416 goto out;
1417
1418 struct drm_gem_open open_arg = { .name = handle };
1419 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
1420 if (ret != 0) {
1421 DBG("Couldn't reference %s handle 0x%08x: %s\n",
1422 name, handle, strerror(errno));
1423 bo = NULL;
1424 goto out;
1425 }
1426 /* Now see if someone has used a prime handle to get this
1427 * object from the kernel before by looking through the list
1428 * again for a matching gem_handle
1429 */
1430 bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle);
1431 if (bo)
1432 goto out;
1433
1434 bo = bo_calloc();
1435 if (!bo) {
1436 struct iris_bo close_bo = {
1437 .gem_handle = open_arg.handle,
1438 };
1439 bufmgr->kmd_backend->gem_close(bufmgr, &close_bo);
1440 goto out;
1441 }
1442
1443 p_atomic_set(&bo->refcount, 1);
1444
1445 bo->size = open_arg.size;
1446 bo->bufmgr = bufmgr;
1447 bo->gem_handle = open_arg.handle;
1448 bo->name = name;
1449 bo->index = -1;
1450 bo->real.global_name = handle;
1451 bo->real.prime_fd = -1;
1452 bo->real.reusable = false;
1453 bo->real.imported = true;
1454 /* Xe KMD expects at least 1-way coherency for imports */
1455 bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1456 bo->real.mmap_mode = IRIS_MMAP_NONE;
1457 if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1458 bo->real.capture = true;
1459 bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
1460 if (bo->address == 0ull)
1461 goto err_free;
1462
1463 if (!iris_bo_set_prime_fd(bo))
1464 goto err_vm_alloc;
1465
1466 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1467 goto err_vm_alloc;
1468
1469 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1470 _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
1471
1472 DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
1473
1474 out:
1475 simple_mtx_unlock(&bufmgr->lock);
1476 return bo;
1477
1478 err_vm_alloc:
1479 vma_free(bufmgr, bo->address, bo->size);
1480 err_free:
1481 bo_free(bo);
1482 simple_mtx_unlock(&bufmgr->lock);
1483 return NULL;
1484 }
1485
1486 static void
bo_close(struct iris_bo * bo)1487 bo_close(struct iris_bo *bo)
1488 {
1489 struct iris_bufmgr *bufmgr = bo->bufmgr;
1490
1491 simple_mtx_assert_locked(&bufmgr->lock);
1492 assert(iris_bo_is_real(bo));
1493
1494 if (iris_bo_is_external(bo)) {
1495 struct hash_entry *entry;
1496
1497 if (bo->real.global_name) {
1498 entry = _mesa_hash_table_search(bufmgr->name_table,
1499 &bo->real.global_name);
1500 _mesa_hash_table_remove(bufmgr->name_table, entry);
1501 }
1502
1503 entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
1504 _mesa_hash_table_remove(bufmgr->handle_table, entry);
1505
1506 list_for_each_entry_safe(struct bo_export, export, &bo->real.exports, link) {
1507 iris_bo_close(export->drm_fd, export->gem_handle);
1508
1509 list_del(&export->link);
1510 free(export);
1511 }
1512 } else {
1513 assert(list_is_empty(&bo->real.exports));
1514 }
1515
1516 /* Unbind and return the VMA for reuse */
1517 if (bufmgr->kmd_backend->gem_vm_unbind(bo))
1518 vma_free(bo->bufmgr, bo->address, bo->size);
1519 else
1520 DBG("Unable to unbind vm of buf %u\n", bo->gem_handle);
1521
1522 if (bo->real.prime_fd != -1)
1523 close(bo->real.prime_fd);
1524
1525 /* Close this object */
1526 if (bufmgr->kmd_backend->gem_close(bufmgr, bo) != 0) {
1527 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1528 bo->gem_handle, bo->name, strerror(errno));
1529 }
1530
1531 if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
1532 intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
1533 bo->size);
1534 }
1535
1536 for (int d = 0; d < bo->deps_size; d++) {
1537 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
1538 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
1539 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
1540 }
1541 }
1542 free(bo->deps);
1543
1544 free(bo);
1545 }
1546
1547 static void
bo_free(struct iris_bo * bo)1548 bo_free(struct iris_bo *bo)
1549 {
1550 struct iris_bufmgr *bufmgr = bo->bufmgr;
1551
1552 simple_mtx_assert_locked(&bufmgr->lock);
1553 assert(iris_bo_is_real(bo));
1554
1555 if (!bo->real.userptr && bo->real.map)
1556 bo_unmap(bo);
1557
1558 if (bo->idle || !iris_bo_busy(bo)) {
1559 bo_close(bo);
1560 } else {
1561 /* Defer closing the GEM BO and returning the VMA for reuse until the
1562 * BO is idle. Just move it to the dead list for now.
1563 */
1564 list_addtail(&bo->head, &bufmgr->zombie_list);
1565 }
1566 }
1567
1568 static enum iris_heap
iris_get_heap_max(struct iris_bufmgr * bufmgr)1569 iris_get_heap_max(struct iris_bufmgr *bufmgr)
1570 {
1571 if (bufmgr->vram.size) {
1572 return intel_vram_all_mappable(&bufmgr->devinfo) ?
1573 IRIS_HEAP_MAX_LARGE_BAR : IRIS_HEAP_MAX;
1574 }
1575
1576 return bufmgr->devinfo.ver >= 20 ? IRIS_HEAP_MAX_NO_VRAM :
1577 IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;
1578 }
1579
1580 /** Frees all cached buffers significantly older than @time. */
1581 static void
cleanup_bo_cache(struct iris_bufmgr * bufmgr,time_t time)1582 cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time)
1583 {
1584 simple_mtx_assert_locked(&bufmgr->lock);
1585
1586 if (bufmgr->time == time)
1587 return;
1588
1589 for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
1590 struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1591
1592 for (int i = 0; i < cache->num_buckets; i++) {
1593 struct bo_cache_bucket *bucket = &cache->bucket[i];
1594
1595 list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1596 if (time - bo->real.free_time <= 1)
1597 break;
1598
1599 list_del(&bo->head);
1600
1601 bo_free(bo);
1602 }
1603 }
1604 }
1605
1606 list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1607 /* Stop once we reach a busy BO - all others past this point were
1608 * freed more recently so are likely also busy.
1609 */
1610 if (!bo->idle && iris_bo_busy(bo))
1611 break;
1612
1613 list_del(&bo->head);
1614 bo_close(bo);
1615 }
1616
1617 bufmgr->time = time;
1618 }
1619
1620 static void
bo_unreference_final(struct iris_bo * bo,time_t time)1621 bo_unreference_final(struct iris_bo *bo, time_t time)
1622 {
1623 struct iris_bufmgr *bufmgr = bo->bufmgr;
1624
1625 DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
1626
1627 assert(iris_bo_is_real(bo));
1628
1629 struct bo_cache_bucket *bucket = !bo->real.reusable ? NULL :
1630 bucket_for_size(bufmgr, bo->size, bo->real.heap, 0);
1631
1632 /* Put the buffer into our internal cache for reuse if we can. */
1633 if (bucket && iris_bo_madvise(bo, IRIS_MADVICE_DONT_NEED)) {
1634 bo->real.free_time = time;
1635 bo->name = NULL;
1636
1637 list_addtail(&bo->head, &bucket->head);
1638 } else {
1639 bo_free(bo);
1640 }
1641 }
1642
1643 void
iris_bo_unreference(struct iris_bo * bo)1644 iris_bo_unreference(struct iris_bo *bo)
1645 {
1646 if (bo == NULL)
1647 return;
1648
1649 assert(p_atomic_read(&bo->refcount) > 0);
1650
1651 if (atomic_add_unless(&bo->refcount, -1, 1)) {
1652 struct iris_bufmgr *bufmgr = bo->bufmgr;
1653 struct timespec time;
1654
1655 clock_gettime(CLOCK_MONOTONIC, &time);
1656
1657 bo->zeroed = false;
1658 if (bo->gem_handle == 0) {
1659 pb_slab_free(get_slabs(bufmgr, bo->size), &bo->slab.entry);
1660 } else {
1661 simple_mtx_lock(&bufmgr->lock);
1662
1663 if (p_atomic_dec_zero(&bo->refcount)) {
1664 bo_unreference_final(bo, time.tv_sec);
1665 cleanup_bo_cache(bufmgr, time.tv_sec);
1666 }
1667
1668 simple_mtx_unlock(&bufmgr->lock);
1669 }
1670 }
1671 }
1672
1673 static void
bo_wait_with_stall_warning(struct util_debug_callback * dbg,struct iris_bo * bo,const char * action)1674 bo_wait_with_stall_warning(struct util_debug_callback *dbg,
1675 struct iris_bo *bo,
1676 const char *action)
1677 {
1678 bool busy = dbg && !bo->idle;
1679 double elapsed = unlikely(busy) ? -get_time() : 0.0;
1680
1681 iris_bo_wait_rendering(bo);
1682
1683 if (unlikely(busy)) {
1684 elapsed += get_time();
1685 if (elapsed > 1e-5) /* 0.01ms */ {
1686 perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
1687 action, bo->name, elapsed * 1000);
1688 }
1689 }
1690 }
1691
1692 static void
print_flags(unsigned flags)1693 print_flags(unsigned flags)
1694 {
1695 if (flags & MAP_READ)
1696 DBG("READ ");
1697 if (flags & MAP_WRITE)
1698 DBG("WRITE ");
1699 if (flags & MAP_ASYNC)
1700 DBG("ASYNC ");
1701 if (flags & MAP_PERSISTENT)
1702 DBG("PERSISTENT ");
1703 if (flags & MAP_COHERENT)
1704 DBG("COHERENT ");
1705 if (flags & MAP_RAW)
1706 DBG("RAW ");
1707 DBG("\n");
1708 }
1709
1710 void *
iris_bo_map(struct util_debug_callback * dbg,struct iris_bo * bo,unsigned flags)1711 iris_bo_map(struct util_debug_callback *dbg,
1712 struct iris_bo *bo, unsigned flags)
1713 {
1714 struct iris_bufmgr *bufmgr = bo->bufmgr;
1715 void *map = NULL;
1716
1717 if (bo->gem_handle == 0) {
1718 struct iris_bo *real = iris_get_backing_bo(bo);
1719 uint64_t offset = bo->address - real->address;
1720 map = iris_bo_map(dbg, real, flags | MAP_ASYNC) + offset;
1721 } else {
1722 assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
1723 if (bo->real.mmap_mode == IRIS_MMAP_NONE)
1724 return NULL;
1725
1726 if (!bo->real.map) {
1727 DBG("iris_bo_map: %d (%s)\n", bo->gem_handle, bo->name);
1728 map = bufmgr->kmd_backend->gem_mmap(bufmgr, bo);
1729 if (!map) {
1730 return NULL;
1731 }
1732
1733 VG_DEFINED(map, bo->size);
1734
1735 if (p_atomic_cmpxchg(&bo->real.map, NULL, map)) {
1736 VG_NOACCESS(map, bo->size);
1737 os_munmap(map, bo->size);
1738 }
1739 }
1740 assert(bo->real.map);
1741 map = bo->real.map;
1742 }
1743
1744 DBG("iris_bo_map: %d (%s) -> %p\n",
1745 bo->gem_handle, bo->name, bo->real.map);
1746 print_flags(flags);
1747
1748 if (!(flags & MAP_ASYNC)) {
1749 bo_wait_with_stall_warning(dbg, bo, "memory mapping");
1750 }
1751
1752 return map;
1753 }
1754
1755 /**
1756 * Waits on a BO for the given amount of time.
1757 *
1758 * @bo: buffer object to wait for
1759 * @timeout_ns: amount of time to wait in nanoseconds.
1760 * If value is less than 0, an infinite wait will occur.
1761 *
1762 * Returns 0 if the wait was successful ie. the last batch referencing the
1763 * object has completed within the allotted time. Otherwise some negative return
1764 * value describes the error. Of particular interest is -ETIME when the wait has
1765 * failed to yield the desired result.
1766 *
1767 * Similar to iris_bo_wait_rendering except a timeout parameter allows
1768 * the operation to give up after a certain amount of time. Another subtle
1769 * difference is the internal locking semantics are different (this variant does
1770 * not hold the lock for the duration of the wait). This makes the wait subject
1771 * to a larger userspace race window.
1772 *
1773 * The implementation shall wait until the object is no longer actively
1774 * referenced within a batch buffer at the time of the call. The wait will
1775 * not guarantee that the buffer is re-issued via another thread, or an flinked
1776 * handle. Userspace must make sure this race does not occur if such precision
1777 * is important.
1778 *
1779 * Note that some kernels have broken the infinite wait for negative values
1780 * promise, upgrade to latest stable kernels if this is the case.
1781 */
1782 static inline int
iris_bo_wait(struct iris_bo * bo,int64_t timeout_ns)1783 iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
1784 {
1785 int ret;
1786
1787 switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
1788 case INTEL_KMD_TYPE_I915:
1789 if (iris_bo_is_external(bo))
1790 ret = iris_i915_bo_wait_gem(bo, timeout_ns);
1791 else
1792 ret = iris_bo_wait_syncobj(bo, timeout_ns);
1793 break;
1794 case INTEL_KMD_TYPE_XE:
1795 ret = iris_bo_wait_syncobj(bo, timeout_ns);
1796 break;
1797 default:
1798 unreachable("missing");
1799 ret = -1;
1800 }
1801
1802 bo->idle = ret == 0;
1803
1804 return ret;
1805 }

/** Waits for all GPU rendering with the object to have completed. */
void
iris_bo_wait_rendering(struct iris_bo *bo)
{
   /* We require a kernel recent enough for WAIT_IOCTL support.
    * See intel_init_bufmgr()
    */
   iris_bo_wait(bo, -1);
}

static void
iris_bufmgr_destroy_global_vm(struct iris_bufmgr *bufmgr)
{
   switch (bufmgr->devinfo.kmd_type) {
   case INTEL_KMD_TYPE_I915:
      /* Nothing to do in i915 */
      break;
   case INTEL_KMD_TYPE_XE:
      intel_bind_timeline_finish(&bufmgr->bind_timeline, bufmgr->fd);
      iris_xe_destroy_global_vm(bufmgr);
      break;
   default:
      unreachable("missing");
   }
}

static void
iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
{
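   /* Teardown roughly mirrors iris_bufmgr_create() in reverse. */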
   iris_bo_unreference(bufmgr->dummy_aux_bo);
   iris_bo_unreference(bufmgr->mem_fence_bo);

   iris_destroy_border_color_pool(&bufmgr->border_color_pool);

   /* Free aux-map buffers */
   intel_aux_map_finish(bufmgr->aux_map_ctx);

   /* bufmgr will no longer try to free VMA entries in the aux-map */
   bufmgr->aux_map_ctx = NULL;

   for (int i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      if (bufmgr->bo_slabs[i].groups)
         pb_slabs_deinit(&bufmgr->bo_slabs[i]);
   }

   simple_mtx_lock(&bufmgr->lock);

   /* Free any cached buffer objects we were going to reuse */
   for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
      struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];

      for (int i = 0; i < cache->num_buckets; i++) {
         struct bo_cache_bucket *bucket = &cache->bucket[i];

         list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
            list_del(&bo->head);

            bo_free(bo);
         }
      }
   }
   free(bufmgr->bucket_cache);

   /* Close any buffer objects on the dead list. */
   list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
      list_del(&bo->head);
      bo_close(bo);
   }

   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);

   for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
      util_vma_heap_finish(&bufmgr->vma_allocator[z]);

   iris_bufmgr_destroy_global_vm(bufmgr);

   close(bufmgr->fd);

   simple_mtx_unlock(&bufmgr->lock);

   simple_mtx_destroy(&bufmgr->lock);
   simple_mtx_destroy(&bufmgr->bo_deps_lock);

   free(bufmgr);
}

int
iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

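   /* Without a tiling uAPI (e.g. on Xe) there is nothing to query, so
    * report I915_TILING_NONE (0).
    */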
   if (!bufmgr->devinfo.has_tiling_uapi) {
      *tiling = 0;
      return 0;
   }

   assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
   return iris_i915_bo_get_tiling(bo, tiling);
}

int
iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* If we can't do map_gtt, the set/get_tiling API isn't useful. And it's
    * actually not supported by the kernel in those cases.
    */
   if (!bufmgr->devinfo.has_tiling_uapi)
      return 0;

   assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
   return iris_i915_bo_set_tiling(bo, surf);
}

struct iris_bo *
iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
                      const uint64_t modifier)
{
   uint32_t handle;
   struct iris_bo *bo;

   simple_mtx_lock(&bufmgr->lock);
   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
   if (ret) {
      DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
          strerror(errno));
      simple_mtx_unlock(&bufmgr->lock);
      return NULL;
   }

   /*
    * See if the kernel has already returned this buffer to us. Just as
    * for named buffers, we must not create two BOs pointing at the same
    * kernel object.
    */
   bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
   if (bo)
      goto out;

   bo = bo_calloc();
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);

   /* Determine the size of the BO. The fd-to-handle ioctl really should
    * return the size, but it doesn't. If we have kernel 3.12 or later, we
    * can lseek on the prime fd to get the size. Older kernels will just
    * fail, in which case we fall back to the provided (estimated or
    * guessed) size.
    */
   ret = lseek(prime_fd, 0, SEEK_END);
   if (ret != -1)
      bo->size = ret;

   bo->bufmgr = bufmgr;
   bo->name = "prime";
   bo->index = -1;
   bo->real.reusable = false;
   bo->real.imported = true;
   /* Xe KMD expects at least 1-way coherency for imports */
   bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
   bo->real.mmap_mode = IRIS_MMAP_NONE;
   if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
      bo->real.capture = true;
   bo->gem_handle = handle;
   bo->real.prime_fd = needs_prime_fd(bufmgr) ? dup(prime_fd) : -1;

   uint64_t alignment = 1;

   /* When an aux map will be used, there is an alignment requirement on the
    * main surface from the mapping granularity. Some planes of the image may
    * have smaller alignment requirements, but this one should work for all.
    */
   if (bufmgr->devinfo.has_aux_map && isl_drm_modifier_has_aux(modifier))
      alignment = intel_aux_map_get_alignment(bufmgr->aux_map_ctx);

   bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, alignment);
   if (bo->address == 0ull)
      goto err_free;

   if (!bufmgr->kmd_backend->gem_vm_bind(bo))
      goto err_vm_alloc;

   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

out:
   simple_mtx_unlock(&bufmgr->lock);
   return bo;

err_vm_alloc:
   vma_free(bufmgr, bo->address, bo->size);
err_free:
   bo_free(bo);
   simple_mtx_unlock(&bufmgr->lock);
   return NULL;
}
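
/* Illustrative usage sketch (assumed caller, not part of this file):
 * importing a linear dma-buf received from another process:
 *
 *    struct iris_bo *bo =
 *       iris_bo_import_dmabuf(bufmgr, prime_fd, DRM_FORMAT_MOD_LINEAR);
 *    if (!bo)
 *       return false;
 */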

static void
iris_bo_mark_exported_locked(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));
   simple_mtx_assert_locked(&bufmgr->lock);

   if (!iris_bo_is_external(bo))
      _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

   if (!bo->real.exported) {
      /* If a BO is going to be used externally, it could be sent to the
       * display HW. So make sure our CPU mappings don't assume cache
       * coherency since display is outside that cache.
       */
      bo->real.exported = true;
      bo->real.reusable = false;
   }
}

void
iris_bo_mark_exported(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   if (bo->real.exported) {
      assert(!bo->real.reusable);
      return;
   }

   simple_mtx_lock(&bufmgr->lock);
   iris_bo_mark_exported_locked(bo);
   simple_mtx_unlock(&bufmgr->lock);

   iris_bo_set_prime_fd(bo);
}

int
iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
                          DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
      return -errno;

   iris_bo_mark_exported(bo);

   return 0;
}

static uint32_t
iris_bo_export_gem_handle(struct iris_bo *bo)
{
   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   iris_bo_mark_exported(bo);

   return bo->gem_handle;
}

int
iris_bo_flink(struct iris_bo *bo, uint32_t *name)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   if (!bo->real.global_name) {
      struct drm_gem_flink flink = { .handle = bo->gem_handle };

      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
         return -errno;

      simple_mtx_lock(&bufmgr->lock);
      if (!bo->real.global_name) {
         iris_bo_mark_exported_locked(bo);
         bo->real.global_name = flink.name;
         _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
      }
      simple_mtx_unlock(&bufmgr->lock);

      iris_bo_set_prime_fd(bo);
   }

   *name = bo->real.global_name;
   return 0;
}

int
iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
                                     uint32_t *out_handle)
{
   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   /* Only add the new GEM handle to the list of exports if it belongs to a
    * different GEM device. Otherwise we might close the same buffer multiple
    * times.
    */
   struct iris_bufmgr *bufmgr = bo->bufmgr;
   int ret = os_same_file_description(drm_fd, bufmgr->fd);
   WARN_ONCE(ret < 0,
             "Kernel has no file descriptor comparison support: %s\n",
             strerror(errno));
   if (ret == 0) {
      *out_handle = iris_bo_export_gem_handle(bo);
      return 0;
   }

   struct bo_export *export = calloc(1, sizeof(*export));
   if (!export)
      return -ENOMEM;

   export->drm_fd = drm_fd;

   int dmabuf_fd = -1;
   int err = iris_bo_export_dmabuf(bo, &dmabuf_fd);
   if (err) {
      free(export);
      return err;
   }

   simple_mtx_lock(&bufmgr->lock);
   err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
   close(dmabuf_fd);
   if (err) {
      simple_mtx_unlock(&bufmgr->lock);
      free(export);
      return err;
   }

   bool found = false;
   list_for_each_entry(struct bo_export, iter, &bo->real.exports, link) {
      if (iter->drm_fd != drm_fd)
         continue;
      /* Here we assume that for a given DRM fd, we'll always get back the
       * same GEM handle for a given buffer.
       */
      assert(iter->gem_handle == export->gem_handle);
      free(export);
      export = iter;
      found = true;
      break;
   }
   if (!found)
      list_addtail(&export->link, &bo->real.exports);

   simple_mtx_unlock(&bufmgr->lock);

   *out_handle = export->gem_handle;

   return 0;
}

static void
add_bucket(struct iris_bufmgr *bufmgr, int size, enum iris_heap heap)
{
   struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
   unsigned int i = cache->num_buckets++;

   assert(i < BUCKET_ARRAY_SIZE);

   list_inithead(&cache->bucket[i].head);
   cache->bucket[i].size = size;

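   /* Sanity check that bucket_for_size() maps this size (and sizes just
    * below it) back to the new bucket, and anything larger to the next one.
    */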
   assert(bucket_for_size(bufmgr, size, heap, 0) == &cache->bucket[i]);
   assert(bucket_for_size(bufmgr, size - 2048, heap, 0) == &cache->bucket[i]);
   assert(bucket_for_size(bufmgr, size + 1, heap, 0) != &cache->bucket[i]);
}

static void
init_cache_buckets(struct iris_bufmgr *bufmgr, enum iris_heap heap)
{
   const unsigned _6MB = 6 * 1024 * 1024;
   const unsigned _8MB = 8 * 1024 * 1024;
   const unsigned _64MB = 64 * 1024 * 1024;

   /* power-of-two buckets from 4K to 4MB */
   for (uint64_t size = 4096; size < _8MB; size *= 2)
      add_bucket(bufmgr, size, heap);

   /* 6MB */
   add_bucket(bufmgr, _6MB, heap);

   /* 8MB+: three sizes between each power of two to reduce waste */
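   /* For example, between 8MB and 16MB this creates buckets of 8MB, 10MB,
    * 12MB, and 14MB.
    */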
   for (uint64_t size = _8MB; size < _64MB; size *= 2) {
      add_bucket(bufmgr, size, heap);
      add_bucket(bufmgr, size + size * 1 / 4, heap);
      add_bucket(bufmgr, size + size * 2 / 4, heap);
      add_bucket(bufmgr, size + size * 3 / 4, heap);
   }

   /* 64MB */
   add_bucket(bufmgr, _64MB, heap);
}

static struct intel_buffer *
intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
{
   struct intel_buffer *buf = malloc(sizeof(struct intel_buffer));
   if (!buf)
      return NULL;

   struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;

   unsigned int page_size = getpagesize();
   size = MAX2(ALIGN(size, page_size), page_size);

   struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, BO_ALLOC_CAPTURE);
   if (!bo) {
      free(buf);
      return NULL;
   }

   simple_mtx_lock(&bufmgr->lock);

   bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024);
   if (bo->address == 0ull)
      goto err_free;

   if (!bufmgr->kmd_backend->gem_vm_bind(bo))
      goto err_vm_alloc;

   simple_mtx_unlock(&bufmgr->lock);

   bo->name = "aux-map";
   p_atomic_set(&bo->refcount, 1);
   bo->index = -1;
   bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
   bo->real.prime_fd = -1;

   buf->driver_bo = bo;
   buf->gpu = bo->address;
   buf->gpu_end = buf->gpu + bo->size;
   buf->map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
   return buf;

err_vm_alloc:
   vma_free(bufmgr, bo->address, bo->size);
err_free:
   free(buf);
   bo_free(bo);
   simple_mtx_unlock(&bufmgr->lock);
   return NULL;
}

static void
intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
{
   iris_bo_unreference((struct iris_bo*)buffer->driver_bo);
   free(buffer);
}

static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
   .alloc = intel_aux_map_buffer_alloc,
   .free = intel_aux_map_buffer_free,
};

static bool
iris_bufmgr_get_meminfo(struct iris_bufmgr *bufmgr,
                        struct intel_device_info *devinfo)
{
   bufmgr->sys.region = &devinfo->mem.sram.mem;
   bufmgr->sys.size = devinfo->mem.sram.mappable.size;

   /* When the resizable BAR feature is disabled, vram.mappable.size is only
    * 256MB; the remainder of the total size is reported in
    * vram.unmappable.size.
    */
   bufmgr->vram.region = &devinfo->mem.vram.mem;
   bufmgr->vram.size = devinfo->mem.vram.mappable.size +
                       devinfo->mem.vram.unmappable.size;

   return true;
}

static bool
iris_bufmgr_init_global_vm(struct iris_bufmgr *bufmgr)
{
   switch (bufmgr->devinfo.kmd_type) {
   case INTEL_KMD_TYPE_I915:
      bufmgr->use_global_vm = iris_i915_init_global_vm(bufmgr, &bufmgr->global_vm_id);
      /* i915 doesn't require a VM, so return true even if use_global_vm is false */
      return true;
   case INTEL_KMD_TYPE_XE:
      if (!intel_bind_timeline_init(&bufmgr->bind_timeline, bufmgr->fd))
         return false;

      bufmgr->use_global_vm = iris_xe_init_global_vm(bufmgr, &bufmgr->global_vm_id);
      /* Xe requires a VM */
      return bufmgr->use_global_vm;
   default:
      unreachable("missing");
      return false;
   }
}

/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
static struct iris_bufmgr *
iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
{
   if (devinfo->gtt_size <= IRIS_MEMZONE_OTHER_START)
      return NULL;

   struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
   if (bufmgr == NULL)
      return NULL;

   /* Handles to buffer objects belong to the device fd and are not
    * reference counted by the kernel. If the same fd is used by
    * multiple parties (threads sharing the same screen bufmgr, or
    * even worse the same device fd passed to multiple libraries),
    * ownership of those handles is shared by those independent parties.
    *
    * Don't do this! Ensure that each library/bufmgr has its own device
    * fd so that its namespace does not clash with another.
    */
   bufmgr->fd = os_dupfd_cloexec(fd);
   if (bufmgr->fd == -1)
      goto error_dup;

   p_atomic_set(&bufmgr->refcount, 1);

   simple_mtx_init(&bufmgr->lock, mtx_plain);
   simple_mtx_init(&bufmgr->bo_deps_lock, mtx_plain);

   list_inithead(&bufmgr->zombie_list);

   bufmgr->devinfo = *devinfo;
   devinfo = &bufmgr->devinfo;
   bufmgr->bo_reuse = bo_reuse;
   iris_bufmgr_get_meminfo(bufmgr, devinfo);
   bufmgr->kmd_backend = iris_kmd_backend_get(devinfo->kmd_type);

   intel_common_update_device_info(bufmgr->fd, devinfo);

   if (!iris_bufmgr_init_global_vm(bufmgr))
      goto error_init_vm;

   STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull);
   const uint64_t _4GB = 1ull << 32;
   const uint64_t _2GB = 1ul << 31;

   /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
   const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;

   const struct {
      uint64_t start;
      uint64_t size;
   } vma[IRIS_MEMZONE_COUNT] = {
      [IRIS_MEMZONE_SHADER] = {
         .start = PAGE_SIZE,
         .size = _4GB_minus_1 - PAGE_SIZE
      },
      [IRIS_MEMZONE_BINDER] = {
         .start = IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE,
         .size = IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
      },
      [IRIS_MEMZONE_SCRATCH] = {
         .start = IRIS_MEMZONE_SCRATCH_START,
         .size = IRIS_SCRATCH_ZONE_SIZE
      },
      [IRIS_MEMZONE_SURFACE] = {
         .start = IRIS_MEMZONE_SURFACE_START,
         .size = _4GB_minus_1 - IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
      },
      [IRIS_MEMZONE_DYNAMIC] = {
         .start = IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,

         /* Wa_2209859288: the Tigerlake PRM's workarounds volume says:
          *
          *    "PSDunit is dropping MSB of the blend state pointer from SD
          *     FIFO [...] Limit the Blend State Pointer to < 2G"
          *
          * We restrict the dynamic state pool to 2GB so that we don't ever
          * get a BLEND_STATE pointer with the MSB set. We aren't likely to
          * need the full 4GB for dynamic state anyway.
          */
         .size = (devinfo->ver >= 12 ? _2GB : _4GB_minus_1)
                 - IRIS_BORDER_COLOR_POOL_SIZE
      },
      [IRIS_MEMZONE_OTHER] = {
         .start = IRIS_MEMZONE_OTHER_START,

         /* Leave the last 4GB out of the high vma range, so that no state
          * base address + size can overflow 48 bits.
          */
         .size = (devinfo->gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START,
      },
   };
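
   /* Note that the dynamic zone deliberately skips its first
    * IRIS_BORDER_COLOR_POOL_SIZE bytes (see .start above) so the border
    * color pool can occupy a fixed range at the start of that zone.
    */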

   for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
      util_vma_heap_init(&bufmgr->vma_allocator[i],
                         vma[i].start, vma[i].size);
   }

   if (INTEL_DEBUG(DEBUG_HEAPS)) {
      for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
         fprintf(stderr, "%-11s | 0x%016" PRIx64 "-0x%016" PRIx64 "\n",
                 memzone_name(i), vma[i].start,
                 vma[i].start + vma[i].size - 1);
      }
   }

   bufmgr->bucket_cache = calloc(iris_get_heap_max(bufmgr),
                                 sizeof(*bufmgr->bucket_cache));
   if (!bufmgr->bucket_cache)
      goto error_bucket_cache;
   for (int h = 0; h < iris_get_heap_max(bufmgr); h++)
      init_cache_buckets(bufmgr, h);

   unsigned min_slab_order = 8;  /* 256 bytes */
   unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
   unsigned num_slab_orders_per_allocator =
      (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS;

   /* Divide the size order range among slab managers. */
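   /* For example, assuming NUM_SLAB_ALLOCATORS is 3, the loop below splits
    * the order range 8..20 into roughly 8-12, 13-17, and 18-20.
    */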
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned min_order = min_slab_order;
      unsigned max_order =
         MIN2(min_order + num_slab_orders_per_allocator, max_slab_order);

      if (!pb_slabs_init(&bufmgr->bo_slabs[i], min_order, max_order,
                         iris_get_heap_max(bufmgr), true, bufmgr,
                         iris_can_reclaim_slab,
                         iris_slab_alloc,
                         (void *) iris_slab_free)) {
         goto error_slabs_init;
      }
      min_slab_order = max_order + 1;
   }

   bufmgr->name_table =
      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
   bufmgr->handle_table =
      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);

   if (devinfo->has_aux_map) {
      bufmgr->aux_map_ctx = intel_aux_map_init(bufmgr, &aux_map_allocator,
                                               devinfo);
      assert(bufmgr->aux_map_ctx);
   }

   iris_init_border_color_pool(bufmgr, &bufmgr->border_color_pool);

   if (intel_needs_workaround(devinfo, 14019708328)) {
      bufmgr->dummy_aux_bo = iris_bo_alloc(bufmgr, "dummy_aux", 4096, 4096,
                                           IRIS_MEMZONE_OTHER, BO_ALLOC_PLAIN);
      if (!bufmgr->dummy_aux_bo)
         goto error_alloc_bo;
   }

   /* Programming note from MI_MEM_FENCE specification:
    *
    *    Software must ensure STATE_SYSTEM_MEM_FENCE_ADDRESS command is
    *    programmed prior to programming this command.
    *
    * HAS 1607240579 then provides the size information: 4K
    */
   if (devinfo->verx10 >= 200) {
      bufmgr->mem_fence_bo = iris_bo_alloc(bufmgr, "mem_fence", 4096, 4096,
                                           IRIS_MEMZONE_OTHER, BO_ALLOC_SMEM);
      if (!bufmgr->mem_fence_bo)
         goto error_alloc_bo;
   }

   return bufmgr;

error_alloc_bo:
   iris_bo_unreference(bufmgr->dummy_aux_bo);
   iris_bo_unreference(bufmgr->mem_fence_bo);
   iris_destroy_border_color_pool(&bufmgr->border_color_pool);
   intel_aux_map_finish(bufmgr->aux_map_ctx);
   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
error_slabs_init:
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      if (!bufmgr->bo_slabs[i].groups)
         break;

      pb_slabs_deinit(&bufmgr->bo_slabs[i]);
   }
   free(bufmgr->bucket_cache);
error_bucket_cache:
   for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++)
      util_vma_heap_finish(&bufmgr->vma_allocator[i]);
   iris_bufmgr_destroy_global_vm(bufmgr);
error_init_vm:
   close(bufmgr->fd);
error_dup:
   free(bufmgr);
   return NULL;
}

static struct iris_bufmgr *
iris_bufmgr_ref(struct iris_bufmgr *bufmgr)
{
   p_atomic_inc(&bufmgr->refcount);
   return bufmgr;
}

void
iris_bufmgr_unref(struct iris_bufmgr *bufmgr)
{
   simple_mtx_lock(&global_bufmgr_list_mutex);
   if (p_atomic_dec_zero(&bufmgr->refcount)) {
      list_del(&bufmgr->link);
      iris_bufmgr_destroy(bufmgr);
   }
   simple_mtx_unlock(&global_bufmgr_list_mutex);
}

/** Returns a new unique id, to be used by screens. */
int
iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr)
{
   return p_atomic_inc_return(&bufmgr->next_screen_id) - 1;
}

/**
 * Gets an already-existing GEM buffer manager or creates a new one.
 *
 * \param fd File descriptor of the opened DRM device.
 */
struct iris_bufmgr *
iris_bufmgr_get_for_fd(int fd, bool bo_reuse)
{
   struct intel_device_info devinfo;
   struct stat st;

   if (fstat(fd, &st))
      return NULL;

   struct iris_bufmgr *bufmgr = NULL;

   simple_mtx_lock(&global_bufmgr_list_mutex);
   list_for_each_entry(struct iris_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
      struct stat iter_st;
      if (fstat(iter_bufmgr->fd, &iter_st))
         continue;

      if (st.st_rdev == iter_st.st_rdev) {
         assert(iter_bufmgr->bo_reuse == bo_reuse);
         bufmgr = iris_bufmgr_ref(iter_bufmgr);
         goto unlock;
      }
   }

   if (!intel_get_device_info_from_fd(fd, &devinfo, 8, -1))
      goto unlock;

   if (devinfo.ver < 8 || devinfo.platform == INTEL_PLATFORM_CHV)
      goto unlock;

#ifndef INTEL_USE_ELK
   if (devinfo.ver < 9) {
      WARN_ONCE(devinfo.ver == 8,
                "ERROR: Iris was compiled without support for Gfx version 8.\n");
      goto unlock;
   }
#endif

   bufmgr = iris_bufmgr_create(&devinfo, fd, bo_reuse);
   if (bufmgr)
      list_addtail(&bufmgr->link, &global_bufmgr_list);

unlock:
   simple_mtx_unlock(&global_bufmgr_list_mutex);

   return bufmgr;
}

int
iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr)
{
   return bufmgr->fd;
}

void *
iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr)
{
   return bufmgr->aux_map_ctx;
}

simple_mtx_t *
iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->bo_deps_lock;
}

struct iris_border_color_pool *
iris_bufmgr_get_border_color_pool(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->border_color_pool;
}

uint64_t
iris_bufmgr_vram_size(struct iris_bufmgr *bufmgr)
{
   return bufmgr->vram.size;
}

uint64_t
iris_bufmgr_sram_size(struct iris_bufmgr *bufmgr)
{
   return bufmgr->sys.size;
}

const struct intel_device_info *
iris_bufmgr_get_device_info(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->devinfo;
}

const struct iris_kmd_backend *
iris_bufmgr_get_kernel_driver_backend(struct iris_bufmgr *bufmgr)
{
   return bufmgr->kmd_backend;
}

uint32_t
iris_bufmgr_get_global_vm_id(struct iris_bufmgr *bufmgr)
{
   return bufmgr->global_vm_id;
}

bool
iris_bufmgr_use_global_vm_id(struct iris_bufmgr *bufmgr)
{
   return bufmgr->use_global_vm;
}

bool
iris_bufmgr_compute_engine_supported(struct iris_bufmgr *bufmgr)
{
   return bufmgr->devinfo.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
}

/**
 * Return the PAT entry based on the BO heap and allocation flags.
 */
const struct intel_device_info_pat_entry *
iris_heap_to_pat_entry(const struct intel_device_info *devinfo,
                       enum iris_heap heap, bool scanout)
{
   if (scanout) {
      if (!iris_heap_is_compressed(heap))
         return &devinfo->pat.scanout;

      WARN_ONCE(iris_heap_is_compressed(heap),
                "update heap_to_pat_entry when compressed scanout pat entries are added");
   }

   switch (heap) {
   case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
      return &devinfo->pat.cached_coherent;
   case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
      return &devinfo->pat.writecombining;
   case IRIS_HEAP_DEVICE_LOCAL:
   case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
   case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
      return &devinfo->pat.writecombining;
   case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
   case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
      return &devinfo->pat.compressed;
   default:
      unreachable("invalid heap for platforms using PAT entries");
   }
}

struct intel_bind_timeline *
iris_bufmgr_get_bind_timeline(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->bind_timeline;
}

uint64_t
iris_bufmgr_get_dummy_aux_address(struct iris_bufmgr *bufmgr)
{
   return bufmgr->dummy_aux_bo ? bufmgr->dummy_aux_bo->address : 0;
}

struct iris_bo *
iris_bufmgr_get_mem_fence_bo(struct iris_bufmgr *bufmgr)
{
   return bufmgr->mem_fence_bo;
}