/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BUFMGR_H
#define IRIS_BUFMGR_H

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include "c11/threads.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "util/list.h"
#include "util/simple_mtx.h"
#include "pipe/p_defines.h"
#include "pipebuffer/pb_slab.h"
#include "intel/dev/intel_device_info.h"

struct intel_device_info;
struct util_debug_callback;
struct isl_surf;
struct iris_syncobj;

/**
 * Memory zones. When allocating a buffer, you can request that it be
 * placed into a specific region of the virtual address space (PPGTT).
 *
 * Most buffers can go anywhere (IRIS_MEMZONE_OTHER). Some buffers are
 * accessed via an offset from a base address. STATE_BASE_ADDRESS has
 * a maximum 4GB size for each region, so we need to restrict those
 * buffers to be within 4GB of the base. Each memory zone corresponds
 * to a particular base address.
 *
 * We lay out the virtual address space as follows:
 *
 * - [0, 4K):   Nothing            (empty page for null address)
 * - [4K, 4G):  Shaders            (Instruction Base Address)
 * - [4G, 8G):  Surfaces & Binders (Surface State Base Address, Bindless ...)
 * - [8G, 12G): Dynamic            (Dynamic State Base Address)
 * - [12G, *):  Other              (everything else in the full 48-bit VMA)
 *
 * A special buffer for border color lives at the start of the dynamic state
 * memory zone. This unfortunately has to be handled specially because the
 * SAMPLER_STATE "Indirect State Pointer" field is only a 24-bit pointer.
 *
 * Each GL context uses a separate GEM context, which technically gives them
 * each a separate VMA. However, we assign addresses globally, so buffers will
 * have the same address in all GEM contexts. This lets us have a single BO
 * field for the address, which is easy and cheap.
 */
enum iris_memory_zone {
   IRIS_MEMZONE_SHADER,
   IRIS_MEMZONE_BINDER,
   IRIS_MEMZONE_BINDLESS,
   IRIS_MEMZONE_SURFACE,
   IRIS_MEMZONE_DYNAMIC,
   IRIS_MEMZONE_OTHER,

   IRIS_MEMZONE_BORDER_COLOR_POOL,
};

/* Intentionally exclude single buffer "zones" */
#define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 1)

#define IRIS_BINDLESS_SIZE (8 * 1024 * 1024)
#define IRIS_BINDER_ZONE_SIZE ((1ull << 30) - IRIS_BINDLESS_SIZE)

#define IRIS_MEMZONE_SHADER_START    (0ull * (1ull << 32))
#define IRIS_MEMZONE_BINDER_START    (1ull * (1ull << 32))
#define IRIS_MEMZONE_BINDLESS_START  (IRIS_MEMZONE_BINDER_START + IRIS_BINDER_ZONE_SIZE)
#define IRIS_MEMZONE_SURFACE_START   (IRIS_MEMZONE_BINDLESS_START + IRIS_BINDLESS_SIZE)
#define IRIS_MEMZONE_DYNAMIC_START   (2ull * (1ull << 32))
#define IRIS_MEMZONE_OTHER_START     (3ull * (1ull << 32))

#define IRIS_BORDER_COLOR_POOL_ADDRESS IRIS_MEMZONE_DYNAMIC_START
#define IRIS_BORDER_COLOR_POOL_SIZE (64 * 4096)
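
/*
 * Illustrative sketch (not part of the driver API): given the layout above,
 * mapping an address back to its memory zone could be written roughly as
 * follows. The real lookup is iris_memzone_for_address(), declared later in
 * this header; this version only makes the address map concrete and ignores
 * the border color pool special case.
 *
 *    static enum iris_memory_zone
 *    example_memzone_for_address(uint64_t address)
 *    {
 *       if (address >= IRIS_MEMZONE_OTHER_START)
 *          return IRIS_MEMZONE_OTHER;
 *       if (address >= IRIS_MEMZONE_DYNAMIC_START)
 *          return IRIS_MEMZONE_DYNAMIC;
 *       if (address >= IRIS_MEMZONE_SURFACE_START)
 *          return IRIS_MEMZONE_SURFACE;
 *       if (address >= IRIS_MEMZONE_BINDLESS_START)
 *          return IRIS_MEMZONE_BINDLESS;
 *       if (address >= IRIS_MEMZONE_BINDER_START)
 *          return IRIS_MEMZONE_BINDER;
 *       return IRIS_MEMZONE_SHADER;
 *    }
 */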

/**
 * Classification of the various incoherent caches of the GPU into a number of
 * caching domains.
 */
enum iris_domain {
   /** Render color cache. */
   IRIS_DOMAIN_RENDER_WRITE = 0,
   /** (Hi)Z/stencil cache. */
   IRIS_DOMAIN_DEPTH_WRITE,
   /** Data port (HDC) cache. */
   IRIS_DOMAIN_DATA_WRITE,
   /** Any other read-write cache. */
   IRIS_DOMAIN_OTHER_WRITE,
   /** Vertex cache. */
   IRIS_DOMAIN_VF_READ,
   /** Texture cache. */
   IRIS_DOMAIN_SAMPLER_READ,
   /** Pull-style shader constant loads. */
   IRIS_DOMAIN_PULL_CONSTANT_READ,
   /** Any other read-only cache, including reads from non-L3 clients. */
   IRIS_DOMAIN_OTHER_READ,
   /** Number of caching domains. */
   NUM_IRIS_DOMAINS,
   /** Not a real cache, use to opt out of the cache tracking mechanism. */
   IRIS_DOMAIN_NONE = NUM_IRIS_DOMAINS
};

/**
 * Whether a caching domain is guaranteed not to write any data to memory.
 */
static inline bool
iris_domain_is_read_only(enum iris_domain access)
{
   return access >= IRIS_DOMAIN_VF_READ &&
          access <= IRIS_DOMAIN_OTHER_READ;
}

static inline bool
iris_domain_is_l3_coherent(const struct intel_device_info *devinfo,
                           enum iris_domain access)
{
   /* VF reads are coherent with the L3 on Tigerlake+ because we set
    * the "L3 Bypass Disable" bit in the vertex/index buffer packets.
    */
   if (access == IRIS_DOMAIN_VF_READ)
      return devinfo->ver >= 12;

   return access != IRIS_DOMAIN_OTHER_WRITE &&
          access != IRIS_DOMAIN_OTHER_READ;
}

enum iris_mmap_mode {
   IRIS_MMAP_NONE, /**< Cannot be mapped */
   IRIS_MMAP_UC,   /**< Fully uncached memory map */
   IRIS_MMAP_WC,   /**< Write-combining map with no caching of reads */
   IRIS_MMAP_WB,   /**< Write-back mapping with CPU caches enabled */
};

enum iris_heap {
   IRIS_HEAP_SYSTEM_MEMORY,
   IRIS_HEAP_DEVICE_LOCAL,
   IRIS_HEAP_DEVICE_LOCAL_PREFERRED,
   IRIS_HEAP_MAX,
};

extern const char *iris_heap_to_string[];

#define IRIS_BATCH_COUNT 3

struct iris_bo_screen_deps {
   struct iris_syncobj *write_syncobjs[IRIS_BATCH_COUNT];
   struct iris_syncobj *read_syncobjs[IRIS_BATCH_COUNT];
};

struct iris_bo {
   /**
    * Size in bytes of the buffer object.
    *
    * The size may be larger than the size originally requested for the
    * allocation, such as being aligned to page size.
    */
   uint64_t size;

   /** Buffer manager context associated with this buffer object */
   struct iris_bufmgr *bufmgr;

   /** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
   uint32_t hash;

   /** The GEM handle for this buffer object. */
   uint32_t gem_handle;

   /**
    * Virtual address of the buffer inside the PPGTT (Per-Process Graphics
    * Translation Table).
    *
    * Although each hardware context has its own VMA, we assign BOs the
    * same address in all contexts, for simplicity.
    */
   uint64_t address;

   /**
    * If non-zero, then this bo has an aux-map translation to this address.
    */
   uint64_t aux_map_address;

   /**
    * If this BO is referenced by a batch, this _may_ be the index into the
    * batch->exec_bos[] list.
    *
    * Note that a single buffer may be used by multiple batches/contexts,
    * and thus appear in multiple lists, but we only track one index here.
    * In the common case one can guess that batch->exec_bos[bo->index] == bo
    * and double check if that's true to avoid a linear list walk.
    *
    * XXX: this is not ideal now that we have more than one batch per context,
    * XXX: as the index will flop back and forth between the render index and
    * XXX: compute index...
    */
   unsigned index;

   int refcount;
   const char *name;

   /** BO cache list */
   struct list_head head;

   /**
    * Synchronization sequence number of most recent access of this BO from
    * each caching domain.
    *
    * Although this is a global field, use in multiple contexts should be
    * safe; see iris_emit_buffer_barrier_for() for details.
    *
    * Also aligned to 64 bits, which makes atomic operations faster on
    * 32-bit platforms.
    */
   uint64_t last_seqnos[NUM_IRIS_DOMAINS] __attribute__ ((aligned (8)));

   /** Up to one per screen, may need realloc. */
   struct iris_bo_screen_deps *deps;
   int deps_size;

   /**
    * Boolean of whether the GPU is definitely not accessing the buffer.
    *
    * This is only valid when reusable, since non-reusable
    * buffers are those that have been shared with other
    * processes, so we don't know their state.
    */
   bool idle;

   union {
      struct {
         uint64_t kflags;

         time_t free_time;

         /** Mapped address for the buffer, saved across map/unmap cycles */
         void *map;

         /** List of GEM handle exports of this buffer (bo_export) */
         struct list_head exports;

         /**
          * Kernel-assigned global name for this object
          *
          * List contains both flink named and prime fd'd objects
          */
         unsigned global_name;

         /** The mmap coherency mode selected at BO allocation time */
         enum iris_mmap_mode mmap_mode;

         /** The heap selected at BO allocation time */
         enum iris_heap heap;

         /** Was this buffer imported from an external client? */
         bool imported;

         /** Has this buffer been exported to external clients? */
         bool exported;

         /** Boolean of whether this buffer can be re-used */
         bool reusable;

         /** Boolean of whether this buffer points into user memory */
         bool userptr;
      } real;
      struct {
         struct pb_slab_entry entry;
         struct iris_bo *real;
      } slab;
   };
};
295
296 #define BO_ALLOC_ZEROED (1<<0)
297 #define BO_ALLOC_COHERENT (1<<1)
298 #define BO_ALLOC_SMEM (1<<2)
299 #define BO_ALLOC_SCANOUT (1<<3)
300 #define BO_ALLOC_NO_SUBALLOC (1<<4)
301 #define BO_ALLOC_LMEM (1<<5)
302
303 /**
304 * Allocate a buffer object.
305 *
306 * Buffer objects are not necessarily initially mapped into CPU virtual
307 * address space or graphics device aperture. They must be mapped
308 * using iris_bo_map() to be used by the CPU.
309 */
310 struct iris_bo *iris_bo_alloc(struct iris_bufmgr *bufmgr,
311 const char *name,
312 uint64_t size,
313 uint32_t alignment,
314 enum iris_memory_zone memzone,
315 unsigned flags);
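
/*
 * Illustrative usage sketch (assumed caller context, not driver-mandated
 * code): allocate a zeroed 4 KiB scratch buffer in the general-purpose zone
 * and drop the reference when done. The "scratch" name and the size are made
 * up for the example; iris_bo_unreference() is declared below.
 *
 *    struct iris_bo *bo =
 *       iris_bo_alloc(bufmgr, "scratch", 4096, 1, IRIS_MEMZONE_OTHER,
 *                     BO_ALLOC_ZEROED);
 *    if (bo) {
 *       ... use the buffer ...
 *       iris_bo_unreference(bo);
 *    }
 */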

struct iris_bo *
iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
                       void *ptr, size_t size,
                       enum iris_memory_zone memzone);

/** Takes a reference on a buffer object */
static inline void
iris_bo_reference(struct iris_bo *bo)
{
   p_atomic_inc(&bo->refcount);
}

/**
 * Releases a reference on a buffer object, freeing the data if
 * no references remain.
 */
void iris_bo_unreference(struct iris_bo *bo);

#define MAP_READ          PIPE_MAP_READ
#define MAP_WRITE         PIPE_MAP_WRITE
#define MAP_ASYNC         PIPE_MAP_UNSYNCHRONIZED
#define MAP_PERSISTENT    PIPE_MAP_PERSISTENT
#define MAP_COHERENT      PIPE_MAP_COHERENT
/* internal */
#define MAP_RAW           (PIPE_MAP_DRV_PRV << 0)
#define MAP_INTERNAL_MASK (MAP_RAW)

#define MAP_FLAGS         (MAP_READ | MAP_WRITE | MAP_ASYNC | \
                           MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)

/**
 * Maps the buffer into userspace.
 *
 * This function will first block waiting for any existing execution on the
 * buffer to complete. The resulting mapping is returned.
 */
MUST_CHECK void *iris_bo_map(struct util_debug_callback *dbg,
                             struct iris_bo *bo, unsigned flags);
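
/*
 * Illustrative usage sketch (an assumption about typical usage, not a
 * prescribed pattern): map a previously allocated BO for a CPU write.
 * Adding MAP_ASYNC would skip the implicit wait when the caller already
 * knows the GPU is not using the buffer.
 *
 *    void *map = iris_bo_map(NULL, bo, MAP_WRITE);
 *    if (map) {
 *       memset(map, 0, bo->size);
 *       iris_bo_unmap(bo);
 *    }
 */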

/**
 * Reduces the refcount on the userspace mapping of the buffer
 * object.
 */
static inline int iris_bo_unmap(struct iris_bo *bo) { return 0; }

/**
 * Waits for rendering to an object by the GPU to have completed.
 *
 * This is not required for any access to the BO by bo_map,
 * bo_subdata, etc. It is merely a way for the driver to implement
 * glFinish.
 */
void iris_bo_wait_rendering(struct iris_bo *bo);

/**
 * Unref a buffer manager instance.
 */
void iris_bufmgr_unref(struct iris_bufmgr *bufmgr);

/**
 * Create a visible name for a buffer which can be used by other apps
 *
 * \param bo Buffer to create a name for
 * \param name Returned name
 */
int iris_bo_flink(struct iris_bo *bo, uint32_t *name);

/**
 * Returns true if the BO is backed by a real GEM object, false if it's
 * a wrapper that's suballocated from a larger BO.
 */
static inline bool
iris_bo_is_real(struct iris_bo *bo)
{
   return bo->gem_handle != 0;
}

/**
 * Unwrap any slab-allocated wrapper BOs to get the BO for the underlying
 * backing storage, which is a real BO associated with a GEM object.
 */
static inline struct iris_bo *
iris_get_backing_bo(struct iris_bo *bo)
{
   if (!iris_bo_is_real(bo))
      bo = bo->slab.real;

   /* We only allow one level of wrapping. */
   assert(iris_bo_is_real(bo));

   return bo;
}

/**
 * Is this buffer shared with external clients (imported or exported)?
 */
static inline bool
iris_bo_is_external(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported || bo->real.imported;
}

static inline bool
iris_bo_is_imported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.imported;
}

static inline bool
iris_bo_is_exported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported;
}

/**
 * True if the BO prefers to reside in device-local memory.
 *
 * We don't consider eviction here; this is meant to be a performance hint.
 * It will return true for BOs allocated from the LMEM or LMEM+SMEM heaps,
 * even if the buffer has been temporarily evicted to system memory.
 */
static inline bool
iris_bo_likely_local(const struct iris_bo *bo)
{
   if (!bo)
      return false;

   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.heap != IRIS_HEAP_SYSTEM_MEMORY;
}

static inline enum iris_mmap_mode
iris_bo_mmap_mode(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.mmap_mode;
}

/**
 * Mark a buffer as being shared with other external clients.
 */
void iris_bo_mark_exported(struct iris_bo *bo);

/**
 * Returns true if mapping the buffer for write could cause the process
 * to block, due to the object being active in the GPU.
 */
bool iris_bo_busy(struct iris_bo *bo);

/**
 * Specify the volatility of the buffer.
 * \param bo Buffer whose purgeable status to change
 * \param madv The purgeable status
 *
 * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
 * reclaimed under memory pressure. If you subsequently require the buffer,
 * then you must pass I915_MADV_WILLNEED to mark the buffer as required.
 *
 * Returns 1 if the buffer was retained, or 0 if it was discarded whilst
 * marked as I915_MADV_DONTNEED.
 */
int iris_bo_madvise(struct iris_bo *bo, int madv);
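
/*
 * Illustrative sketch of the purgeable-buffer cycle described above (the
 * surrounding control flow is assumed, not prescribed): an idle, cached BO
 * can be offered back to the kernel and must be re-validated before reuse.
 *
 *    iris_bo_madvise(bo, I915_MADV_DONTNEED);
 *    ...
 *    if (!iris_bo_madvise(bo, I915_MADV_WILLNEED)) {
 *       // Contents were discarded under memory pressure; the buffer must
 *       // be reallocated or its contents regenerated before reuse.
 *    }
 */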

struct iris_bufmgr *iris_bufmgr_get_for_fd(struct intel_device_info *devinfo,
                                           int fd, bool bo_reuse);
int iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr);

struct iris_bo *iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
                                             const char *name,
                                             unsigned handle);

void* iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr);

int iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns);

uint32_t iris_create_hw_context(struct iris_bufmgr *bufmgr);
uint32_t iris_clone_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);
int iris_kernel_context_get_priority(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

#define IRIS_CONTEXT_LOW_PRIORITY    ((I915_CONTEXT_MIN_USER_PRIORITY-1)/2)
#define IRIS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
#define IRIS_CONTEXT_HIGH_PRIORITY   ((I915_CONTEXT_MAX_USER_PRIORITY+1)/2)

void iris_hw_context_set_unrecoverable(struct iris_bufmgr *bufmgr,
                                       uint32_t ctx_id);
void iris_hw_context_set_vm_id(struct iris_bufmgr *bufmgr, uint32_t ctx_id);
int iris_hw_context_set_priority(struct iris_bufmgr *bufmgr,
                                 uint32_t ctx_id, int priority);

void iris_destroy_kernel_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);
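
/*
 * Illustrative sketch (assumed usage, not a required sequence): create a
 * kernel/GEM context, request elevated scheduling priority for it, and
 * destroy it again. Whether the priority change succeeds depends on kernel
 * policy and process privileges.
 *
 *    uint32_t ctx_id = iris_create_hw_context(bufmgr);
 *    if (ctx_id) {
 *       if (iris_hw_context_set_priority(bufmgr, ctx_id,
 *                                        IRIS_CONTEXT_HIGH_PRIORITY) != 0) {
 *          // Priority elevation was rejected; the context still works at
 *          // its default priority.
 *       }
 *       ...
 *       iris_destroy_kernel_context(bufmgr, ctx_id);
 *    }
 */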

int iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling);
int iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf);

int iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd);
struct iris_bo *iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd);
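
/*
 * Illustrative sketch (assumed usage): share a buffer with another process
 * or device by exporting it as a dma-buf file descriptor. The exporting
 * side owns the fd it receives and should close it once it has been handed
 * off to the consumer.
 *
 *    int prime_fd = -1;
 *    if (iris_bo_export_dmabuf(bo, &prime_fd) == 0) {
 *       // send prime_fd to the consumer (e.g. over a UNIX socket), then:
 *       close(prime_fd);
 *    }
 */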

/**
 * Exports a bo as a GEM handle into a given DRM file descriptor
 * \param bo Buffer to export
 * \param drm_fd File descriptor where the new handle is created
 * \param out_handle Pointer to store the new handle
 *
 * Returns 0 if the buffer was successfully exported, a non-zero error code
 * otherwise.
 */
int iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
                                         uint32_t *out_handle);

uint32_t iris_bo_export_gem_handle(struct iris_bo *bo);

int iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *out);

/**
 * Returns the BO's address relative to the appropriate base address.
 *
 * All of our base addresses are programmed to the start of a 4GB region,
 * so simply returning the bottom 32 bits of the BO address will give us
 * the offset from whatever base address corresponds to that memory region.
 */
static inline uint32_t
iris_bo_offset_from_base_address(struct iris_bo *bo)
{
   /* This only works for buffers in the memory zones corresponding to a
    * base address - the top, unbounded memory zone doesn't have a base.
    */
   assert(bo->address < IRIS_MEMZONE_OTHER_START);
   return bo->address;
}

/**
 * Track access of a BO from the specified caching domain and sequence number.
 *
 * Can be used without locking. Only the most recent access (i.e. highest
 * seqno) is tracked.
 */
static inline void
iris_bo_bump_seqno(struct iris_bo *bo, uint64_t seqno,
                   enum iris_domain type)
{
   uint64_t *const last_seqno = &bo->last_seqnos[type];
   uint64_t tmp, prev_seqno = p_atomic_read(last_seqno);

   while (prev_seqno < seqno &&
          prev_seqno != (tmp = p_atomic_cmpxchg(last_seqno, prev_seqno, seqno)))
      prev_seqno = tmp;
}

enum iris_memory_zone iris_memzone_for_address(uint64_t address);

int iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr);

simple_mtx_t *iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr);

/**
 * A pool containing SAMPLER_BORDER_COLOR_STATE entries.
 *
 * See iris_border_color.c for more information.
 */
struct iris_border_color_pool {
   struct iris_bo *bo;
   void *map;
   unsigned insert_point;

   /** Map from border colors to offsets in the buffer. */
   struct hash_table *ht;

   /** Protects insert_point and the hash table. */
   simple_mtx_t lock;
};

struct iris_border_color_pool *iris_bufmgr_get_border_color_pool(
      struct iris_bufmgr *bufmgr);

/* iris_border_color.c */
void iris_init_border_color_pool(struct iris_bufmgr *bufmgr,
                                 struct iris_border_color_pool *pool);
void iris_destroy_border_color_pool(struct iris_border_color_pool *pool);
uint32_t iris_upload_border_color(struct iris_border_color_pool *pool,
                                  union pipe_color_union *color);
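
/*
 * Illustrative sketch (assumed usage): upload a border color and keep the
 * returned offset into the pool's buffer. The pool sits at the start of the
 * dynamic state zone because SAMPLER_STATE's "Indirect State Pointer" field
 * is only 24 bits wide (see the memory zone comment near the top of this
 * header), so a small offset like this is what gets packed into that field
 * by state upload code elsewhere in the driver.
 *
 *    union pipe_color_union color = { .f = { 0.0f, 0.0f, 0.0f, 1.0f } };
 *    struct iris_border_color_pool *pool =
 *       iris_bufmgr_get_border_color_pool(bufmgr);
 *    uint32_t offset = iris_upload_border_color(pool, &color);
 */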

uint64_t iris_bufmgr_vram_size(struct iris_bufmgr *bufmgr);
uint64_t iris_bufmgr_sram_size(struct iris_bufmgr *bufmgr);

#endif /* IRIS_BUFMGR_H */