/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BUFMGR_H
#define IRIS_BUFMGR_H

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include "c11/threads.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "util/list.h"
#include "util/simple_mtx.h"
#include "pipe/p_defines.h"
#include "pipebuffer/pb_slab.h"

struct intel_device_info;
struct pipe_debug_callback;
struct isl_surf;
struct iris_syncobj;

/**
 * Memory zones. When allocating a buffer, you can request that it be
 * placed into a specific region of the virtual address space (PPGTT).
 *
 * Most buffers can go anywhere (IRIS_MEMZONE_OTHER). Some buffers are
 * accessed via an offset from a base address. STATE_BASE_ADDRESS has
 * a maximum 4GB size for each region, so we need to restrict those
 * buffers to be within 4GB of the base. Each memory zone corresponds
 * to a particular base address.
 *
 * We lay out the virtual address space as follows:
 *
 * - [0, 4K):   Nothing            (empty page for null address)
 * - [4K, 4G):  Shaders            (Instruction Base Address)
 * - [4G, 8G):  Surfaces & Binders (Surface State Base Address, Bindless ...)
 * - [8G, 12G): Dynamic            (Dynamic State Base Address)
 * - [12G, *):  Other              (everything else in the full 48-bit VMA)
 *
 * A special buffer for border color lives at the start of the dynamic state
 * memory zone. This unfortunately has to be handled specially because the
 * SAMPLER_STATE "Indirect State Pointer" field is only a 24-bit pointer.
 *
 * Each GL context uses a separate GEM context, which technically gives them
 * each a separate VMA. However, we assign addresses globally, so buffers
 * will have the same address in all GEM contexts. This lets us have a single
 * BO field for the address, which is easy and cheap.
 */
enum iris_memory_zone {
   IRIS_MEMZONE_SHADER,
   IRIS_MEMZONE_BINDER,
   IRIS_MEMZONE_BINDLESS,
   IRIS_MEMZONE_SURFACE,
   IRIS_MEMZONE_DYNAMIC,
   IRIS_MEMZONE_OTHER,

   IRIS_MEMZONE_BORDER_COLOR_POOL,
};

/* Intentionally exclude single buffer "zones" */
#define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 1)

#define IRIS_BINDER_SIZE (64 * 1024)
#define IRIS_MAX_BINDERS 100
#define IRIS_BINDLESS_SIZE (8 * 1024 * 1024)

#define IRIS_MEMZONE_SHADER_START     (0ull * (1ull << 32))
#define IRIS_MEMZONE_BINDER_START     (1ull * (1ull << 32))
#define IRIS_MEMZONE_BINDLESS_START   (IRIS_MEMZONE_BINDER_START + IRIS_MAX_BINDERS * IRIS_BINDER_SIZE)
#define IRIS_MEMZONE_SURFACE_START    (IRIS_MEMZONE_BINDLESS_START + IRIS_BINDLESS_SIZE)
#define IRIS_MEMZONE_DYNAMIC_START    (2ull * (1ull << 32))
#define IRIS_MEMZONE_OTHER_START      (3ull * (1ull << 32))

#define IRIS_BORDER_COLOR_POOL_ADDRESS IRIS_MEMZONE_DYNAMIC_START
#define IRIS_BORDER_COLOR_POOL_SIZE (64 * 1024)
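
/*
 * For illustration, with the binder and bindless sizes defined above, the
 * zone starts work out to:
 *
 *    SHADER_START   = 0x0_00000000                       (0)
 *    BINDER_START   = 0x1_00000000                       (4GB)
 *    BINDLESS_START = 0x1_00000000 + 100 * 64KB = 0x1_00640000
 *    SURFACE_START  = BINDLESS_START + 8MB      = 0x1_00E40000
 *    DYNAMIC_START  = 0x2_00000000                       (8GB)
 *    OTHER_START    = 0x3_00000000                       (12GB)
 *
 * so binders, bindless surfaces, and surface state all share the second 4GB
 * region, keeping them within 4GB of Surface State Base Address.
 */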

/**
 * Classification of the various incoherent caches of the GPU into a number of
 * caching domains.
 */
enum iris_domain {
   /** Render color cache. */
   IRIS_DOMAIN_RENDER_WRITE = 0,
   /** (Hi)Z/stencil cache. */
   IRIS_DOMAIN_DEPTH_WRITE,
   /** Data port (HDC) cache. */
   IRIS_DOMAIN_DATA_WRITE,
   /** Any other read-write cache. */
   IRIS_DOMAIN_OTHER_WRITE,
   /** Vertex cache. */
   IRIS_DOMAIN_VF_READ,
   /** Any other read-only cache. */
   IRIS_DOMAIN_OTHER_READ,
   /** Number of caching domains. */
   NUM_IRIS_DOMAINS,
   /** Not a real cache, use to opt out of the cache tracking mechanism. */
   IRIS_DOMAIN_NONE = NUM_IRIS_DOMAINS
};

/**
 * Whether a caching domain is guaranteed not to write any data to memory.
 */
static inline bool
iris_domain_is_read_only(enum iris_domain access)
{
   return access == IRIS_DOMAIN_OTHER_READ ||
          access == IRIS_DOMAIN_VF_READ;
}

enum iris_mmap_mode {
   IRIS_MMAP_NONE, /**< Cannot be mapped */
   IRIS_MMAP_UC, /**< Fully uncached memory map */
   IRIS_MMAP_WC, /**< Write-combining map with no caching of reads */
   IRIS_MMAP_WB, /**< Write-back mapping with CPU caches enabled */
};

#define IRIS_BATCH_COUNT 2

struct iris_bo_screen_deps {
   struct iris_syncobj *write_syncobjs[IRIS_BATCH_COUNT];
   struct iris_syncobj *read_syncobjs[IRIS_BATCH_COUNT];
};

struct iris_bo {
   /**
    * Size in bytes of the buffer object.
    *
    * The size may be larger than the size originally requested for the
    * allocation, such as being aligned to page size.
    */
   uint64_t size;

   /** Buffer manager context associated with this buffer object */
   struct iris_bufmgr *bufmgr;

   /** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
   uint32_t hash;

   /** The GEM handle for this buffer object. */
   uint32_t gem_handle;

   /**
    * Virtual address of the buffer inside the PPGTT (Per-Process Graphics
    * Translation Table).
    *
    * Although each hardware context has its own VMA, we assign BOs to the
    * same address in all contexts, for simplicity.
    */
   uint64_t address;

   /**
    * If non-zero, then this bo has an aux-map translation to this address.
    */
   uint64_t aux_map_address;

   /**
    * If this BO is referenced by a batch, this _may_ be the index into the
    * batch->exec_bos[] list.
    *
    * Note that a single buffer may be used by multiple batches/contexts,
    * and thus appear in multiple lists, but we only track one index here.
    * In the common case one can guess that batch->exec_bos[bo->index] == bo
    * and double check if that's true to avoid a linear list walk.
    *
    * XXX: this is not ideal now that we have more than one batch per context,
    * XXX: as the index will flop back and forth between the render index and
    * XXX: compute index...
    */
   unsigned index;

   int refcount;
   const char *name;

   /** BO cache list */
   struct list_head head;

   /**
    * Synchronization sequence number of most recent access of this BO from
    * each caching domain.
    *
    * Although this is a global field, use in multiple contexts should be
    * safe, see iris_emit_buffer_barrier_for() for details.
    *
    * Also align it to 64 bits. This will make atomic operations faster on
    * 32-bit platforms.
    */
   uint64_t last_seqnos[NUM_IRIS_DOMAINS] __attribute__ ((aligned (8)));

   /** Up to one per screen, may need realloc. */
   struct iris_bo_screen_deps *deps;
   int deps_size;

   /**
    * Boolean of whether the GPU is definitely not accessing the buffer.
    *
    * This is only valid when reusable, since non-reusable
    * buffers are those that have been shared with other
    * processes, so we don't know their state.
    */
   bool idle;

   union {
      struct {
         uint64_t kflags;

         time_t free_time;

         /** Mapped address for the buffer, saved across map/unmap cycles */
         void *map;

         /** List of GEM handle exports of this buffer (bo_export) */
         struct list_head exports;

         /**
          * Kernel-assigned global name for this object
          *
          * List contains both flink named and prime fd'd objects
          */
         unsigned global_name;

         /** The mmap coherency mode selected at BO allocation time */
         enum iris_mmap_mode mmap_mode;

         /** Was this buffer imported from an external client? */
         bool imported;

         /** Has this buffer been exported to external clients? */
         bool exported;

         /** Boolean of whether this buffer can be re-used */
         bool reusable;

         /** Boolean of whether this buffer points into user memory */
         bool userptr;

         /** Boolean of whether this was allocated from local memory */
         bool local;
      } real;
      struct {
         struct pb_slab_entry entry;
         struct iris_bo *real;
      } slab;
   };
};

#define BO_ALLOC_ZEROED      (1<<0)
#define BO_ALLOC_COHERENT    (1<<1)
#define BO_ALLOC_SMEM        (1<<2)
#define BO_ALLOC_SCANOUT     (1<<3)
#define BO_ALLOC_NO_SUBALLOC (1<<4)

/**
 * Allocate a buffer object.
 *
 * Buffer objects are not necessarily initially mapped into CPU virtual
 * address space or graphics device aperture. They must be mapped
 * using iris_bo_map() to be used by the CPU.
 */
struct iris_bo *iris_bo_alloc(struct iris_bufmgr *bufmgr,
                              const char *name,
                              uint64_t size,
                              uint32_t alignment,
                              enum iris_memory_zone memzone,
                              unsigned flags);
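
/*
 * Example usage (a hypothetical sketch, not part of this interface): allocate
 * a page-sized zeroed buffer in the general memory zone and drop the
 * reference when done with it.
 *
 *    struct iris_bo *bo =
 *       iris_bo_alloc(bufmgr, "scratch", 4096, 4096,
 *                     IRIS_MEMZONE_OTHER, BO_ALLOC_ZEROED);
 *    if (bo) {
 *       ... use the buffer ...
 *       iris_bo_unreference(bo);
 *    }
 */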

struct iris_bo *
iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
                       void *ptr, size_t size,
                       enum iris_memory_zone memzone);

/** Takes a reference on a buffer object */
static inline void
iris_bo_reference(struct iris_bo *bo)
{
   p_atomic_inc(&bo->refcount);
}

/**
 * Releases a reference on a buffer object, freeing the data if
 * no references remain.
 */
void iris_bo_unreference(struct iris_bo *bo);

#define MAP_READ          PIPE_MAP_READ
#define MAP_WRITE         PIPE_MAP_WRITE
#define MAP_ASYNC         PIPE_MAP_UNSYNCHRONIZED
#define MAP_PERSISTENT    PIPE_MAP_PERSISTENT
#define MAP_COHERENT      PIPE_MAP_COHERENT
/* internal */
#define MAP_RAW           (PIPE_MAP_DRV_PRV << 0)
#define MAP_INTERNAL_MASK (MAP_RAW)

#define MAP_FLAGS         (MAP_READ | MAP_WRITE | MAP_ASYNC | \
                           MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)

/**
 * Maps the buffer into userspace.
 *
 * This function will block waiting for any existing execution on the
 * buffer to complete, first. The resulting mapping is returned.
 */
MUST_CHECK void *iris_bo_map(struct pipe_debug_callback *dbg,
                             struct iris_bo *bo, unsigned flags);
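
/*
 * Example (hypothetical sketch): fill a buffer through a CPU mapping,
 * passing MAP_ASYNC to skip the implicit wait when the caller already knows
 * the GPU is not using the destination range.
 *
 *    void *map = iris_bo_map(dbg, bo, MAP_WRITE | MAP_ASYNC);
 *    if (map)
 *       memcpy(map, data, data_size);
 */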

/**
 * Reduces the refcount on the userspace mapping of the buffer
 * object.
 */
static inline int iris_bo_unmap(struct iris_bo *bo) { return 0; }

/**
 * Waits for rendering to an object by the GPU to have completed.
 *
 * This is not required for any access to the BO by bo_map,
 * bo_subdata, etc. It is merely a way for the driver to implement
 * glFinish.
 */
void iris_bo_wait_rendering(struct iris_bo *bo);


/**
 * Unref a buffer manager instance.
 */
void iris_bufmgr_unref(struct iris_bufmgr *bufmgr);

/**
 * Create a visible name for a buffer which can be used by other apps
 *
 * \param bo    Buffer to create a name for
 * \param name  Returned name
 */
int iris_bo_flink(struct iris_bo *bo, uint32_t *name);

/**
 * Returns true if the BO is backed by a real GEM object, false if it's
 * a wrapper that's suballocated from a larger BO.
 */
static inline bool
iris_bo_is_real(struct iris_bo *bo)
{
   return bo->gem_handle != 0;
}

/**
 * Unwrap any slab-allocated wrapper BOs to get the BO for the underlying
 * backing storage, which is a real BO associated with a GEM object.
 */
static inline struct iris_bo *
iris_get_backing_bo(struct iris_bo *bo)
{
   if (!iris_bo_is_real(bo))
      bo = bo->slab.real;

   /* We only allow one level of wrapping. */
   assert(iris_bo_is_real(bo));

   return bo;
}
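
/*
 * Example (hypothetical): a slab-suballocated BO has no GEM handle of its
 * own, so anything that talks to the kernel must go through the backing BO:
 *
 *    uint32_t handle = iris_get_backing_bo(bo)->gem_handle;
 */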

/**
 * Is this buffer shared with external clients (imported or exported)?
 */
static inline bool
iris_bo_is_external(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported || bo->real.imported;
}

static inline bool
iris_bo_is_imported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.imported;
}

static inline bool
iris_bo_is_exported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported;
}

static inline enum iris_mmap_mode
iris_bo_mmap_mode(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.mmap_mode;
}

/**
 * Mark a buffer as being shared with other external clients.
 */
void iris_bo_mark_exported(struct iris_bo *bo);

/**
 * Returns true if mapping the buffer for write could cause the process
 * to block, due to the object being active in the GPU.
 */
bool iris_bo_busy(struct iris_bo *bo);

/**
 * Specify the volatility of the buffer.
 * \param bo    Buffer to mark as purgeable or required
 * \param madv  The purgeable status
 *
 * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
 * reclaimed under memory pressure. If you subsequently require the buffer,
 * then you must pass I915_MADV_WILLNEED to mark the buffer as required.
 *
 * Returns 1 if the buffer was retained, or 0 if it was discarded whilst
 * marked as I915_MADV_DONTNEED.
 */
int iris_bo_madvise(struct iris_bo *bo, int madv);
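
/*
 * Example (hypothetical sketch of a cache eviction/reuse path): mark an
 * unused buffer purgeable, then check whether its pages survived before
 * reusing it later.
 *
 *    iris_bo_madvise(bo, I915_MADV_DONTNEED);
 *    ...later, when the buffer is wanted again...
 *    if (!iris_bo_madvise(bo, I915_MADV_WILLNEED)) {
 *       ...pages were reclaimed; the old contents are gone, reallocate...
 *    }
 */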

struct iris_bufmgr *iris_bufmgr_get_for_fd(struct intel_device_info *devinfo,
                                           int fd, bool bo_reuse);
int iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr);

struct iris_bo *iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
                                             const char *name,
                                             unsigned handle);

void *iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr);

int iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns);

uint32_t iris_create_hw_context(struct iris_bufmgr *bufmgr);
uint32_t iris_clone_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

#define IRIS_CONTEXT_LOW_PRIORITY    ((I915_CONTEXT_MIN_USER_PRIORITY - 1) / 2)
#define IRIS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
#define IRIS_CONTEXT_HIGH_PRIORITY   ((I915_CONTEXT_MAX_USER_PRIORITY + 1) / 2)

int iris_hw_context_set_priority(struct iris_bufmgr *bufmgr,
                                 uint32_t ctx_id, int priority);

void iris_destroy_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);
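
/*
 * Example (hypothetical sketch): create a GEM context for a high-priority
 * client and tear it down when the pipe context is destroyed.
 *
 *    uint32_t ctx_id = iris_create_hw_context(bufmgr);
 *    if (ctx_id)
 *       iris_hw_context_set_priority(bufmgr, ctx_id,
 *                                    IRIS_CONTEXT_HIGH_PRIORITY);
 *    ...
 *    iris_destroy_hw_context(bufmgr, ctx_id);
 */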

int iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling);
int iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf);

int iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd);
struct iris_bo *iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd);
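
/*
 * Example (hypothetical sketch of sharing a buffer between two buffer
 * managers over dma-buf): export a prime fd from one and import it into the
 * other, closing the fd once the import holds its own reference.
 *
 *    int prime_fd = -1;
 *    if (iris_bo_export_dmabuf(bo, &prime_fd) == 0) {
 *       struct iris_bo *imported =
 *          iris_bo_import_dmabuf(other_bufmgr, prime_fd);
 *       close(prime_fd);
 *    }
 */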

/**
 * Exports a bo as a GEM handle into a given DRM file descriptor
 * \param bo          Buffer to export
 * \param drm_fd      File descriptor where the new handle is created
 * \param out_handle  Pointer to store the new handle
 *
 * Returns 0 if the buffer was successfully exported, a non-zero error code
 * otherwise.
 */
int iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
                                         uint32_t *out_handle);

uint32_t iris_bo_export_gem_handle(struct iris_bo *bo);

int iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *out);

/**
 * Returns the BO's address relative to the appropriate base address.
 *
 * All of our base addresses are programmed to the start of a 4GB region,
 * so simply returning the bottom 32 bits of the BO address will give us
 * the offset from whatever base address corresponds to that memory region.
 */
static inline uint32_t
iris_bo_offset_from_base_address(struct iris_bo *bo)
{
   /* This only works for buffers in the memory zones corresponding to a
    * base address - the top, unbounded memory zone doesn't have a base.
    */
   assert(bo->address < IRIS_MEMZONE_OTHER_START);
   return bo->address;
}
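
/*
 * For example, a BO placed at 0x2_00001000 (IRIS_MEMZONE_DYNAMIC_START +
 * 0x1000) returns 0x1000: its offset from the 8GB boundary where the base
 * address for the dynamic state zone is programmed.
 */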

/**
 * Track access of a BO from the specified caching domain and sequence number.
 *
 * Can be used without locking. Only the most recent access (i.e. highest
 * seqno) is tracked.
 */
static inline void
iris_bo_bump_seqno(struct iris_bo *bo, uint64_t seqno,
                   enum iris_domain type)
{
   uint64_t *const last_seqno = &bo->last_seqnos[type];
   uint64_t tmp, prev_seqno = p_atomic_read(last_seqno);

   /* Lock-free update: retry the compare-and-swap until either the stored
    * seqno is already at least as new as ours, or our seqno lands.
    */
   while (prev_seqno < seqno &&
          prev_seqno != (tmp = p_atomic_cmpxchg(last_seqno, prev_seqno, seqno)))
      prev_seqno = tmp;
}

enum iris_memory_zone iris_memzone_for_address(uint64_t address);

int iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr);

simple_mtx_t *iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr);

#endif /* IRIS_BUFMGR_H */