1 /*
2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3 * Copyright 2010 Marek Olšák <maraeo@gmail.com>
4 * Copyright 2018 Advanced Micro Devices, Inc.
5 *
6 * SPDX-License-Identifier: MIT
7 */
8
9 #ifndef RADEON_WINSYS_H
10 #define RADEON_WINSYS_H
11
12 /* The public winsys interface header for the radeon driver. */
13
14 /* Skip command submission. Same as RADEON_NOOP=1. */
15 #define RADEON_FLUSH_NOOP (1u << 29)
16
17 /* Toggle the secure submission boolean after the flush */
18 #define RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION (1u << 30)
19
20 /* Whether the next IB can start immediately and not wait for draws and
21 * dispatches from the current IB to finish. */
22 #define RADEON_FLUSH_START_NEXT_GFX_IB_NOW (1u << 31)
23
24 #define RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW \
25 (PIPE_FLUSH_ASYNC | RADEON_FLUSH_START_NEXT_GFX_IB_NOW)
26
27 #include "amd/common/ac_gpu_info.h"
28 #include "amd/common/ac_surface.h"
29 #include "pipebuffer/pb_buffer.h"
30
/* Tiling flags. */
enum radeon_bo_layout
{
   RADEON_LAYOUT_LINEAR = 0,   /* No tiling. */
   RADEON_LAYOUT_TILED,
   RADEON_LAYOUT_SQUARETILED,

   RADEON_LAYOUT_UNKNOWN       /* The layout could not be determined. */
};
40
enum radeon_bo_domain
{ /* bitfield */
   RADEON_DOMAIN_GTT = 2,   /* GPU-accessible system memory (GART) */
   RADEON_DOMAIN_VRAM = 4,  /* dedicated video memory */
   RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,
   RADEON_DOMAIN_GDS = 8,   /* global data share */
   RADEON_DOMAIN_OA = 16,   /* ordered append */
};
49
enum radeon_bo_flag
{ /* bitfield */
   RADEON_FLAG_GTT_WC = (1 << 0),        /* write-combined CPU mapping */
   RADEON_FLAG_NO_CPU_ACCESS = (1 << 1), /* the CPU never maps this buffer */
   RADEON_FLAG_NO_SUBALLOC = (1 << 2),   /* don't serve from a suballocator/slab */
   RADEON_FLAG_SPARSE = (1 << 3),        /* partially-resident; see buffer_commit */
   RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4), /* never exported to other processes */
   RADEON_FLAG_READ_ONLY = (1 << 5),
   RADEON_FLAG_32BIT = (1 << 6),         /* allocate within the 32-bit VA range */
   RADEON_FLAG_ENCRYPTED = (1 << 7),     /* TMZ (secure) buffer */
   RADEON_FLAG_GL2_BYPASS = (1 << 8), /* only gfx9 and newer */
   RADEON_FLAG_DRIVER_INTERNAL = (1 << 9),
   /* Discard on eviction (instead of moving the buffer to GTT).
    * This guarantees that this buffer will never be moved to GTT.
    */
   RADEON_FLAG_DISCARDABLE = (1 << 10),
   RADEON_FLAG_WINSYS_SLAB_BACKING = (1 << 11), /* only used by the winsys */
};
68
69 static inline void
si_res_print_flags(enum radeon_bo_flag flags)70 si_res_print_flags(enum radeon_bo_flag flags) {
71 if (flags & RADEON_FLAG_GTT_WC)
72 fprintf(stderr, "GTT_WC ");
73 if (flags & RADEON_FLAG_NO_CPU_ACCESS)
74 fprintf(stderr, "NO_CPU_ACCESS ");
75 if (flags & RADEON_FLAG_NO_SUBALLOC)
76 fprintf(stderr, "NO_SUBALLOC ");
77 if (flags & RADEON_FLAG_SPARSE)
78 fprintf(stderr, "SPARSE ");
79 if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)
80 fprintf(stderr, "NO_INTERPROCESS_SHARING ");
81 if (flags & RADEON_FLAG_READ_ONLY)
82 fprintf(stderr, "READ_ONLY ");
83 if (flags & RADEON_FLAG_32BIT)
84 fprintf(stderr, "32BIT ");
85 if (flags & RADEON_FLAG_ENCRYPTED)
86 fprintf(stderr, "ENCRYPTED ");
87 if (flags & RADEON_FLAG_GL2_BYPASS)
88 fprintf(stderr, "GL2_BYPASS ");
89 if (flags & RADEON_FLAG_DRIVER_INTERNAL)
90 fprintf(stderr, "DRIVER_INTERNAL ");
91 if (flags & RADEON_FLAG_DISCARDABLE)
92 fprintf(stderr, "DISCARDABLE ");
93 }
94
/* Driver-private flags for buffer_map, ORed with PIPE_MAP_* flags. */
enum radeon_map_flags
{
   /* Indicates that the caller will unmap the buffer.
    *
    * Not unmapping buffers is an important performance optimization for
    * OpenGL (avoids kernel overhead for frequently mapped buffers).
    */
   RADEON_MAP_TEMPORARY = (PIPE_MAP_DRV_PRV << 0),
};
104
105 #define RADEON_SPARSE_PAGE_SIZE (64 * 1024)
106
/* Values that can be queried through radeon_winsys::query_value. */
enum radeon_value_id
{
   RADEON_REQUESTED_VRAM_MEMORY,
   RADEON_REQUESTED_GTT_MEMORY,
   RADEON_MAPPED_VRAM,
   RADEON_MAPPED_GTT,
   RADEON_SLAB_WASTED_VRAM,
   RADEON_SLAB_WASTED_GTT,
   RADEON_BUFFER_WAIT_TIME_NS,
   RADEON_NUM_MAPPED_BUFFERS,
   RADEON_TIMESTAMP,
   RADEON_NUM_GFX_IBS,
   RADEON_NUM_SDMA_IBS,
   RADEON_GFX_BO_LIST_COUNTER, /* number of BOs submitted in gfx IBs */
   RADEON_GFX_IB_SIZE_COUNTER,
   RADEON_NUM_BYTES_MOVED,
   RADEON_NUM_EVICTIONS,
   RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
   RADEON_VRAM_USAGE,
   RADEON_VRAM_VIS_USAGE,
   RADEON_GTT_USAGE,
   RADEON_GPU_TEMPERATURE,
   RADEON_CURRENT_SCLK,
   RADEON_CURRENT_MCLK,
   RADEON_CS_THREAD_TIME,
};
133
/* Context scheduling priority, from lowest to highest. */
enum radeon_ctx_priority
{
   RADEON_CTX_PRIORITY_LOW = 0,
   RADEON_CTX_PRIORITY_MEDIUM,
   RADEON_CTX_PRIORITY_HIGH,
   RADEON_CTX_PRIORITY_REALTIME,
};
141
/* Power-state selection passed to radeon_winsys::cs_set_pstate. */
enum radeon_ctx_pstate
{
   RADEON_CTX_PSTATE_NONE = 0,
   RADEON_CTX_PSTATE_STANDARD,
   RADEON_CTX_PSTATE_MIN_SCLK,
   RADEON_CTX_PSTATE_MIN_MCLK,
   RADEON_CTX_PSTATE_PEAK,
};
150
151
152 /* Each group of two has the same priority. */
153 #define RADEON_PRIO_FENCE_TRACE (1 << 0)
154 #define RADEON_PRIO_SO_FILLED_SIZE (1 << 1)
155
156 #define RADEON_PRIO_QUERY (1 << 2)
157 #define RADEON_PRIO_IB (1 << 3)
158
159 #define RADEON_PRIO_DRAW_INDIRECT (1 << 4)
160 #define RADEON_PRIO_INDEX_BUFFER (1 << 5)
161
162 #define RADEON_PRIO_CP_DMA (1 << 6)
163 #define RADEON_PRIO_BORDER_COLORS (1 << 7)
164
165 #define RADEON_PRIO_CONST_BUFFER (1 << 8)
166 #define RADEON_PRIO_DESCRIPTORS (1 << 9)
167
168 #define RADEON_PRIO_SAMPLER_BUFFER (1 << 10)
169 #define RADEON_PRIO_VERTEX_BUFFER (1 << 11)
170
171 #define RADEON_PRIO_SHADER_RW_BUFFER (1 << 12)
172 #define RADEON_PRIO_SAMPLER_TEXTURE (1 << 13)
173
174 #define RADEON_PRIO_SHADER_RW_IMAGE (1 << 14)
175 #define RADEON_PRIO_SAMPLER_TEXTURE_MSAA (1 << 15)
176
177 #define RADEON_PRIO_COLOR_BUFFER (1 << 16)
178 #define RADEON_PRIO_DEPTH_BUFFER (1 << 17)
179
180 #define RADEON_PRIO_COLOR_BUFFER_MSAA (1 << 18)
181 #define RADEON_PRIO_DEPTH_BUFFER_MSAA (1 << 19)
182
183 #define RADEON_PRIO_SEPARATE_META (1 << 20)
184 #define RADEON_PRIO_SHADER_BINARY (1 << 21) /* the hw can't hide instruction cache misses */
185
186 #define RADEON_PRIO_SHADER_RINGS (1 << 22)
187 #define RADEON_PRIO_SCRATCH_BUFFER (1 << 23)
188
189 #define RADEON_ALL_PRIORITIES (RADEON_USAGE_READ - 1)
190
191 /* Upper bits of priorities are used by usage flags. */
192 #define RADEON_USAGE_READ (1 << 28)
193 #define RADEON_USAGE_WRITE (1 << 29)
194 #define RADEON_USAGE_READWRITE (RADEON_USAGE_READ | RADEON_USAGE_WRITE)
195
196 /* The winsys ensures that the CS submission will be scheduled after
197 * previously flushed CSs referencing this BO in a conflicting way.
198 */
199 #define RADEON_USAGE_SYNCHRONIZED (1 << 30)
200
201 /* When used, an implicit sync is done to make sure a compute shader
202 * will read the written values from a previous draw.
203 */
204 #define RADEON_USAGE_NEEDS_IMPLICIT_SYNC (1u << 31)
205
206 struct winsys_handle;
207 struct radeon_winsys_ctx;
208
/* One chunk (IB) of a possibly-chained command buffer. */
struct radeon_cmdbuf_chunk {
   unsigned cdw;    /* Number of used dwords. */
   unsigned max_dw; /* Maximum number of dwords. */
   uint32_t *buf;   /* The base pointer of the chunk. */
};
214
/* A command buffer: the chunk currently being filled plus any previously
 * filled (chained) chunks.
 */
struct radeon_cmdbuf {
   struct radeon_cmdbuf_chunk current; /* Chunk currently being appended to. */
   struct radeon_cmdbuf_chunk *prev;   /* Array of completed chunks. */
   uint16_t num_prev; /* Number of previous chunks. */
   uint16_t max_prev; /* Space in array pointed to by prev. */
   unsigned prev_dw;  /* Total number of dwords in previous chunks. */

   /* Memory usage of the buffer list. These are always 0 for preamble IBs. */
   uint32_t used_vram_kb;
   uint32_t used_gart_kb;

   /* Private winsys data. */
   void *priv;
   void *csc; /* amdgpu_cs_context */
};
230
/* Tiling info for display code, DRI sharing, and other data. */
struct radeon_bo_metadata {
   /* Tiling flags describing the texture layout for display code
    * and DRI sharing.
    */
   union {
      struct {
         enum radeon_bo_layout microtile;
         enum radeon_bo_layout macrotile;
         unsigned pipe_config;
         unsigned bankw;
         unsigned bankh;
         unsigned tile_split;
         unsigned mtilea;
         unsigned num_banks;
         unsigned stride;
         bool scanout;
      } legacy;
   } u;

   enum radeon_surf_mode mode; /* Output from buffer_get_metadata */

   /* Additional metadata associated with the buffer, in bytes.
    * The maximum size is 64 * 4. This is opaque for the winsys & kernel.
    * Supported by amdgpu only.
    */
   uint32_t size_metadata;
   uint32_t metadata[64];
};
260
/* Features that can be requested through radeon_winsys::cs_request_feature. */
enum radeon_feature_id
{
   RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */
   RADEON_FID_R300_CMASK_ACCESS,
};
266
/* One entry of the buffer list returned by cs_get_buffer_list. */
struct radeon_bo_list_item {
   uint64_t bo_size;        /* Buffer size in bytes. */
   uint64_t vm_address;     /* GPU virtual address. */
   uint32_t priority_usage; /* mask of RADEON_PRIO_* bits (already shifted) */
};
272
struct radeon_winsys {
   /**
    * The screen object this winsys was created for
    */
   struct pipe_screen *screen;

   /**
    * Has the application created at least one TMZ buffer.
    */
   const bool uses_secure_bos;

   /**
    * Decrement the winsys reference count.
    *
    * \param ws  The winsys this function is called for.
    * \return    True if the winsys and screen should be destroyed.
    */
   bool (*unref)(struct radeon_winsys *ws);

   /**
    * Destroy this winsys.
    *
    * \param ws        The winsys this function is called from.
    */
   void (*destroy)(struct radeon_winsys *ws);

   /**
    * Get FD for winsys if winsys provides one
    */
   int (*get_fd)(struct radeon_winsys *ws);

   /**
    * Query an info structure from winsys.
    *
    * \param ws        The winsys this function is called from.
    * \param info      Return structure
    */
   void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info);

   /**
    * A hint for the winsys that it should pin its execution threads to
    * a group of cores sharing a specific L3 cache if the CPU has multiple
    * L3 caches. This is needed for good multithreading performance on
    * AMD Zen CPUs.
    */
   void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cpu);

   /**************************************************************************
    * Buffer management. Buffer attributes are mostly fixed over its lifetime.
    *
    * Remember that gallium gets to choose the interface it needs, and the
    * window systems must then implement that interface (rather than the
    * other way around...).
    *************************************************************************/

   /**
    * Create a buffer object.
    *
    * \param ws        The winsys this function is called from.
    * \param size      The size to allocate.
    * \param alignment An alignment of the buffer in memory.
    * \param domain    A bitmask of the RADEON_DOMAIN_* flags.
    * \param flags     A bitmask of the RADEON_FLAG_* flags.
    * \return          The created buffer object.
    */
   struct pb_buffer_lean *(*buffer_create)(struct radeon_winsys *ws, uint64_t size,
                                           unsigned alignment, enum radeon_bo_domain domain,
                                           enum radeon_bo_flag flags);

   /**
    * Don't use directly. Use radeon_bo_reference.
    */
   void (*buffer_destroy)(struct radeon_winsys *ws, struct pb_buffer_lean *buf);

   /**
    * Map the entire data store of a buffer object into the client's address
    * space.
    *
    * Callers are expected to unmap buffers again if and only if the
    * RADEON_MAP_TEMPORARY flag is set in \p usage.
    *
    * \param buf       A winsys buffer object to map.
    * \param cs        A command stream to flush if the buffer is referenced by it.
    * \param usage     A bitmask of the PIPE_MAP_* and RADEON_MAP_* flags.
    * \return          The pointer at the beginning of the buffer.
    */
   void *(*buffer_map)(struct radeon_winsys *ws, struct pb_buffer_lean *buf,
                       struct radeon_cmdbuf *cs, enum pipe_map_flags usage);

   /**
    * Unmap a buffer object from the client's address space.
    *
    * \param buf       A winsys buffer object to unmap.
    */
   void (*buffer_unmap)(struct radeon_winsys *ws, struct pb_buffer_lean *buf);

   /**
    * Wait for the buffer and return true if the buffer is not used
    * by the device.
    *
    * The timeout of 0 will only return the status.
    * The timeout of OS_TIMEOUT_INFINITE will always wait until the buffer
    * is idle.
    */
   bool (*buffer_wait)(struct radeon_winsys *ws, struct pb_buffer_lean *buf,
                       uint64_t timeout, unsigned usage);

   /**
    * Return buffer metadata.
    * (tiling info for display code, DRI sharing, and other data)
    *
    * \param buf       A winsys buffer object to get the flags from.
    * \param md        Metadata
    */
   void (*buffer_get_metadata)(struct radeon_winsys *ws, struct pb_buffer_lean *buf,
                               struct radeon_bo_metadata *md, struct radeon_surf *surf);

   /**
    * Set buffer metadata.
    * (tiling info for display code, DRI sharing, and other data)
    *
    * \param buf       A winsys buffer object to set the flags for.
    * \param md        Metadata
    */
   void (*buffer_set_metadata)(struct radeon_winsys *ws, struct pb_buffer_lean *buf,
                               struct radeon_bo_metadata *md, struct radeon_surf *surf);

   /**
    * Get a winsys buffer from a winsys handle. The internal structure
    * of the handle is platform-specific and only a winsys should access it.
    *
    * \param ws        The winsys this function is called from.
    * \param whandle   A winsys handle pointer as was received from a state
    *                  tracker.
    */
   struct pb_buffer_lean *(*buffer_from_handle)(struct radeon_winsys *ws,
                                                struct winsys_handle *whandle,
                                                unsigned vm_alignment,
                                                bool is_prime_linear_buffer);

   /**
    * Get a winsys buffer from a user pointer. The resulting buffer can't
    * be exported. Both pointer and size must be page aligned.
    *
    * \param ws        The winsys this function is called from.
    * \param pointer   User pointer to turn into a buffer object.
    * \param size      Size in bytes for the new buffer.
    * \param flags     A bitmask of the RADEON_FLAG_* flags.
    */
   struct pb_buffer_lean *(*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer,
                                             uint64_t size, enum radeon_bo_flag flags);

   /**
    * Whether the buffer was created from a user pointer.
    *
    * \param buf       A winsys buffer object
    * \return          whether \p buf was created via buffer_from_ptr
    */
   bool (*buffer_is_user_ptr)(struct pb_buffer_lean *buf);

   /** Whether the buffer was suballocated. */
   bool (*buffer_is_suballocated)(struct pb_buffer_lean *buf);

   /**
    * Get a winsys handle from a winsys buffer. The internal structure
    * of the handle is platform-specific and only a winsys should access it.
    *
    * \param ws        The winsys instance for which the handle is to be valid
    * \param buf       A winsys buffer object to get the handle from.
    * \param whandle   A winsys handle pointer.
    * \return          true on success.
    */
   bool (*buffer_get_handle)(struct radeon_winsys *ws, struct pb_buffer_lean *buf,
                             struct winsys_handle *whandle);

   /**
    * Change the commitment of a (64KB-page aligned) region of the given
    * sparse buffer.
    *
    * \warning There is no automatic synchronization with command submission.
    *
    * \note Only implemented by the amdgpu winsys.
    *
    * \return false on out of memory or other failure, true on success.
    */
   bool (*buffer_commit)(struct radeon_winsys *ws, struct pb_buffer_lean *buf,
                         uint64_t offset, uint64_t size, bool commit);

   /**
    * Calc size of the first committed part of the given sparse buffer.
    *
    * \note Only implemented by the amdgpu winsys.
    * \return the skipped count if the range_offset falls into a hole.
    */
   unsigned (*buffer_find_next_committed_memory)(struct pb_buffer_lean *buf,
                                                 uint64_t range_offset, unsigned *range_size);

   /**
    * Return the virtual address of a buffer.
    *
    * When virtual memory is not in use, this is the offset relative to the
    * relocation base (non-zero for sub-allocated buffers).
    *
    * \param buf       A winsys buffer object
    * \return          virtual address
    */
   uint64_t (*buffer_get_virtual_address)(struct pb_buffer_lean *buf);

   /**
    * Return the offset of this buffer relative to the relocation base.
    * This is only non-zero for sub-allocated buffers.
    *
    * This is only supported in the radeon winsys, since amdgpu uses virtual
    * addresses in submissions even for the video engines.
    *
    * \param buf      A winsys buffer object
    * \return         the offset for relocations
    */
   unsigned (*buffer_get_reloc_offset)(struct pb_buffer_lean *buf);

   /**
    * Query the initial placement of the buffer from the kernel driver.
    */
   enum radeon_bo_domain (*buffer_get_initial_domain)(struct pb_buffer_lean *buf);

   /**
    * Query the flags used for creation of this buffer.
    *
    * Note that for imported buffer this may be lossy since not all flags
    * are passed 1:1.
    */
   enum radeon_bo_flag (*buffer_get_flags)(struct pb_buffer_lean *buf);

   /**************************************************************************
    * Command submission.
    *
    * Each pipe context should create its own command stream and submit
    * commands independently of other contexts.
    *************************************************************************/

   /**
    * Create a command submission context.
    * Various command streams can be submitted to the same context.
    *
    * \param allow_context_lost  If true, lost contexts skip command submission and report
    *                            the reset status.
    *                            If false, losing the context results in undefined behavior.
    */
   struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws,
                                           enum radeon_ctx_priority priority,
                                           bool allow_context_lost);

   /**
    * Destroy a context.
    */
   void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);

   /**
    * Set a reset status for the context due to a software failure, such as an allocation failure
    * or a skipped draw.
    */
   void (*ctx_set_sw_reset_status)(struct radeon_winsys_ctx *ctx, enum pipe_reset_status status,
                                   const char *format, ...);

   /**
    * Query a GPU reset status.
    */
   enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx,
                                                    bool full_reset_only,
                                                    bool *needs_reset, bool *reset_completed);

   /**
    * Create a command stream.
    *
    * \param cs        The returned structure that is initialized by cs_create.
    * \param ctx       The submission context
    * \param ip_type   The IP type (GFX, DMA, UVD)
    * \param flush     Flush callback function associated with the command stream.
    * \param user      User pointer that will be passed to the flush callback.
    *
    * \return true on success
    */
   bool (*cs_create)(struct radeon_cmdbuf *cs,
                     struct radeon_winsys_ctx *ctx, enum amd_ip_type amd_ip_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx);

   /**
    * Set up and enable mid command buffer preemption for the command stream.
    *
    * \param cs               Command stream
    * \param preamble_ib      Non-preemptible preamble IB for the context.
    * \param preamble_num_dw  Number of dwords in the preamble IB.
    */
   bool (*cs_setup_preemption)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
                               unsigned preamble_num_dw);

   /**
    * Destroy a command stream.
    *
    * \param cs        A command stream to destroy.
    */
   void (*cs_destroy)(struct radeon_cmdbuf *cs);

   /**
    * Add a buffer. Each buffer used by a CS must be added using this function.
    *
    * \param cs      Command stream
    * \param buf     Buffer
    * \param usage   Usage
    * \param domain  Bitmask of the RADEON_DOMAIN_* flags.
    * \return Buffer index.
    */
   unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer_lean *buf,
                             unsigned usage, enum radeon_bo_domain domain);

   /**
    * Return the index of an already-added buffer.
    *
    * Not supported on amdgpu. Drivers with GPUVM should not care about
    * buffer indices.
    *
    * \param cs        Command stream
    * \param buf       Buffer
    * \return          The buffer index, or -1 if the buffer has not been added.
    */
   int (*cs_lookup_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer_lean *buf);

   /**
    * Return true if there is enough memory in VRAM and GTT for the buffers
    * added so far. If the validation fails, all buffers which have
    * been added since the last call of cs_validate will be removed and
    * the CS will be flushed (provided there are still any buffers).
    *
    * \param cs        A command stream to validate.
    */
   bool (*cs_validate)(struct radeon_cmdbuf *cs);

   /**
    * Check whether the given number of dwords is available in the IB.
    * Optionally chain a new chunk of the IB if necessary and supported.
    *
    * \param cs        A command stream.
    * \param dw        Number of CS dwords requested by the caller.
    * \return true if there is enough space
    */
   bool (*cs_check_space)(struct radeon_cmdbuf *cs, unsigned dw);

   /**
    * Return the buffer list.
    *
    * This is the buffer list as passed to the kernel, i.e. it only contains
    * the parent buffers of sub-allocated buffers.
    *
    * \param cs    Command stream
    * \param list  Returned buffer list. Set to NULL to query the count only.
    * \return      The buffer count.
    */
   unsigned (*cs_get_buffer_list)(struct radeon_cmdbuf *cs, struct radeon_bo_list_item *list);

   /**
    * Flush a command stream.
    *
    * \param cs          A command stream to flush.
    * \param flags       Bitmask of PIPE_FLUSH_* and RADEON_FLUSH_* flags.
    * \param fence       Pointer to a fence. If non-NULL, a fence is inserted
    *                    after the CS and is returned through this parameter.
    * \return Negative POSIX error code or 0 for success.
    *         Asynchronous submissions never return an error.
    */
   int (*cs_flush)(struct radeon_cmdbuf *cs, unsigned flags, struct pipe_fence_handle **fence);

   /**
    * Create a fence before the CS is flushed.
    * The user must flush manually to complete the initialization of the fence.
    *
    * The fence must not be used for anything except \ref cs_add_fence_dependency
    * before the flush.
    */
   struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_cmdbuf *cs);

   /**
    * Return true if a buffer is referenced by a command stream.
    *
    * \param cs        A command stream.
    * \param buf       A winsys buffer.
    */
   bool (*cs_is_buffer_referenced)(struct radeon_cmdbuf *cs, struct pb_buffer_lean *buf,
                                   unsigned usage);

   /**
    * Request access to a feature for a command stream.
    *
    * \param cs        A command stream.
    * \param fid       Feature ID, one of RADEON_FID_*
    * \param enable    Whether to enable or disable the feature.
    */
   bool (*cs_request_feature)(struct radeon_cmdbuf *cs, enum radeon_feature_id fid, bool enable);

   /**
    * Make sure all asynchronous flush of the cs have completed
    *
    * \param cs        A command stream.
    */
   void (*cs_sync_flush)(struct radeon_cmdbuf *cs);

   /**
    * Add a fence dependency to the CS, so that the CS will wait for
    * the fence before execution.
    */
   void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence);

   /**
    * Signal a syncobj when the CS finishes execution.
    */
   void (*cs_add_syncobj_signal)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence);

   /**
    * Returns the amd_ip_type type of a CS.
    */
   enum amd_ip_type (*cs_get_ip_type)(struct radeon_cmdbuf *cs);

   /**
    * Wait for the fence and return true if the fence has been signalled.
    * The timeout of 0 will only return the status.
    * The timeout of OS_TIMEOUT_INFINITE will always wait until the fence
    * is signalled.
    */
   bool (*fence_wait)(struct radeon_winsys *ws, struct pipe_fence_handle *fence, uint64_t timeout);

   /**
    * Reference counting for fences.
    */
   void (*fence_reference)(struct radeon_winsys *ws, struct pipe_fence_handle **dst,
                           struct pipe_fence_handle *src);

   /**
    * Create a new fence object corresponding to the given syncobj fd.
    */
   struct pipe_fence_handle *(*fence_import_syncobj)(struct radeon_winsys *ws, int fd);

   /**
    * Create a new fence object corresponding to the given sync_file.
    */
   struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws, int fd);

   /**
    * Return a sync_file FD corresponding to the given fence object.
    */
   int (*fence_export_sync_file)(struct radeon_winsys *ws, struct pipe_fence_handle *fence);

   /**
    * Return a sync file FD that is already signalled.
    */
   int (*export_signalled_sync_file)(struct radeon_winsys *ws);

   /**
    * Initialize surface
    *
    * \param ws        The winsys this function is called from.
    * \param info      radeon_info from the driver
    * \param tex       Input texture description
    * \param flags     Bitmask of RADEON_SURF_* flags
    * \param bpe       Bytes per pixel, it can be different for Z buffers.
    * \param mode      Preferred tile mode. (linear, 1D, or 2D)
    * \param surf      Output structure
    */
   int (*surface_init)(struct radeon_winsys *ws, const struct radeon_info *info,
                       const struct pipe_resource *tex, uint64_t flags,
                       unsigned bpe, enum radeon_surf_mode mode, struct radeon_surf *surf);

   /**
    * Query a winsys value (counters, memory usage, clocks; see radeon_value_id).
    */
   uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value);

   /**
    * Read "num_registers" consecutive GPU registers starting at "reg_offset"
    * into "out".
    */
   bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers,
                          uint32_t *out);

   /**
    * Secure context
    */
   bool (*cs_is_secure)(struct radeon_cmdbuf *cs);

   /**
    * Stable pstate
    */
   bool (*cs_set_pstate)(struct radeon_cmdbuf *cs, enum radeon_ctx_pstate state);

   /**
    * Pass the VAs to the buffers where various information is saved by the FW during mcbp.
    */
   void (*cs_set_mcbp_reg_shadowing_va)(struct radeon_cmdbuf *cs, uint64_t regs_va,
                                        uint64_t csa_va);
};
761
radeon_emitted(struct radeon_cmdbuf * cs,unsigned num_dw)762 static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw)
763 {
764 return cs && (cs->prev_dw + cs->current.cdw > num_dw);
765 }
766
radeon_emit(struct radeon_cmdbuf * cs,uint32_t value)767 static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
768 {
769 cs->current.buf[cs->current.cdw++] = value;
770 }
771
radeon_emit_array(struct radeon_cmdbuf * cs,const uint32_t * values,unsigned count)772 static inline void radeon_emit_array(struct radeon_cmdbuf *cs, const uint32_t *values,
773 unsigned count)
774 {
775 memcpy(cs->current.buf + cs->current.cdw, values, count * 4);
776 cs->current.cdw += count;
777 }
778
/* Return whether the application has created at least one TMZ (secure)
 * buffer on this winsys.
 */
static inline bool radeon_uses_secure_bos(struct radeon_winsys* ws)
{
   return ws->uses_secure_bos;
}
783
/* Make *dst reference src, adjusting reference counts. If *dst held the
 * last reference to its previous buffer, that buffer is destroyed.
 * src may be NULL to just drop the old reference.
 */
static inline void
radeon_bo_reference(struct radeon_winsys *rws, struct pb_buffer_lean **dst,
                    struct pb_buffer_lean *src)
{
   /* Save the old pointer before pipe_reference potentially releases it. */
   struct pb_buffer_lean *old = *dst;

   if (pipe_reference(&(*dst)->reference, &src->reference))
      rws->buffer_destroy(rws, old);
   *dst = src;
}
794
/* Same as radeon_bo_reference, but ignore the value in *dst. */
static inline void
radeon_bo_set_reference(struct pb_buffer_lean **dst, struct pb_buffer_lean *src)
{
   *dst = src;
   pipe_reference(NULL, &src->reference); /* only increment refcount */
}
802
/* Unreference dst, but don't assign anything. Destroys the buffer when the
 * last reference is dropped.
 */
static inline void
radeon_bo_drop_reference(struct radeon_winsys *rws, struct pb_buffer_lean *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      rws->buffer_destroy(rws, dst);
}
810
811 /* The following bits describe the heaps managed by slab allocators (pb_slab) and
812 * the allocation cache (pb_cache).
813 */
814 #define RADEON_HEAP_BIT_VRAM (1 << 0) /* if false, it's GTT */
815 #define RADEON_HEAP_BIT_READ_ONLY (1 << 1) /* both VRAM and GTT */
816 #define RADEON_HEAP_BIT_32BIT (1 << 2) /* both VRAM and GTT */
817 #define RADEON_HEAP_BIT_ENCRYPTED (1 << 3) /* both VRAM and GTT */
818
819 #define RADEON_HEAP_BIT_NO_CPU_ACCESS (1 << 4) /* VRAM only */
820
821 #define RADEON_HEAP_BIT_WC (1 << 4) /* GTT only, VRAM implies this to be true */
822 #define RADEON_HEAP_BIT_GL2_BYPASS (1 << 5) /* GTT only */
823
824 /* The number of all possible heap descriptions using the bits above. */
825 #define RADEON_NUM_HEAPS (1 << 6)
826
radeon_domain_from_heap(int heap)827 static inline enum radeon_bo_domain radeon_domain_from_heap(int heap)
828 {
829 assert(heap >= 0);
830
831 if (heap & RADEON_HEAP_BIT_VRAM)
832 return RADEON_DOMAIN_VRAM;
833 else
834 return RADEON_DOMAIN_GTT;
835 }
836
radeon_flags_from_heap(int heap)837 static inline unsigned radeon_flags_from_heap(int heap)
838 {
839 assert(heap >= 0);
840
841 unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING;
842
843 if (heap & RADEON_HEAP_BIT_READ_ONLY)
844 flags |= RADEON_FLAG_READ_ONLY;
845 if (heap & RADEON_HEAP_BIT_32BIT)
846 flags |= RADEON_FLAG_32BIT;
847 if (heap & RADEON_HEAP_BIT_ENCRYPTED)
848 flags |= RADEON_FLAG_ENCRYPTED;
849
850 if (heap & RADEON_HEAP_BIT_VRAM) {
851 flags |= RADEON_FLAG_GTT_WC;
852 if (heap & RADEON_HEAP_BIT_NO_CPU_ACCESS)
853 flags |= RADEON_FLAG_NO_CPU_ACCESS;
854 } else {
855 /* GTT only */
856 if (heap & RADEON_HEAP_BIT_WC)
857 flags |= RADEON_FLAG_GTT_WC;
858 if (heap & RADEON_HEAP_BIT_GL2_BYPASS)
859 flags |= RADEON_FLAG_GL2_BYPASS;
860 }
861
862 return flags;
863 }
864
865 /* This cleans up flags, so that we can comfortably assume that no invalid flag combinations
866 * are set.
867 */
radeon_canonicalize_bo_flags(enum radeon_bo_domain * _domain,enum radeon_bo_flag * _flags)868 static void radeon_canonicalize_bo_flags(enum radeon_bo_domain *_domain,
869 enum radeon_bo_flag *_flags)
870 {
871 unsigned domain = *_domain;
872 unsigned flags = *_flags;
873
874 /* Only set 1 domain, e.g. ignore GTT if VRAM is set. */
875 if (domain)
876 domain = BITFIELD_BIT(ffs(domain) - 1);
877 else
878 domain = RADEON_DOMAIN_VRAM;
879
880 switch (domain) {
881 case RADEON_DOMAIN_VRAM:
882 flags |= RADEON_FLAG_GTT_WC;
883 flags &= ~RADEON_FLAG_GL2_BYPASS;
884 break;
885 case RADEON_DOMAIN_GTT:
886 flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
887 break;
888 case RADEON_DOMAIN_GDS:
889 case RADEON_DOMAIN_OA:
890 flags |= RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_NO_CPU_ACCESS;
891 flags &= ~RADEON_FLAG_SPARSE;
892 break;
893 }
894
895 /* Sparse buffers must have NO_CPU_ACCESS set. */
896 if (flags & RADEON_FLAG_SPARSE)
897 flags |= RADEON_FLAG_NO_CPU_ACCESS;
898
899 *_domain = (enum radeon_bo_domain)domain;
900 *_flags = (enum radeon_bo_flag)flags;
901 }
902
903 /* Return the heap index for winsys allocators, or -1 on failure. */
radeon_get_heap_index(enum radeon_bo_domain domain,enum radeon_bo_flag flags)904 static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags)
905 {
906 radeon_canonicalize_bo_flags(&domain, &flags);
907
908 /* Resources with interprocess sharing don't use any winsys allocators. */
909 if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING))
910 return -1;
911
912 /* These are unsupported flags. */
913 /* RADEON_FLAG_DRIVER_INTERNAL is ignored. It doesn't affect allocators. */
914 if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE |
915 RADEON_FLAG_DISCARDABLE))
916 return -1;
917
918 int heap = 0;
919
920 if (flags & RADEON_FLAG_READ_ONLY)
921 heap |= RADEON_HEAP_BIT_READ_ONLY;
922 if (flags & RADEON_FLAG_32BIT)
923 heap |= RADEON_HEAP_BIT_32BIT;
924 if (flags & RADEON_FLAG_ENCRYPTED)
925 heap |= RADEON_HEAP_BIT_ENCRYPTED;
926
927 if (domain == RADEON_DOMAIN_VRAM) {
928 /* VRAM | GTT shouldn't occur, but if it does, ignore GTT. */
929 heap |= RADEON_HEAP_BIT_VRAM;
930 if (flags & RADEON_FLAG_NO_CPU_ACCESS)
931 heap |= RADEON_HEAP_BIT_NO_CPU_ACCESS;
932 /* RADEON_FLAG_WC is ignored and implied to be true for VRAM */
933 /* RADEON_FLAG_GL2_BYPASS is ignored and implied to be false for VRAM */
934 } else if (domain == RADEON_DOMAIN_GTT) {
935 /* GTT is implied by RADEON_HEAP_BIT_VRAM not being set. */
936 if (flags & RADEON_FLAG_GTT_WC)
937 heap |= RADEON_HEAP_BIT_WC;
938 if (flags & RADEON_FLAG_GL2_BYPASS)
939 heap |= RADEON_HEAP_BIT_GL2_BYPASS;
940 /* RADEON_FLAG_NO_CPU_ACCESS is ignored and implied to be false for GTT */
941 /* RADEON_FLAG_MALL_NOALLOC is ignored and implied to be false for GTT */
942 } else {
943 return -1; /* */
944 }
945
946 assert(heap < RADEON_NUM_HEAPS);
947 return heap;
948 }
949
950 typedef struct pipe_screen *(*radeon_screen_create_t)(struct radeon_winsys *,
951 const struct pipe_screen_config *);
952
953 /* These functions create the radeon_winsys instance for the corresponding kernel driver. */
954 struct radeon_winsys *
955 amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
956 radeon_screen_create_t screen_create);
957 struct radeon_winsys *
958 radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
959 radeon_screen_create_t screen_create);
960
961 #endif
962