/*
 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
 * Copyright 2010 Marek Olšák <maraeo@gmail.com>
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
25 
26 #ifndef RADEON_WINSYS_H
27 #define RADEON_WINSYS_H
28 
29 /* The public winsys interface header for the radeon driver. */
30 
/*
 * Driver-private flush flags. They occupy the top three bits (29..31) of the
 * flush flag word and are meant to be OR-ed together with PIPE_FLUSH_* flags.
 */

/* Skip command submission. Same as RADEON_NOOP=1. */
#define RADEON_FLUSH_NOOP                     (1u << 29)

/* Toggle the secure submission boolean after the flush */
#define RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION (1u << 30)

/* Whether the next IB can start immediately and not wait for draws and
 * dispatches from the current IB to finish. */
#define RADEON_FLUSH_START_NEXT_GFX_IB_NOW    (1u << 31)

/* Asynchronous flush combined with RADEON_FLUSH_START_NEXT_GFX_IB_NOW.
 * PIPE_FLUSH_ASYNC is not defined by this header; it must be visible at the
 * point where this macro is expanded. */
#define RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW                                                   \
   (PIPE_FLUSH_ASYNC | RADEON_FLUSH_START_NEXT_GFX_IB_NOW)
43 
44 #include "amd/common/ac_gpu_info.h"
45 #include "amd/common/ac_surface.h"
46 #include "pipebuffer/pb_buffer.h"
47 
/* Tiling flags.
 *
 * Used for the legacy micro/macro tile fields of struct radeon_bo_metadata.
 * Values are sequential starting at 0; RADEON_LAYOUT_UNKNOWN is the last one.
 */
enum radeon_bo_layout
{
   RADEON_LAYOUT_LINEAR = 0,
   RADEON_LAYOUT_TILED,
   RADEON_LAYOUT_SQUARETILED,

   RADEON_LAYOUT_UNKNOWN
};
57 
/* Memory domains a buffer can be placed in.
 *
 * This is a bitfield: values may be OR-ed together (RADEON_DOMAIN_VRAM_GTT is
 * the only predefined combination). Bit 0 (value 1) is not assigned here.
 */
enum radeon_bo_domain
{
   RADEON_DOMAIN_GTT = 2,
   RADEON_DOMAIN_VRAM = 4,
   RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,
   RADEON_DOMAIN_GDS = 8,
   RADEON_DOMAIN_OA = 16,
};
66 
/* Buffer creation flags.
 *
 * This is a bitfield: each enumerator is a distinct bit (bits 0..11) and
 * values may be OR-ed together.
 */
enum radeon_bo_flag
{
   RADEON_FLAG_GTT_WC = (1 << 0),
   RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
   RADEON_FLAG_NO_SUBALLOC = (1 << 2),
   RADEON_FLAG_SPARSE = (1 << 3),
   RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
   RADEON_FLAG_READ_ONLY = (1 << 5),
   RADEON_FLAG_32BIT = (1 << 6),
   RADEON_FLAG_ENCRYPTED = (1 << 7),
   RADEON_FLAG_GL2_BYPASS = (1 << 8), /* only gfx9 and newer */
   RADEON_FLAG_DRIVER_INTERNAL = (1 << 9),
   /* Discard on eviction (instead of moving the buffer to GTT).
    * This guarantees that this buffer will never be moved to GTT.
    */
   RADEON_FLAG_DISCARDABLE = (1 << 10),
   RADEON_FLAG_MALL_NOALLOC = (1 << 11), /* don't cache in the infinity cache */
};
85 
86 enum radeon_map_flags
87 {
88    /* Indicates that the caller will unmap the buffer.
89     *
90     * Not unmapping buffers is an important performance optimization for
91     * OpenGL (avoids kernel overhead for frequently mapped buffers).
92     */
93    RADEON_MAP_TEMPORARY = (PIPE_MAP_DRV_PRV << 0),
94 };
95 
/* Size of one sparse-buffer page in bytes (64 KiB). */
#define RADEON_SPARSE_PAGE_SIZE (64 * 1024)
97 
/* Identifiers accepted by radeon_winsys::query_value.
 * Values are sequential starting at 0.
 */
enum radeon_value_id
{
   RADEON_REQUESTED_VRAM_MEMORY,
   RADEON_REQUESTED_GTT_MEMORY,
   RADEON_MAPPED_VRAM,
   RADEON_MAPPED_GTT,
   RADEON_SLAB_WASTED_VRAM,
   RADEON_SLAB_WASTED_GTT,
   RADEON_BUFFER_WAIT_TIME_NS,
   RADEON_NUM_MAPPED_BUFFERS,
   RADEON_TIMESTAMP,
   RADEON_NUM_GFX_IBS,
   RADEON_NUM_SDMA_IBS,
   RADEON_GFX_BO_LIST_COUNTER, /* number of BOs submitted in gfx IBs */
   RADEON_GFX_IB_SIZE_COUNTER,
   RADEON_NUM_BYTES_MOVED,
   RADEON_NUM_EVICTIONS,
   RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
   RADEON_VRAM_USAGE,
   RADEON_VRAM_VIS_USAGE,
   RADEON_GTT_USAGE,
   RADEON_GPU_TEMPERATURE,
   RADEON_CURRENT_SCLK,
   RADEON_CURRENT_MCLK,
   RADEON_CS_THREAD_TIME,
};
124 
/* Priority requested for a submission context (see radeon_winsys::ctx_create).
 * Enumerators are listed from lowest (0) to highest.
 */
enum radeon_ctx_priority
{
   RADEON_CTX_PRIORITY_LOW = 0,
   RADEON_CTX_PRIORITY_MEDIUM,
   RADEON_CTX_PRIORITY_HIGH,
   RADEON_CTX_PRIORITY_REALTIME,
};
132 
/* Buffer priorities. Each RADEON_PRIO_* value is a single bit (bits 0..23).
 * Each group of two has the same priority.
 */
#define RADEON_PRIO_FENCE_TRACE (1 << 0)
#define RADEON_PRIO_SO_FILLED_SIZE (1 << 1)

#define RADEON_PRIO_QUERY (1 << 2)
#define RADEON_PRIO_IB (1 << 3)

#define RADEON_PRIO_DRAW_INDIRECT (1 << 4)
#define RADEON_PRIO_INDEX_BUFFER (1 << 5)

#define RADEON_PRIO_CP_DMA (1 << 6)
#define RADEON_PRIO_BORDER_COLORS (1 << 7)

#define RADEON_PRIO_CONST_BUFFER (1 << 8)
#define RADEON_PRIO_DESCRIPTORS (1 << 9)

#define RADEON_PRIO_SAMPLER_BUFFER (1 << 10)
#define RADEON_PRIO_VERTEX_BUFFER (1 << 11)

#define RADEON_PRIO_SHADER_RW_BUFFER (1 << 12)
#define RADEON_PRIO_SAMPLER_TEXTURE (1 << 13)

#define RADEON_PRIO_SHADER_RW_IMAGE (1 << 14)
#define RADEON_PRIO_SAMPLER_TEXTURE_MSAA (1 << 15)

#define RADEON_PRIO_COLOR_BUFFER (1 << 16)
#define RADEON_PRIO_DEPTH_BUFFER (1 << 17)

#define RADEON_PRIO_COLOR_BUFFER_MSAA (1 << 18)
#define RADEON_PRIO_DEPTH_BUFFER_MSAA (1 << 19)

#define RADEON_PRIO_SEPARATE_META (1 << 20)
#define RADEON_PRIO_SHADER_BINARY (1 << 21) /* the hw can't hide instruction cache misses */

#define RADEON_PRIO_SHADER_RINGS (1 << 22)
#define RADEON_PRIO_SCRATCH_BUFFER (1 << 23)

/* Mask of every bit below RADEON_USAGE_READ (bits 0..27).
 * RADEON_USAGE_READ is defined further down; that is fine because macros are
 * only expanded at the point of use.
 */
#define RADEON_ALL_PRIORITIES (RADEON_USAGE_READ - 1)

/* Upper bits of priorities are used by usage flags. */
#define RADEON_USAGE_READ (1 << 28)
#define RADEON_USAGE_WRITE (1 << 29)
#define RADEON_USAGE_READWRITE (RADEON_USAGE_READ | RADEON_USAGE_WRITE)

/* The winsys ensures that the CS submission will be scheduled after
 * previously flushed CSs referencing this BO in a conflicting way.
 */
#define RADEON_USAGE_SYNCHRONIZED (1 << 30)

/* When used, an implicit sync is done to make sure a compute shader
 * will read the written values from a previous draw.
 */
#define RADEON_USAGE_NEEDS_IMPLICIT_SYNC (1u << 31)
186 
/* Opaque types: only pointers to them are used in this header. */
struct winsys_handle;
struct radeon_winsys_ctx;
189 
/* One contiguous piece of a command buffer, measured in 32-bit dwords. */
struct radeon_cmdbuf_chunk {
   unsigned cdw;    /* Number of used dwords. */
   unsigned max_dw; /* Maximum number of dwords. */
   uint32_t *buf;   /* The base pointer of the chunk. */
};
195 
196 struct radeon_cmdbuf {
197    struct radeon_cmdbuf_chunk current;
198    struct radeon_cmdbuf_chunk *prev;
199    uint16_t num_prev; /* Number of previous chunks. */
200    uint16_t max_prev; /* Space in array pointed to by prev. */
201    unsigned prev_dw;  /* Total number of dwords in previous chunks. */
202 
203    /* Memory usage of the buffer list. These are always 0 for preamble IBs. */
204    uint32_t used_vram_kb;
205    uint32_t used_gart_kb;
206    uint64_t gpu_address;
207 
208    /* Private winsys data. */
209    void *priv;
210    void *csc; /* amdgpu_cs_context */
211 };
212 
213 /* Tiling info for display code, DRI sharing, and other data. */
214 struct radeon_bo_metadata {
215    /* Tiling flags describing the texture layout for display code
216     * and DRI sharing.
217     */
218    union {
219       struct {
220          enum radeon_bo_layout microtile;
221          enum radeon_bo_layout macrotile;
222          unsigned pipe_config;
223          unsigned bankw;
224          unsigned bankh;
225          unsigned tile_split;
226          unsigned mtilea;
227          unsigned num_banks;
228          unsigned stride;
229          bool scanout;
230       } legacy;
231    } u;
232 
233    enum radeon_surf_mode mode;   /* Output from buffer_get_metadata */
234 
235    /* Additional metadata associated with the buffer, in bytes.
236     * The maximum size is 64 * 4. This is opaque for the winsys & kernel.
237     * Supported by amdgpu only.
238     */
239    uint32_t size_metadata;
240    uint32_t metadata[64];
241 };
242 
/* Feature identifiers accepted by radeon_winsys::cs_request_feature. */
enum radeon_feature_id
{
   RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */
   RADEON_FID_R300_CMASK_ACCESS,
};
248 
/* One entry of the buffer list returned by radeon_winsys::cs_get_buffer_list. */
struct radeon_bo_list_item {
   uint64_t bo_size;
   uint64_t vm_address;
   /* Bitmask of RADEON_PRIO_* values (these are already single-bit masks).
    * NOTE(review): presumably RADEON_USAGE_* bits can be set here too, given
    * that they share the same 32-bit space -- confirm against the winsys.
    */
   uint32_t priority_usage;
};
254 
/**
 * The winsys interface: a table of function pointers implemented by a
 * kernel-driver-specific backend (see amdgpu_winsys_create and
 * radeon_drm_winsys_create), plus a small amount of shared state.
 */
struct radeon_winsys {
   /**
    * The screen object this winsys was created for
    */
   struct pipe_screen *screen;
   /**
    * Has the application created at least one TMZ buffer.
    */
   const bool uses_secure_bos;

   /**
    * Decrement the winsys reference count.
    *
    * \param ws  The winsys this function is called for.
    * \return    True if the winsys and screen should be destroyed.
    */
   bool (*unref)(struct radeon_winsys *ws);

   /**
    * Destroy this winsys.
    *
    * \param ws        The winsys this function is called from.
    */
   void (*destroy)(struct radeon_winsys *ws);

   /**
    * Query an info structure from winsys.
    *
    * \param ws        The winsys this function is called from.
    * \param info      Return structure
    * \param enable_smart_access_memory   NOTE(review): semantics are not
    *                  visible in this header; presumably forces SAM on.
    * \param disable_smart_access_memory  NOTE(review): presumably forces SAM
    *                  off; confirm precedence against the implementation.
    */
   void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info,
                      bool enable_smart_access_memory,
                      bool disable_smart_access_memory);

   /**
    * A hint for the winsys that it should pin its execution threads to
    * a group of cores sharing a specific L3 cache if the CPU has multiple
    * L3 caches. This is needed for good multithreading performance on
    * AMD Zen CPUs.
    */
   void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache);

   /**************************************************************************
    * Buffer management. Buffer attributes are mostly fixed over its lifetime.
    *
    * Remember that gallium gets to choose the interface it needs, and the
    * window systems must then implement that interface (rather than the
    * other way around...).
    *************************************************************************/

   /**
    * Create a buffer object.
    *
    * \param ws        The winsys this function is called from.
    * \param size      The size to allocate.
    * \param alignment An alignment of the buffer in memory.
    * \param domain    A bitmask of the RADEON_DOMAIN_* flags.
    * \param flags     A bitmask of the RADEON_FLAG_* flags.
    * \return          The created buffer object.
    */
   struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment,
                                      enum radeon_bo_domain domain, enum radeon_bo_flag flags);

   /**
    * Map the entire data store of a buffer object into the client's address
    * space.
    *
    * Callers are expected to unmap buffers again if and only if the
    * RADEON_MAP_TEMPORARY flag is set in \p usage.
    *
    * \param ws        The winsys this function is called from.
    * \param buf       A winsys buffer object to map.
    * \param cs        A command stream to flush if the buffer is referenced by it.
    * \param usage     A bitmask of the PIPE_MAP_* and RADEON_MAP_* flags.
    * \return          The pointer at the beginning of the buffer.
    */
   void *(*buffer_map)(struct radeon_winsys *ws, struct pb_buffer *buf,
                       struct radeon_cmdbuf *cs, enum pipe_map_flags usage);

   /**
    * Unmap a buffer object from the client's address space.
    *
    * \param ws        The winsys this function is called from.
    * \param buf       A winsys buffer object to unmap.
    */
   void (*buffer_unmap)(struct radeon_winsys *ws, struct pb_buffer *buf);

   /**
    * Wait for the buffer and return true if the buffer is not used
    * by the device.
    *
    * The timeout of 0 will only return the status.
    * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the buffer
    * is idle.
    *
    * \param usage     NOTE(review): presumably RADEON_USAGE_* bits selecting
    *                  which kind of GPU access to wait for; confirm.
    */
   bool (*buffer_wait)(struct radeon_winsys *ws, struct pb_buffer *buf,
                       uint64_t timeout, unsigned usage);

   /**
    * Return buffer metadata.
    * (tiling info for display code, DRI sharing, and other data)
    *
    * \param ws        The winsys this function is called from.
    * \param buf       A winsys buffer object to get the flags from.
    * \param md        Metadata
    * \param surf      Surface description associated with the buffer.
    */
   void (*buffer_get_metadata)(struct radeon_winsys *ws, struct pb_buffer *buf,
                               struct radeon_bo_metadata *md, struct radeon_surf *surf);

   /**
    * Set buffer metadata.
    * (tiling info for display code, DRI sharing, and other data)
    *
    * \param ws        The winsys this function is called from.
    * \param buf       A winsys buffer object to set the flags for.
    * \param md        Metadata
    * \param surf      Surface description associated with the buffer.
    */
   void (*buffer_set_metadata)(struct radeon_winsys *ws, struct pb_buffer *buf,
                               struct radeon_bo_metadata *md, struct radeon_surf *surf);

   /**
    * Get a winsys buffer from a winsys handle. The internal structure
    * of the handle is platform-specific and only a winsys should access it.
    *
    * \param ws        The winsys this function is called from.
    * \param whandle   A winsys handle pointer as was received from a state
    *                  tracker.
    * \param vm_alignment            NOTE(review): not described in this
    *                  header; presumably the virtual-address alignment.
    * \param is_prime_linear_buffer  NOTE(review): not described in this
    *                  header; confirm against the implementation.
    */
   struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws, struct winsys_handle *whandle,
                                           unsigned vm_alignment, bool is_prime_linear_buffer);

   /**
    * Get a winsys buffer from a user pointer. The resulting buffer can't
    * be exported. Both pointer and size must be page aligned.
    *
    * \param ws        The winsys this function is called from.
    * \param pointer   User pointer to turn into a buffer object.
    * \param size      Size in bytes for the new buffer.
    * \param flags     A bitmask of the RADEON_FLAG_* flags.
    */
   struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer, uint64_t size,
                                        enum radeon_bo_flag flags);

   /**
    * Whether the buffer was created from a user pointer.
    *
    * \param buf       A winsys buffer object
    * \return          whether \p buf was created via buffer_from_ptr
    */
   bool (*buffer_is_user_ptr)(struct pb_buffer *buf);

   /** Whether the buffer was suballocated. */
   bool (*buffer_is_suballocated)(struct pb_buffer *buf);

   /**
    * Get a winsys handle from a winsys buffer. The internal structure
    * of the handle is platform-specific and only a winsys should access it.
    *
    * \param ws        The winsys instance for which the handle is to be valid
    * \param buf       A winsys buffer object to get the handle from.
    * \param whandle   A winsys handle pointer.
    * \return          true on success.
    */
   bool (*buffer_get_handle)(struct radeon_winsys *ws, struct pb_buffer *buf,
                             struct winsys_handle *whandle);

   /**
    * Change the commitment of a (64KB-page aligned) region of the given
    * sparse buffer.
    *
    * \warning There is no automatic synchronization with command submission.
    *
    * \note Only implemented by the amdgpu winsys.
    *
    * \return false on out of memory or other failure, true on success.
    */
   bool (*buffer_commit)(struct radeon_winsys *ws, struct pb_buffer *buf,
                         uint64_t offset, uint64_t size, bool commit);

   /**
    * Return the virtual address of a buffer.
    *
    * When virtual memory is not in use, this is the offset relative to the
    * relocation base (non-zero for sub-allocated buffers).
    *
    * \param buf       A winsys buffer object
    * \return          virtual address
    */
   uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf);

   /**
    * Return the offset of this buffer relative to the relocation base.
    * This is only non-zero for sub-allocated buffers.
    *
    * This is only supported in the radeon winsys, since amdgpu uses virtual
    * addresses in submissions even for the video engines.
    *
    * \param buf      A winsys buffer object
    * \return         the offset for relocations
    */
   unsigned (*buffer_get_reloc_offset)(struct pb_buffer *buf);

   /**
    * Query the initial placement of the buffer from the kernel driver.
    */
   enum radeon_bo_domain (*buffer_get_initial_domain)(struct pb_buffer *buf);

   /**
    * Query the flags used for creation of this buffer.
    *
    * Note that for imported buffer this may be lossy since not all flags
    * are passed 1:1.
    */
   enum radeon_bo_flag (*buffer_get_flags)(struct pb_buffer *buf);

   /**************************************************************************
    * Command submission.
    *
    * Each pipe context should create its own command stream and submit
    * commands independently of other contexts.
    *************************************************************************/

   /**
    * Create a command submission context.
    * Various command streams can be submitted to the same context.
    */
   struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws,
                                           enum radeon_ctx_priority priority);

   /**
    * Destroy a context.
    */
   void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);

   /**
    * Query a GPU reset status.
    */
   enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx,
                                                    bool full_reset_only,
                                                    bool *needs_reset);

   /**
    * Create a command stream.
    *
    * \param cs          The returned structure that is initialized by cs_create.
    * \param ctx         The submission context
    * \param amd_ip_type The IP type (GFX, DMA, UVD)
    * \param flush       Flush callback function associated with the command stream.
    * \param flush_ctx   User pointer that will be passed to the flush callback.
    * \param stop_exec_on_failure  NOTE(review): not described in this header;
    *                    confirm the exact semantics against the implementation.
    *
    * \return true on success
    */
   bool (*cs_create)(struct radeon_cmdbuf *cs,
                     struct radeon_winsys_ctx *ctx, enum amd_ip_type amd_ip_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx, bool stop_exec_on_failure);

   /**
    * Set or change the CS preamble, which is a sequence of packets that is executed before
    * the command buffer. If the winsys doesn't support preambles, the packets are inserted
    * into the command buffer.
    *
    * \param cs               Command stream
    * \param preamble_ib      Preamble IB for the context.
    * \param preamble_num_dw  Number of dwords in the preamble IB.
    * \param preamble_changed Whether the preamble changed or is the same as the last one.
    */
   void (*cs_set_preamble)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
                           unsigned preamble_num_dw, bool preamble_changed);

   /**
    * Set up and enable mid command buffer preemption for the command stream.
    *
    * \param cs               Command stream
    * \param preamble_ib      Non-preemptible preamble IB for the context.
    * \param preamble_num_dw  Number of dwords in the preamble IB.
    */
   bool (*cs_setup_preemption)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
                               unsigned preamble_num_dw);

   /**
    * Destroy a command stream.
    *
    * \param cs        A command stream to destroy.
    */
   void (*cs_destroy)(struct radeon_cmdbuf *cs);

   /**
    * Add a buffer. Each buffer used by a CS must be added using this function.
    *
    * \param cs      Command stream
    * \param buf     Buffer
    * \param usage   Usage
    * \param domain  Bitmask of the RADEON_DOMAIN_* flags.
    * \return Buffer index.
    */
   unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf,
                             unsigned usage, enum radeon_bo_domain domain);

   /**
    * Return the index of an already-added buffer.
    *
    * Not supported on amdgpu. Drivers with GPUVM should not care about
    * buffer indices.
    *
    * \param cs        Command stream
    * \param buf       Buffer
    * \return          The buffer index, or -1 if the buffer has not been added.
    */
   int (*cs_lookup_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf);

   /**
    * Return true if there is enough memory in VRAM and GTT for the buffers
    * added so far. If the validation fails, all buffers which have
    * been added since the last call of cs_validate will be removed and
    * the CS will be flushed (provided there are still any buffers).
    *
    * \param cs        A command stream to validate.
    */
   bool (*cs_validate)(struct radeon_cmdbuf *cs);

   /**
    * Check whether the given number of dwords is available in the IB.
    * Optionally chain a new chunk of the IB if necessary and supported.
    *
    * \param cs        A command stream.
    * \param dw        Number of CS dwords requested by the caller.
    * \return true if there is enough space
    */
   bool (*cs_check_space)(struct radeon_cmdbuf *cs, unsigned dw);

   /**
    * Return the buffer list.
    *
    * This is the buffer list as passed to the kernel, i.e. it only contains
    * the parent buffers of sub-allocated buffers.
    *
    * \param cs    Command stream
    * \param list  Returned buffer list. Set to NULL to query the count only.
    * \return      The buffer count.
    */
   unsigned (*cs_get_buffer_list)(struct radeon_cmdbuf *cs, struct radeon_bo_list_item *list);

   /**
    * Flush a command stream.
    *
    * \param cs          A command stream to flush.
    * \param flags       PIPE_FLUSH_* flags.
    * \param fence       Pointer to a fence. If non-NULL, a fence is inserted
    *                    after the CS and is returned through this parameter.
    * \return Negative POSIX error code or 0 for success.
    *         Asynchronous submissions never return an error.
    */
   int (*cs_flush)(struct radeon_cmdbuf *cs, unsigned flags, struct pipe_fence_handle **fence);

   /**
    * Create a fence before the CS is flushed.
    * The user must flush manually to complete the initialization of the fence.
    *
    * The fence must not be used for anything except \ref cs_add_fence_dependency
    * before the flush.
    */
   struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_cmdbuf *cs);

   /**
    * Return true if a buffer is referenced by a command stream.
    *
    * \param cs        A command stream.
    * \param buf       A winsys buffer.
    * \param usage     NOTE(review): presumably RADEON_USAGE_* bits restricting
    *                  which kind of reference counts; confirm.
    */
   bool (*cs_is_buffer_referenced)(struct radeon_cmdbuf *cs, struct pb_buffer *buf,
                                   unsigned usage);

   /**
    * Request access to a feature for a command stream.
    *
    * \param cs        A command stream.
    * \param fid       Feature ID, one of RADEON_FID_*
    * \param enable    Whether to enable or disable the feature.
    */
   bool (*cs_request_feature)(struct radeon_cmdbuf *cs, enum radeon_feature_id fid, bool enable);

   /**
    * Make sure all asynchronous flushes of the cs have completed
    *
    * \param cs        A command stream.
    */
   void (*cs_sync_flush)(struct radeon_cmdbuf *cs);

   /**
    * Add a fence dependency to the CS, so that the CS will wait for
    * the fence before execution.
    *
    * \param dependency_flags  Bitmask of RADEON_DEPENDENCY_*
    *                          (these flags are not defined in this header).
    */
   void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence,
                                   unsigned dependency_flags);

   /**
    * Signal a syncobj when the CS finishes execution.
    */
   void (*cs_add_syncobj_signal)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence);

   /**
    * Wait for the fence and return true if the fence has been signalled.
    * The timeout of 0 will only return the status.
    * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence
    * is signalled.
    */
   bool (*fence_wait)(struct radeon_winsys *ws, struct pipe_fence_handle *fence, uint64_t timeout);

   /**
    * Reference counting for fences.
    */
   void (*fence_reference)(struct pipe_fence_handle **dst, struct pipe_fence_handle *src);

   /**
    * Create a new fence object corresponding to the given syncobj fd.
    */
   struct pipe_fence_handle *(*fence_import_syncobj)(struct radeon_winsys *ws, int fd);

   /**
    * Create a new fence object corresponding to the given sync_file.
    */
   struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws, int fd);

   /**
    * Return a sync_file FD corresponding to the given fence object.
    */
   int (*fence_export_sync_file)(struct radeon_winsys *ws, struct pipe_fence_handle *fence);

   /**
    * Return a sync file FD that is already signalled.
    */
   int (*export_signalled_sync_file)(struct radeon_winsys *ws);

   /**
    * Initialize surface
    *
    * \param ws        The winsys this function is called from.
    * \param tex       Input texture description
    * \param flags     Bitmask of RADEON_SURF_* flags
    * \param bpe       Bytes per pixel, it can be different for Z buffers.
    * \param mode      Preferred tile mode. (linear, 1D, or 2D)
    * \param surf      Output structure
    */
   int (*surface_init)(struct radeon_winsys *ws, const struct pipe_resource *tex, uint64_t flags,
                       unsigned bpe, enum radeon_surf_mode mode, struct radeon_surf *surf);

   /**
    * Query one of the RADEON_* values listed in enum radeon_value_id.
    */
   uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value);

   /**
    * Read \p num_registers registers starting at \p reg_offset into \p out.
    * NOTE(review): units of reg_offset and the meaning of the return value
    * are not stated in this header; confirm against the implementation.
    */
   bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers,
                          uint32_t *out);

   /**
    * Secure context
    */
   bool (*cs_is_secure)(struct radeon_cmdbuf *cs);
};
709 
radeon_emitted(struct radeon_cmdbuf * cs,unsigned num_dw)710 static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw)
711 {
712    return cs && (cs->prev_dw + cs->current.cdw > num_dw);
713 }
714 
radeon_emit(struct radeon_cmdbuf * cs,uint32_t value)715 static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
716 {
717    cs->current.buf[cs->current.cdw++] = value;
718 }
719 
radeon_emit_array(struct radeon_cmdbuf * cs,const uint32_t * values,unsigned count)720 static inline void radeon_emit_array(struct radeon_cmdbuf *cs, const uint32_t *values,
721                                      unsigned count)
722 {
723    memcpy(cs->current.buf + cs->current.cdw, values, count * 4);
724    cs->current.cdw += count;
725 }
726 
radeon_uses_secure_bos(struct radeon_winsys * ws)727 static inline bool radeon_uses_secure_bos(struct radeon_winsys* ws)
728 {
729   return ws->uses_secure_bos;
730 }
731 
/**
 * Make \p *dst reference \p src.
 *
 * Thin wrapper that forwards all three arguments unchanged to
 * pb_reference_with_winsys(), which is declared outside this header.
 *
 * \param rws  Winsys passed through to pb_reference_with_winsys().
 * \param dst  Location of the reference to update.
 * \param src  New buffer to reference.
 */
static inline void
radeon_bo_reference(struct radeon_winsys *rws, struct pb_buffer **dst, struct pb_buffer *src)
{
   pb_reference_with_winsys(rws, dst, src);
}
737 
/* The following bits describe the heaps managed by slab allocators (pb_slab) and
 * the allocation cache (pb_cache).
 *
 * Bits 4 and 5 are deliberately shared: their meaning depends on whether
 * RADEON_HEAP_BIT_VRAM is set (VRAM-only meaning) or clear (GTT-only meaning).
 */
#define RADEON_HEAP_BIT_VRAM           (1 << 0) /* if false, it's GTT */
#define RADEON_HEAP_BIT_READ_ONLY      (1 << 1) /* both VRAM and GTT */
#define RADEON_HEAP_BIT_32BIT          (1 << 2) /* both VRAM and GTT */
#define RADEON_HEAP_BIT_ENCRYPTED      (1 << 3) /* both VRAM and GTT */

#define RADEON_HEAP_BIT_NO_CPU_ACCESS  (1 << 4) /* VRAM only */
#define RADEON_HEAP_BIT_MALL_NOALLOC   (1 << 5) /* VRAM only */

#define RADEON_HEAP_BIT_WC             (1 << 4) /* GTT only, VRAM implies this to be true */
#define RADEON_HEAP_BIT_GL2_BYPASS     (1 << 5) /* GTT only */

/* The number of all possible heap descriptions using the bits above. */
#define RADEON_NUM_HEAPS               (1 << 6)
754 
radeon_domain_from_heap(int heap)755 static inline enum radeon_bo_domain radeon_domain_from_heap(int heap)
756 {
757    assert(heap >= 0);
758 
759    if (heap & RADEON_HEAP_BIT_VRAM)
760       return RADEON_DOMAIN_VRAM;
761    else
762       return RADEON_DOMAIN_GTT;
763 }
764 
radeon_flags_from_heap(int heap)765 static inline unsigned radeon_flags_from_heap(int heap)
766 {
767    assert(heap >= 0);
768 
769    unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING;
770 
771    if (heap & RADEON_HEAP_BIT_READ_ONLY)
772       flags |= RADEON_FLAG_READ_ONLY;
773    if (heap & RADEON_HEAP_BIT_32BIT)
774       flags |= RADEON_FLAG_32BIT;
775    if (heap & RADEON_HEAP_BIT_ENCRYPTED)
776       flags |= RADEON_FLAG_ENCRYPTED;
777 
778    if (heap & RADEON_HEAP_BIT_VRAM) {
779       flags |= RADEON_FLAG_GTT_WC;
780       if (heap & RADEON_HEAP_BIT_NO_CPU_ACCESS)
781          flags |= RADEON_FLAG_NO_CPU_ACCESS;
782       if (heap & RADEON_HEAP_BIT_MALL_NOALLOC)
783          flags |= RADEON_FLAG_MALL_NOALLOC;
784    } else {
785       /* GTT only */
786       if (heap & RADEON_HEAP_BIT_WC)
787          flags |= RADEON_FLAG_GTT_WC;
788       if (heap & RADEON_HEAP_BIT_GL2_BYPASS)
789          flags |= RADEON_FLAG_GL2_BYPASS;
790    }
791 
792    return flags;
793 }
794 
795 /* This cleans up flags, so that we can comfortably assume that no invalid flag combinations
796  * are set.
797  */
radeon_canonicalize_bo_flags(enum radeon_bo_domain * _domain,enum radeon_bo_flag * _flags)798 static void radeon_canonicalize_bo_flags(enum radeon_bo_domain *_domain,
799                                          enum radeon_bo_flag *_flags)
800 {
801    unsigned domain = *_domain;
802    unsigned flags = *_flags;
803 
804    /* Only set 1 domain, e.g. ignore GTT if VRAM is set. */
805    if (domain)
806       domain = BITFIELD_BIT(ffs(domain) - 1);
807    else
808       domain = RADEON_DOMAIN_VRAM;
809 
810    switch (domain) {
811    case RADEON_DOMAIN_VRAM:
812       flags |= RADEON_FLAG_GTT_WC;
813       flags &= ~RADEON_FLAG_GL2_BYPASS;
814       break;
815    case RADEON_DOMAIN_GTT:
816       flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
817       flags &= ~RADEON_FLAG_MALL_NOALLOC;
818       break;
819    case RADEON_DOMAIN_GDS:
820    case RADEON_DOMAIN_OA:
821       flags |= RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_NO_CPU_ACCESS;
822       flags &= ~RADEON_FLAG_SPARSE;
823       break;
824    }
825 
826    /* Sparse buffers must have NO_CPU_ACCESS set. */
827    if (flags & RADEON_FLAG_SPARSE)
828       flags |= RADEON_FLAG_NO_CPU_ACCESS;
829 
830    *_domain = (enum radeon_bo_domain)domain;
831    *_flags = (enum radeon_bo_flag)flags;
832 }
833 
834 /* Return the heap index for winsys allocators, or -1 on failure. */
radeon_get_heap_index(enum radeon_bo_domain domain,enum radeon_bo_flag flags)835 static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags)
836 {
837    radeon_canonicalize_bo_flags(&domain, &flags);
838 
839    /* Resources with interprocess sharing don't use any winsys allocators. */
840    if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING))
841       return -1;
842 
843    /* These are unsupported flags. */
844    /* RADEON_FLAG_DRIVER_INTERNAL is ignored. It doesn't affect allocators. */
845    if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE |
846                 RADEON_FLAG_DISCARDABLE))
847       return -1;
848 
849    int heap = 0;
850 
851    if (flags & RADEON_FLAG_READ_ONLY)
852       heap |= RADEON_HEAP_BIT_READ_ONLY;
853    if (flags & RADEON_FLAG_32BIT)
854       heap |= RADEON_HEAP_BIT_32BIT;
855    if (flags & RADEON_FLAG_ENCRYPTED)
856       heap |= RADEON_HEAP_BIT_ENCRYPTED;
857 
858    if (domain == RADEON_DOMAIN_VRAM) {
859       /* VRAM | GTT shouldn't occur, but if it does, ignore GTT. */
860       heap |= RADEON_HEAP_BIT_VRAM;
861       if (flags & RADEON_FLAG_NO_CPU_ACCESS)
862          heap |= RADEON_HEAP_BIT_NO_CPU_ACCESS;
863       if (flags & RADEON_FLAG_MALL_NOALLOC)
864          heap |= RADEON_HEAP_BIT_MALL_NOALLOC;
865       /* RADEON_FLAG_WC is ignored and implied to be true for VRAM */
866       /* RADEON_FLAG_GL2_BYPASS is ignored and implied to be false for VRAM */
867    } else if (domain == RADEON_DOMAIN_GTT) {
868       /* GTT is implied by RADEON_HEAP_BIT_VRAM not being set. */
869       if (flags & RADEON_FLAG_GTT_WC)
870          heap |= RADEON_HEAP_BIT_WC;
871       if (flags & RADEON_FLAG_GL2_BYPASS)
872          heap |= RADEON_HEAP_BIT_GL2_BYPASS;
873       /* RADEON_FLAG_NO_CPU_ACCESS is ignored and implied to be false for GTT */
874       /* RADEON_FLAG_MALL_NOALLOC is ignored and implied to be false for GTT */
875    } else {
876       return -1; /*  */
877    }
878 
879    assert(heap < RADEON_NUM_HEAPS);
880    return heap;
881 }
882 
/* Callback that creates the pipe_screen for a given winsys instance. It is
 * handed to the *_winsys_create() functions below.
 */
typedef struct pipe_screen *(*radeon_screen_create_t)(struct radeon_winsys *,
                                                      const struct pipe_screen_config *);

/* These functions create the radeon_winsys instance for the corresponding kernel driver. */
struct radeon_winsys *
amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
                     radeon_screen_create_t screen_create);
struct radeon_winsys *
radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
                         radeon_screen_create_t screen_create);
893 
894 #endif
895