• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2009 Corbin Simpson
3  * Copyright © 2015 Advanced Micro Devices, Inc.
4  *
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #ifndef AMDGPU_WINSYS_H
9 #define AMDGPU_WINSYS_H
10 
11 #include "pipebuffer/pb_cache.h"
12 #include "pipebuffer/pb_slab.h"
13 #include "winsys/radeon_winsys.h"
14 #include "util/simple_mtx.h"
15 #include "util/u_queue.h"
16 #include <amdgpu.h>
17 
18 struct amdgpu_cs;
19 
/* DRM file descriptors, file descriptions and buffer sharing.
 *
 * amdgpu_device_initialize's first argument is a file descriptor (fd)
 * representing a specific GPU.
 * If a fd is duplicated using os_dupfd_cloexec,
 * the file description will remain the same (os_same_file_description will
 * return 0).
 * But if the same device is re-opened, the fd and the file description will
 * be different.
 *
 * amdgpu_screen_winsys's fd tracks the file description which was
 * given to amdgpu_winsys_create. This is the fd used by the application
 * using the driver and may be used in other ioctls (eg: drmModeAddFB).
 *
 * amdgpu_winsys's fd is the file description used to initialize the
 * device handle in libdrm_amdgpu.
 *
 * The 2 fds can be different, even in systems with a single GPU, eg: if
 * radv is initialized before radeonsi.
 *
 * This fd tracking is useful for buffer sharing because KMS/GEM handles are
 * specific to a DRM file description, i.e. the same handle value may refer
 * to different underlying BOs in different DRM file descriptions.
 * As an example, if an app wants to use drmModeAddFB it'll need a KMS handle
 * valid for its fd (== amdgpu_screen_winsys::fd).
 * If both fds are identical, there's nothing to do: bo->u.real.kms_handle
 * can be used directly (see amdgpu_bo_get_handle).
 * If they're different, the BO has to be exported from the device fd as
 * a dma-buf, then imported from the app fd as a KMS handle.
 */
50 
/* Per-screen winsys: one instance per DRM file description referencing the
 * shared device-wide struct amdgpu_winsys (see the fd-tracking comment above).
 */
struct amdgpu_screen_winsys {
   /* Public winsys interface; must stay the first member so that
    * amdgpu_screen_winsys() below can downcast with a plain pointer cast.
    */
   struct radeon_winsys base;
   /* The shared device-wide winsys this screen belongs to. */
   struct amdgpu_winsys *aws;
   /* See comment above */
   int fd;
   /* Reference count for this screen winsys. */
   struct pipe_reference reference;
   /* Next link in amdgpu_winsys::sws_list (singly-linked). */
   struct amdgpu_screen_winsys *next;

   /* Maps a BO to its KMS handle valid for this DRM file descriptor
    * Protected by amdgpu_winsys::sws_list_lock
    */
   struct hash_table *kms_handles;
};
64 
/* At most this number of IBs can be busy per queue. When submitting a new IB and the oldest IB
 * ("AMDGPU_FENCE_RING_SIZE" IBs ago) is still busy, the CS thread will wait for it and will
 * also block all queues from submitting new IBs.
 */
#define AMDGPU_FENCE_RING_SIZE 32

/* The maximum number of queues that can be present. */
#define AMDGPU_MAX_QUEUES 6

/* Sequence-number type for per-queue fence tracking (see struct amdgpu_queue).
 *
 * This can use any integer type because the logic handles integer wraparounds
 * robustly, but uint8_t wraps around so quickly that some BOs might never become
 * idle because we don't remove idle fences from BOs, so they become "busy" again
 * after a queue sequence number wraps around and they may stay "busy" in pb_cache
 * long enough that we run out of memory.
 */
typedef uint16_t uint_seq_no;
80 
/* Per-queue fence ring; one instance per queue in amdgpu_winsys::queues.
 * All fields are protected by amdgpu_winsys::bo_fence_lock.
 */
struct amdgpu_queue {
   /* Ring buffer of fences.
    *
    * We only remember a certain number of the most recent fences per queue. When we add a new
    * fence, we wait for the oldest one, which implies that all older fences not present
    * in the ring are idle. This way we don't have to keep track of a million fence references
    * for a million BOs.
    *
    * We only support 1 queue per IP. If an IP has multiple queues, we always add a fence
    * dependency on the previous fence to make it behave like there is only 1 queue.
    *
    * amdgpu_winsys_bo doesn't have a list of fences. It only remembers the last sequence number
    * for every queue where it was used. We then use the BO's sequence number to look up a fence
    * in this ring.
    */
   struct pipe_fence_handle *fences[AMDGPU_FENCE_RING_SIZE];

   /* The sequence number of the latest fence.
    *
    * This sequence number is global per queue per device, shared by all contexts, and generated
    * by the winsys, not the kernel.
    *
    * The latest fence is: fences[latest_seq_no % AMDGPU_FENCE_RING_SIZE]
    * The oldest fence is: fences[(latest_seq_no + 1) % AMDGPU_FENCE_RING_SIZE]
    * The oldest sequence number in the ring: latest_seq_no - AMDGPU_FENCE_RING_SIZE + 1
    *
    * The sequence number is in the ring if:
    *    latest_seq_no - buffer_seq_no < AMDGPU_FENCE_RING_SIZE
    * If the sequence number is not in the ring, it's idle.
    *
    * Integer wraparounds of the sequence number behave as follows:
    *
    * The comparison above gives the correct answer if buffer_seq_no isn't older than UINT*_MAX.
    * If it's older than UINT*_MAX but not older than UINT*_MAX + AMDGPU_FENCE_RING_SIZE, we
    * incorrectly pick and wait for one of the fences in the ring. That's only a problem when
    * the type is so small (uint8_t) that seq_no wraps around very frequently, causing BOs to
    * never become idle in certain very unlucky scenarios and running out of memory.
    */
   uint_seq_no latest_seq_no;

   /* The last context using this queue. */
   struct amdgpu_ctx *last_ctx;
};
124 
/* This is part of every BO. Records, per queue, the sequence number of the last
 * use of the BO; combined with struct amdgpu_queue it replaces a per-BO fence list.
 */
struct amdgpu_seq_no_fences {
   /* A fence sequence number per queue. This number is used to look up the fence from
    * struct amdgpu_queue.
    *
    * This sequence number is global per queue per device, shared by all contexts, and generated
    * by the winsys, not the kernel.
    */
   uint_seq_no seq_no[AMDGPU_MAX_QUEUES];

   /* The mask of queues where seq_no[i] is valid. */
   uint8_t valid_fence_mask;
};

/* valid_fence_mask should have 1 bit for each queue. */
static_assert(sizeof(((struct amdgpu_seq_no_fences*)NULL)->valid_fence_mask) * 8 >= AMDGPU_MAX_QUEUES, "");
141 
/* Device-wide winsys state, shared by every amdgpu_screen_winsys that opened the
 * same DRM device (see the fd-tracking comment near the top of this file).
 */
struct amdgpu_winsys {
   /* Reference count; the struct is shared by multiple screen winsyses. */
   struct pipe_reference reference;
   /* See comment above */
   int fd;

   /* Protected by bo_fence_lock. */
   struct amdgpu_queue queues[AMDGPU_MAX_QUEUES];

   struct pb_cache bo_cache;  /* Buffer cache (pb_cache). */
   struct pb_slabs bo_slabs;  /* Slab allocator. */

   /* libdrm_amdgpu device handle, initialized from this->fd. */
   amdgpu_device_handle dev;

   /* Protects the per-queue fence rings above. */
   simple_mtx_t bo_fence_lock;

   int num_cs; /* The number of command streams created. */
   uint32_t surf_index_color;
   uint32_t surf_index_fmask;
   uint32_t next_bo_unique_id;
   /* Memory-usage and submission statistics. */
   uint64_t allocated_vram;
   uint64_t allocated_gtt;
   uint64_t mapped_vram;
   uint64_t mapped_gtt;
   uint64_t slab_wasted_vram;
   uint64_t slab_wasted_gtt;
   uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
   uint64_t num_gfx_IBs;
   uint64_t num_sdma_IBs;
   uint64_t num_mapped_buffers;
   uint64_t gfx_bo_list_counter;
   uint64_t gfx_ib_size_counter;

   /* GPU/driver capability info. */
   struct radeon_info info;

   /* multithreaded IB submission */
   struct util_queue cs_queue;

   /* Address/surface layout library handle. */
   struct ac_addrlib *addrlib;

   /* Debug/driconf options. */
   bool check_vm;
   bool noop_cs;
   bool reserve_vmid;
   bool zero_all_vram_allocs;
#if DEBUG
   bool debug_all_bos;

   /* List of all allocated buffers */
   simple_mtx_t global_bo_list_lock;
   struct list_head global_bo_list;
   unsigned num_buffers;
#endif

   /* Single-linked list of all structs amdgpu_screen_winsys referencing this
    * struct amdgpu_winsys
    */
   simple_mtx_t sws_list_lock;
   struct amdgpu_screen_winsys *sws_list;

   /* For returning the same amdgpu_winsys_bo instance for exported
    * and re-imported buffers. */
   struct hash_table *bo_export_table;
   simple_mtx_t bo_export_table_lock;

   /* Since most winsys functions require struct radeon_winsys *, dummy_ws.base is used
    * for invoking them because sws_list can be NULL.
    */
   struct amdgpu_screen_winsys dummy_ws;
};
210 
/* Downcast the public winsys to the amdgpu screen winsys.
 * Safe because struct radeon_winsys is the first member of
 * struct amdgpu_screen_winsys.
 */
static inline struct amdgpu_screen_winsys *
amdgpu_screen_winsys(struct radeon_winsys *base)
{
   struct amdgpu_screen_winsys *sws = (struct amdgpu_screen_winsys *)base;
   return sws;
}
216 
217 static inline struct amdgpu_winsys *
amdgpu_winsys(struct radeon_winsys * base)218 amdgpu_winsys(struct radeon_winsys *base)
219 {
220    return amdgpu_screen_winsys(base)->aws;
221 }
222 
223 void amdgpu_surface_init_functions(struct amdgpu_screen_winsys *ws);
224 
225 #endif
226