/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef AMDGPU_BO_H
#define AMDGPU_BO_H

#include "amdgpu_winsys.h"
#include "pipebuffer/pb_slab.h"

#ifdef __cplusplus
extern "C" {
#endif

struct amdgpu_sparse_backing_chunk;

/*
 * Sub-allocation information for a real buffer used as backing memory of a
 * sparse buffer.
 */
struct amdgpu_sparse_backing {
   struct list_head list;

   struct amdgpu_bo_real *bo;

   /* Sorted list of free chunks. */
   struct amdgpu_sparse_backing_chunk *chunks;
   uint32_t max_chunks;
   uint32_t num_chunks;
};

struct amdgpu_sparse_commitment {
   struct amdgpu_sparse_backing *backing;
   uint32_t page;
};

enum amdgpu_bo_type {
   AMDGPU_BO_SLAB_ENTRY,
   AMDGPU_BO_SPARSE,
   AMDGPU_BO_REAL,               /* only REAL enums can be present after this */
   AMDGPU_BO_REAL_REUSABLE,      /* only REAL_REUSABLE enums can be present after this */
   AMDGPU_BO_REAL_REUSABLE_SLAB,
};

/* Anything above REAL will use the BO list for REAL. */
#define NUM_BO_LIST_TYPES (AMDGPU_BO_REAL + 1)
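
/* Illustrative sketch only (not part of this header): one way the type could be folded into one
 * of the NUM_BO_LIST_TYPES buckets, given that everything above AMDGPU_BO_REAL shares the REAL
 * list. The helper name and the use of MIN2 are assumptions made for illustration.
 *
 *    static inline unsigned example_bo_list_type(enum amdgpu_bo_type type)
 *    {
 *       return MIN2(type, AMDGPU_BO_REAL);  // REAL_REUSABLE* collapse into the REAL bucket
 *    }
 */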

/* Base class of the buffer object that other structures inherit. */
struct amdgpu_winsys_bo {
   struct pb_buffer_lean base;
   enum amdgpu_bo_type type:8;
   struct amdgpu_seq_no_fences fences;

   /* Since some IPs like VCN want to have an unlimited number of queues, we can't generate our
    * own sequence numbers for those queues. Instead, each buffer has "alt_fence", an alternative
    * fence that represents the last use of that buffer on any VCN queue. If any other queue
    * wants to use that buffer, it has to insert alt_fence as a dependency, and replace alt_fence
    * with the newly submitted fence, so that it's always equal to the last use.
    *
    * Only VCN uses and updates alt_fence when an IB is submitted. Other IPs only use alt_fence
    * as a fence dependency. alt_fence is NULL when VCN isn't used, so there is no negative
    * impact on CPU overhead in that case.
    */
   struct pipe_fence_handle *alt_fence;

   /* This is set when a buffer is returned by buffer_create(), not when the memory is allocated
    * as part of a slab BO.
    */
   uint32_t unique_id;

   /* The number of command streams, being emitted in a separate thread, that this BO is
    * referenced in.
    */
   volatile int num_active_ioctls;
};
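
/* Illustrative sketch only (the real logic lives in the winsys submission code, not here): how
 * alt_fence is meant to be handled according to the comment above. "cs_add_fence_dependency",
 * "fence_reference", "is_vcn_queue" and "newly_submitted_fence" are placeholder names, not
 * declared in this header.
 *
 *    // Any queue that uses "bo" waits for its last VCN use first:
 *    if (bo->alt_fence)
 *       cs_add_fence_dependency(cs, bo->alt_fence);
 *
 *    // Only a VCN submission then updates alt_fence to point at the new last use:
 *    if (is_vcn_queue)
 *       fence_reference(&bo->alt_fence, newly_submitted_fence);
 */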

/* Real GPU memory allocation managed by the amdgpu kernel driver.
 *
 * There are also types of buffers that are not "real" kernel allocations, such as slab entry
 * BOs, which are suballocated from real BOs, and sparse BOs, which initially only allocate
 * the virtual address range, not memory.
 */
struct amdgpu_bo_real {
   struct amdgpu_winsys_bo b;

   ac_drm_bo bo;
   amdgpu_va_handle va_handle;
   /* Timeline point of the latest VM ioctl completion. Only used with userqueues. */
   uint64_t vm_timeline_point;

   void *cpu_ptr; /* for user_ptr and permanent maps */
   int map_count;
   uint32_t kms_handle;
#if MESA_DEBUG
   struct list_head global_list_item;
#endif
   simple_mtx_t map_lock;

   bool is_user_ptr;

   /* Whether buffer_get_handle or buffer_from_handle has been called. It can only transition
    * from false to true. Protected by lock.
    */
   bool is_shared;

   /* Whether this is a slab buffer and alt_fence was set on one of the slab entries. */
   bool slab_has_busy_alt_fences;
};

/* Same as amdgpu_bo_real except this BO isn't destroyed when its reference count drops to 0.
 * Instead, it's cached in pb_cache for later reuse.
 */
struct amdgpu_bo_real_reusable {
   struct amdgpu_bo_real b;
   struct pb_cache_entry cache_entry;
};

/* Sparse BO. This only allocates the virtual address range for the BO. The physical storage is
 * allocated on demand by the user using radeon_winsys::buffer_commit with 64KB granularity.
 */
struct amdgpu_bo_sparse {
   struct amdgpu_winsys_bo b;
   amdgpu_va_handle va_handle;
   /* Only used with userqueues. Holds the latest timeline point, including for backing BOs. */
   uint64_t vm_timeline_point;

   uint32_t num_va_pages;
   uint32_t num_backing_pages;
   simple_mtx_t commit_lock;

   struct list_head backing;

   /* Commitment information for each page of the virtual memory area. */
   struct amdgpu_sparse_commitment *commitments;
};
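
/* Illustrative sketch only: how a commit range could be translated into indices into the
 * "commitments" array above, assuming the 64KB sparse page size mentioned in the comment
 * (RADEON_SPARSE_PAGE_SIZE is assumed here and is not declared in this header).
 *
 *    uint32_t start_page = offset / RADEON_SPARSE_PAGE_SIZE;
 *    uint32_t end_page = (offset + size + RADEON_SPARSE_PAGE_SIZE - 1) / RADEON_SPARSE_PAGE_SIZE;
 *
 *    for (uint32_t page = start_page; page < end_page; page++) {
 *       struct amdgpu_sparse_commitment *commit = &sparse_bo->commitments[page];
 *       // commit->backing and commit->page identify where this VA page is physically backed
 *    }
 */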

/* Suballocated buffer using the slab allocator. This BO is only one piece of a larger buffer
 * called a slab, which is a buffer that's divided into smaller equal-sized buffers.
 */
struct amdgpu_bo_slab_entry {
   struct amdgpu_winsys_bo b;
   struct pb_slab_entry entry;
};

/* The slab buffer, which is the big backing buffer out of which smaller BOs are suballocated and
 * represented by amdgpu_bo_slab_entry. It's always a real and reusable buffer.
 */
struct amdgpu_bo_real_reusable_slab {
   struct amdgpu_bo_real_reusable b;
   struct pb_slab slab;
   struct amdgpu_bo_slab_entry *entries;
};

static inline bool is_real_bo(struct amdgpu_winsys_bo *bo)
{
   return bo->type >= AMDGPU_BO_REAL;
}

static inline struct amdgpu_bo_real *get_real_bo(struct amdgpu_winsys_bo *bo)
{
   assert(is_real_bo(bo));
   return (struct amdgpu_bo_real*)bo;
}

static inline struct amdgpu_bo_real_reusable *get_real_bo_reusable(struct amdgpu_winsys_bo *bo)
{
   assert(bo->type >= AMDGPU_BO_REAL_REUSABLE);
   return (struct amdgpu_bo_real_reusable*)bo;
}

static inline struct amdgpu_bo_sparse *get_sparse_bo(struct amdgpu_winsys_bo *bo)
{
   assert(bo->type == AMDGPU_BO_SPARSE && bo->base.usage & RADEON_FLAG_SPARSE);
   return (struct amdgpu_bo_sparse*)bo;
}

static inline struct amdgpu_bo_slab_entry *get_slab_entry_bo(struct amdgpu_winsys_bo *bo)
{
   assert(bo->type == AMDGPU_BO_SLAB_ENTRY);
   return (struct amdgpu_bo_slab_entry*)bo;
}

static inline struct amdgpu_bo_real_reusable_slab *get_bo_from_slab(struct pb_slab *slab)
{
   return container_of(slab, struct amdgpu_bo_real_reusable_slab, slab);
}

static inline struct amdgpu_bo_real *get_slab_entry_real_bo(struct amdgpu_winsys_bo *bo)
{
   assert(bo->type == AMDGPU_BO_SLAB_ENTRY);
   return &get_bo_from_slab(((struct amdgpu_bo_slab_entry*)bo)->entry.slab)->b.b;
}

static inline struct amdgpu_bo_real_reusable_slab *get_real_bo_reusable_slab(struct amdgpu_winsys_bo *bo)
{
   assert(bo->type == AMDGPU_BO_REAL_REUSABLE_SLAB);
   return (struct amdgpu_bo_real_reusable_slab*)bo;
}

/* Given a sequence number "fences->seq_no[queue_index]", return a pointer to the corresponding
 * fence slot in the queue ring if that slot still holds a non-NULL fence. If the fence is not
 * present in the ring (= the buffer is idle on that queue), clear the queue's bit in
 * valid_fence_mask and return NULL. If a non-NULL pointer is returned and the caller finds the
 * fence to be idle, it's recommended to use the returned pointer to set the fence to NULL in
 * the ring, which is why a pointer to a pointer is returned.
 */
static inline struct pipe_fence_handle **
get_fence_from_ring(struct amdgpu_winsys *aws, struct amdgpu_seq_no_fences *fences,
                    unsigned queue_index)
{
   /* The caller should check if the BO has a fence. */
   assert(queue_index < AMDGPU_MAX_QUEUES);
   assert(fences->valid_fence_mask & BITFIELD_BIT(queue_index));

   uint_seq_no buffer_seq_no = fences->seq_no[queue_index];
   uint_seq_no latest_seq_no = aws->queues[queue_index].latest_seq_no;
   bool fence_present = latest_seq_no - buffer_seq_no < AMDGPU_FENCE_RING_SIZE;

   if (fence_present) {
      struct pipe_fence_handle **fence =
         &aws->queues[queue_index].fences[buffer_seq_no % AMDGPU_FENCE_RING_SIZE];

      if (*fence)
         return fence;
   }

   /* If the sequence number references a fence that is not present, it's guaranteed to be idle
    * because the winsys always waits for the oldest fence when it removes it from the ring.
    */
   fences->valid_fence_mask &= ~BITFIELD_BIT(queue_index);
   return NULL;
}
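
/* Illustrative usage sketch for get_fence_from_ring, following the recommendation above to clear
 * the ring slot once a fence is known to be idle. "fence_is_signalled" is a placeholder for
 * whatever zero-timeout wait the caller uses; it is not declared in this header, and real code
 * must also release the ring's reference when clearing the slot.
 *
 *    u_foreach_bit(queue_index, bo->fences.valid_fence_mask) {
 *       struct pipe_fence_handle **fence = get_fence_from_ring(aws, &bo->fences, queue_index);
 *
 *       if (fence && fence_is_signalled(*fence))
 *          *fence = NULL;   // the BO is idle on this queue; drop the fence from the ring
 *    }
 */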

static inline uint_seq_no pick_latest_seq_no(struct amdgpu_winsys *aws, unsigned queue_index,
                                             uint_seq_no n1, uint_seq_no n2)
{
   uint_seq_no latest = aws->queues[queue_index].latest_seq_no;

   /* Since sequence numbers can wrap around, we need to pick the later number that's logically
    * before "latest". The trick is to subtract "latest + 1", letting the subtraction underflow,
    * so that "latest" maps to UINT*_MAX, and then just return the maximum.
    */
   uint_seq_no s1 = n1 - latest - 1;
   uint_seq_no s2 = n2 - latest - 1;

   return s1 >= s2 ? n1 : n2;
}
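
/* Worked example of the wraparound trick above, assuming a 16-bit uint_seq_no for readability:
 * with latest = 2, n1 = 0xFFFE (submitted before the wrap) and n2 = 1 (submitted after it),
 * s1 = 0xFFFE - 3 = 0xFFFB and s2 = 1 - 3 = 0xFFFE, so n2 is correctly picked as the later
 * sequence number even though n1 > n2 numerically.
 */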

static inline void add_seq_no_to_list(struct amdgpu_winsys *aws, struct amdgpu_seq_no_fences *fences,
                                      unsigned queue_index, uint_seq_no seq_no)
{
   if (fences->valid_fence_mask & BITFIELD_BIT(queue_index)) {
      fences->seq_no[queue_index] = pick_latest_seq_no(aws, queue_index, seq_no,
                                                       fences->seq_no[queue_index]);
   } else {
      fences->seq_no[queue_index] = seq_no;
      fences->valid_fence_mask |= BITFIELD_BIT(queue_index);
   }
}
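
/* Illustrative sketch only: how a submission path could record its new sequence number on a
 * buffer using the helper above (the surrounding submission code is not part of this header).
 *
 *    // After generating "seq_no" for the IB being submitted on "queue_index":
 *    add_seq_no_to_list(aws, &bo->fences, queue_index, seq_no);
 */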

bool amdgpu_bo_can_reclaim(struct amdgpu_winsys *aws, struct pb_buffer_lean *_buf);
struct pb_buffer_lean *amdgpu_bo_create(struct amdgpu_winsys *aws,
                                        uint64_t size,
                                        unsigned alignment,
                                        enum radeon_bo_domain domain,
                                        enum radeon_bo_flag flags);
void amdgpu_bo_destroy(struct amdgpu_winsys *aws, struct pb_buffer_lean *_buf);
void *amdgpu_bo_map(struct radeon_winsys *rws,
                    struct pb_buffer_lean *buf,
                    struct radeon_cmdbuf *rcs,
                    enum pipe_map_flags usage);
void amdgpu_bo_unmap(struct radeon_winsys *rws, struct pb_buffer_lean *buf);
void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *sws);

bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry);
struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size,
                                     unsigned group_index);
void amdgpu_bo_slab_free(struct amdgpu_winsys *aws, struct pb_slab *slab);
uint64_t amdgpu_bo_get_va(struct pb_buffer_lean *buf);
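
/* Illustrative usage sketch for the allocation/map entry points above. The size, alignment,
 * domain and flag values are only examples, not a recommendation.
 *
 *    struct pb_buffer_lean *buf =
 *       amdgpu_bo_create(aws, 65536, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING);
 *
 *    if (buf) {
 *       void *ptr = amdgpu_bo_map(&aws->dummy_sws.base, buf, NULL, PIPE_MAP_WRITE);
 *       if (ptr) {
 *          memset(ptr, 0, 65536);
 *          amdgpu_bo_unmap(&aws->dummy_sws.base, buf);
 *       }
 *    }
 */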

static inline struct amdgpu_winsys_bo *
amdgpu_winsys_bo(struct pb_buffer_lean *bo)
{
   return (struct amdgpu_winsys_bo *)bo;
}

static inline void
amdgpu_winsys_bo_reference(struct amdgpu_winsys *aws, struct amdgpu_winsys_bo **dst,
                           struct amdgpu_winsys_bo *src)
{
   radeon_bo_reference(&aws->dummy_sws.base,
                       (struct pb_buffer_lean**)dst, (struct pb_buffer_lean*)src);
}

/* Same as amdgpu_winsys_bo_reference, but ignore the value in *dst. */
static inline void
amdgpu_winsys_bo_set_reference(struct amdgpu_winsys_bo **dst, struct amdgpu_winsys_bo *src)
{
   radeon_bo_set_reference((struct pb_buffer_lean**)dst, (struct pb_buffer_lean*)src);
}

/* Unreference dst, but don't assign anything. */
static inline void
amdgpu_winsys_bo_drop_reference(struct amdgpu_winsys *aws, struct amdgpu_winsys_bo *dst)
{
   radeon_bo_drop_reference(&aws->dummy_sws.base, &dst->base);
}
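
/* Illustrative sketch: typical lifetime handling with the reference helpers above.
 *
 *    struct amdgpu_winsys_bo *ref = NULL;
 *
 *    amdgpu_winsys_bo_reference(aws, &ref, bo);   // ref now holds a counted reference to bo
 *    ...
 *    amdgpu_winsys_bo_reference(aws, &ref, NULL); // drop it again; may destroy the BO
 */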

#ifdef __cplusplus
}
#endif

#endif