• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #ifndef FREEDRENO_RINGBUFFER_H_
28 #define FREEDRENO_RINGBUFFER_H_
29 
30 #include <stdio.h>
31 #include "util/u_atomic.h"
32 #include "util/u_debug.h"
33 
34 #include "adreno_common.xml.h"
35 #include "adreno_pm4.xml.h"
36 #include "freedreno_drmif.h"
37 #include "freedreno_pm4.h"
38 
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42 
43 struct fd_submit;
44 struct fd_ringbuffer;
45 
/* Bit flags describing how a ringbuffer is allocated and used.  Each
 * enumerator occupies its own bit so the values can be OR'd together.
 */
enum fd_ringbuffer_flags {

   /* Primary ringbuffer for a submit, ie. an IB1 level rb
    * which kernel must setup RB->IB1 CP_INDIRECT_BRANCH
    * packets.
    */
   FD_RINGBUFFER_PRIMARY = 1 << 0,

   /* Hint that the stateobj will be used for streaming state
    * that is used once or a few times and then discarded.
    *
    * For sub-allocation, non streaming stateobj's should be
    * sub-allocated from a page size buffer, so one long lived
    * state obj doesn't prevent other pages from being freed.
    * (Ie. it would be no worse than allocating a page sized
    * bo for each small non-streaming stateobj).
    *
    * But streaming stateobj's could be sub-allocated from a
    * larger buffer to reduce the alloc/del overhead.
    */
   FD_RINGBUFFER_STREAMING = 1 << 1,

   /* Indicates that "growable" cmdstream can be used,
    * consisting of multiple physical cmdstream buffers
    */
   FD_RINGBUFFER_GROWABLE = 1 << 2,

   /* Internal use only: */
   _FD_RINGBUFFER_OBJECT = 1 << 3,
};
76 
/* A submit object manages/tracks all the state buildup for a "submit"
 * ioctl to the kernel.  Additionally, with the exception of long-lived
 * non-STREAMING stateobj rb's, rb's are allocated from the submit.
 */
struct fd_submit *fd_submit_new(struct fd_pipe *pipe);

/* NOTE: all ringbuffers created from the submit should be unref'd
 * before destroying the submit.
 */
void fd_submit_del(struct fd_submit *submit);

/* NOTE(review): presumably takes an extra reference and hands back the
 * same submit -- confirm against the implementation.
 */
struct fd_submit *fd_submit_ref(struct fd_submit *submit);

/* Allocate a new rb from the submit. */
struct fd_ringbuffer *
fd_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                         enum fd_ringbuffer_flags flags);

/* Flush the submit.
 *
 * in_fence_fd:  -1 for no in-fence, else fence fd
 * use_fence_fd: if true the output fence will be dma_fence fd backed
 */
struct fd_fence *
fd_submit_flush(struct fd_submit *submit, int in_fence_fd, bool use_fence_fd);
100 
101 struct fd_ringbuffer;
102 struct fd_reloc;
103 
/* Backend vtable for a ringbuffer.  The inline helpers in this header
 * dispatch through these hooks.
 */
struct fd_ringbuffer_funcs {
   /* Invoked by fd_ringbuffer_grow(), which asserts that it is non-NULL. */
   void (*grow)(struct fd_ringbuffer *ring, uint32_t size);

   /**
    * Alternative to emit_reloc for the softpin case, where we only need
    * to track that the bo is used (and not track all the extra info that
    * the kernel would need to do a legacy reloc).
    */
   void (*emit_bo)(struct fd_ringbuffer *ring, struct fd_bo *bo);

   /* Only called from fd_ringbuffer_assert_attached() when NDEBUG is
    * not defined.
    */
   void (*assert_attached)(struct fd_ringbuffer *ring, struct fd_bo *bo);

   /* Invoked by fd_ringbuffer_reloc(). */
   void (*emit_reloc)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc);

   /* Invoked by fd_ringbuffer_emit_reloc_ring_full(). */
   uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
                               struct fd_ringbuffer *target, uint32_t cmd_idx);

   /* Optional: fd_ringbuffer_cmd_count() reports 1 when this is NULL. */
   uint32_t (*cmd_count)(struct fd_ringbuffer *ring);

   /* Invoked by fd_ringbuffer_check_size(). */
   bool (*check_size)(struct fd_ringbuffer *ring);

   /* Invoked by fd_ringbuffer_del() once the refcount is no longer
    * positive.
    */
   void (*destroy)(struct fd_ringbuffer *ring);
};
122 
123 /* the ringbuffer object is not opaque so that OUT_RING() type stuff
124  * can be inlined.  Note that users should not make assumptions about
125  * the size of this struct.
126  */
127 struct fd_ringbuffer {
128    uint32_t *cur, *end, *start;
129    const struct fd_ringbuffer_funcs *funcs;
130 
131    // size or end coudl probably go away
132    int size;
133    int32_t refcnt;
134    enum fd_ringbuffer_flags flags;
135 };
136 
/* Allocate a new long-lived state object.  Unlike rb's obtained via
 * fd_submit_new_ringbuffer(), it is not associated with a submit.
 */
struct fd_ringbuffer *
fd_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size);
142 
/*
 * Helpers for ref/unref with some extra debugging.. unref() returns true
 * when the last reference was dropped (ie. the count reached zero)
 */
147 
148 static inline void
ref(int32_t * ref)149 ref(int32_t *ref)
150 {
151    ASSERTED int32_t count = p_atomic_inc_return(ref);
152    /* We should never see a refcnt transition 0->1, this is a sign of a
153     * zombie coming back from the dead!
154     */
155    assert(count != 1);
156 }
157 
/* Atomically decrement the reference count pointed to by 'ref'.
 *
 * Returns true when the decremented count is zero.
 */
static inline bool
unref(int32_t *ref)
{
   const int32_t remaining = p_atomic_dec_return(ref);

   /* Reaching -1 means the count was already zero before this call. */
   assert(remaining != -1);

   return !remaining;
}
165 
166 static inline void
fd_ringbuffer_del(struct fd_ringbuffer * ring)167 fd_ringbuffer_del(struct fd_ringbuffer *ring)
168 {
169    if (--ring->refcnt > 0)
170       return;
171 
172    ring->funcs->destroy(ring);
173 }
174 
175 static inline struct fd_ringbuffer *
fd_ringbuffer_ref(struct fd_ringbuffer * ring)176 fd_ringbuffer_ref(struct fd_ringbuffer *ring)
177 {
178    ring->refcnt++;
179    return ring;
180 }
181 
182 static inline void
fd_ringbuffer_grow(struct fd_ringbuffer * ring,uint32_t ndwords)183 fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords)
184 {
185    assert(ring->funcs->grow); /* unsupported on kgsl */
186 
187    ring->funcs->grow(ring, ring->size);
188 }
189 
190 static inline bool
fd_ringbuffer_check_size(struct fd_ringbuffer * ring)191 fd_ringbuffer_check_size(struct fd_ringbuffer *ring)
192 {
193    return ring->funcs->check_size(ring);
194 }
195 
196 static inline void
fd_ringbuffer_emit(struct fd_ringbuffer * ring,uint32_t data)197 fd_ringbuffer_emit(struct fd_ringbuffer *ring, uint32_t data)
198 {
199    (*ring->cur++) = data;
200 }
201 
/* Reloc access flags: */
#define FD_RELOC_READ  0x0001
#define FD_RELOC_WRITE 0x0002
#define FD_RELOC_DUMP  0x0004

/* Description of a single relocation, as filled in by OUT_RELOC() and
 * handed to the backend's emit_reloc() hook.
 */
struct fd_reloc {
   struct fd_bo *bo;  /* buffer object being referenced */
   uint64_t iova;     /* address after offset/shift/orval are applied */
   uint64_t orval;    /* bits OR'd into the shifted address */
   uint32_t offset;   /* offset added to the bo's iova */
   int32_t shift;     /* negative: shift right, otherwise shift left */
};
212 
213 /* We always mark BOs for write, instead of tracking it across reloc
214  * sources in userspace.  On the kernel side, this means we track a single
215  * excl fence in the BO instead of a set of read fences, which is cheaper.
216  * The downside is that a dmabuf-shared device won't be able to read in
217  * parallel with a read-only access by freedreno, but most other drivers
218  * have decided that that usecase isn't important enough to do this
219  * tracking, as well.
220  */
221 #define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE)
222 
223 /* NOTE: relocs are 2 dwords on a5xx+ */
224 
225 static inline void
fd_ringbuffer_attach_bo(struct fd_ringbuffer * ring,struct fd_bo * bo)226 fd_ringbuffer_attach_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
227 {
228    ring->funcs->emit_bo(ring, bo);
229 }
230 
231 static inline void
fd_ringbuffer_assert_attached(struct fd_ringbuffer * ring,struct fd_bo * bo)232 fd_ringbuffer_assert_attached(struct fd_ringbuffer *ring, struct fd_bo *bo)
233 {
234 #ifndef NDEBUG
235    ring->funcs->assert_attached(ring, bo);
236 #endif
237 }
238 
239 static inline void
fd_ringbuffer_reloc(struct fd_ringbuffer * ring,const struct fd_reloc * reloc)240 fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
241 {
242    ring->funcs->emit_reloc(ring, reloc);
243 }
244 
245 static inline uint32_t
fd_ringbuffer_cmd_count(struct fd_ringbuffer * ring)246 fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
247 {
248    if (!ring->funcs->cmd_count)
249       return 1;
250    return ring->funcs->cmd_count(ring);
251 }
252 
253 static inline uint32_t
fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer * ring,struct fd_ringbuffer * target,uint32_t cmd_idx)254 fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
255                                    struct fd_ringbuffer *target,
256                                    uint32_t cmd_idx)
257 {
258    return ring->funcs->emit_reloc_ring(ring, target, cmd_idx);
259 }
260 
/* Distance in bytes from 'start' to 'end', truncated to 32 bits. */
static inline uint32_t
offset_bytes(void *end, void *start)
{
   const char *hi = (const char *)end;
   const char *lo = (const char *)start;

   return (uint32_t)(hi - lo);
}
266 
267 static inline uint32_t
fd_ringbuffer_size(struct fd_ringbuffer * ring)268 fd_ringbuffer_size(struct fd_ringbuffer *ring)
269 {
270    /* only really needed for stateobj ringbuffers, and won't really
271     * do what you expect for growable rb's.. so lets just restrict
272     * this to stateobj's for now:
273     */
274    assert(!(ring->flags & FD_RINGBUFFER_GROWABLE));
275    return offset_bytes(ring->cur, ring->start);
276 }
277 
278 static inline bool
fd_ringbuffer_empty(struct fd_ringbuffer * ring)279 fd_ringbuffer_empty(struct fd_ringbuffer *ring)
280 {
281    return (fd_ringbuffer_cmd_count(ring) == 1) &&
282           (offset_bytes(ring->cur, ring->start) == 0);
283 }
284 
285 #define LOG_DWORDS 0
286 
287 static inline void
OUT_RING(struct fd_ringbuffer * ring,uint32_t data)288 OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
289 {
290    if (LOG_DWORDS) {
291       fprintf(stderr, "ring[%p]: OUT_RING   %04x:  %08x", ring,
292               (uint32_t)(ring->cur - ring->start), data);
293    }
294    fd_ringbuffer_emit(ring, data);
295 }
296 
/* Compute the address value to emit for a reloc: the bo's iova plus
 * 'offset', shifted right by -shift when 'shift' is negative or left by
 * 'shift' otherwise, then OR'd with 'orval'.
 */
static inline uint64_t
__reloc_iova(struct fd_bo *bo, uint32_t offset, uint64_t orval, int32_t shift)
{
   uint64_t addr = fd_bo_get_iova(bo) + offset;

   addr = (shift < 0) ? (addr >> -shift) : (addr << shift);

   return addr | orval;
}
311 
312 /*
313  * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+
314  */
315 static inline void
OUT_RELOC(struct fd_ringbuffer * ring,struct fd_bo * bo,uint32_t offset,uint64_t orval,int32_t shift)316 OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset,
317           uint64_t orval, int32_t shift)
318 {
319    if (LOG_DWORDS) {
320       fprintf(stderr, "ring[%p]: OUT_RELOC   %04x:  %p+%u << %d", ring,
321               (uint32_t)(ring->cur - ring->start), bo, offset, shift);
322    }
323    assert(offset < fd_bo_size(bo));
324 
325    uint64_t iova = __reloc_iova(bo, offset, orval, shift);
326 
327 #if FD_BO_NO_HARDPIN
328    uint64_t *cur = (uint64_t *)ring->cur;
329    *cur = iova;
330    ring->cur += 2;
331    fd_ringbuffer_assert_attached(ring, bo);
332 #else
333    struct fd_reloc reloc = {
334          .bo = bo,
335          .iova = iova,
336          .orval = orval,
337          .offset = offset,
338          .shift = shift,
339    };
340 
341    fd_ringbuffer_reloc(ring, &reloc);
342 #endif
343 }
344 
/* Emit a reference from 'ring' to the first cmd (index 0) of 'target'.
 * The value returned by the backend hook is discarded.
 */
static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   const uint32_t first_cmd = 0;

   fd_ringbuffer_emit_reloc_ring_full(ring, target, first_cmd);
}
350 
351 static inline void
BEGIN_RING(struct fd_ringbuffer * ring,uint32_t ndwords)352 BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
353 {
354    if (unlikely(ring->cur + ndwords > ring->end))
355       fd_ringbuffer_grow(ring, ndwords);
356 }
357 
/* Start a type-0 packet: reserve space for the header plus 'cnt' payload
 * dwords, then emit the header built by pm4_pkt0_hdr().
 */
static inline void
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);

   const uint32_t hdr = pm4_pkt0_hdr(regindx, cnt);
   OUT_RING(ring, hdr);
}
364 
365 static inline void
OUT_PKT2(struct fd_ringbuffer * ring)366 OUT_PKT2(struct fd_ringbuffer *ring)
367 {
368    BEGIN_RING(ring, 1);
369    OUT_RING(ring, CP_TYPE2_PKT);
370 }
371 
372 static inline void
OUT_PKT3(struct fd_ringbuffer * ring,uint8_t opcode,uint16_t cnt)373 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
374 {
375    BEGIN_RING(ring, cnt + 1);
376    OUT_RING(ring, CP_TYPE3_PKT | ((cnt - 1) << 16) | ((opcode & 0xFF) << 8));
377 }
378 
379 /*
380  * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
381  */
382 
/* Start a type-4 packet: reserve space for the header plus 'cnt' payload
 * dwords, then emit the header built by pm4_pkt4_hdr().
 */
static inline void
OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);

   const uint16_t reg = (uint16_t)regindx;
   const uint16_t ndwords = (uint16_t)cnt;

   OUT_RING(ring, pm4_pkt4_hdr(reg, ndwords));
}
389 
/* Start a type-7 packet: reserve space for the header plus 'cnt' payload
 * dwords, then emit the header built by pm4_pkt7_hdr().  'opcode' is
 * truncated to 8 bits and 'cnt' to 16 bits for the header.
 */
static inline void
OUT_PKT7(struct fd_ringbuffer *ring, uint32_t opcode, uint32_t cnt)
{
   BEGIN_RING(ring, cnt + 1);

   const uint8_t op = (uint8_t)opcode;
   const uint16_t ndwords = (uint16_t)cnt;

   OUT_RING(ring, pm4_pkt7_hdr(op, ndwords));
}
396 
397 static inline void
OUT_WFI(struct fd_ringbuffer * ring)398 OUT_WFI(struct fd_ringbuffer *ring)
399 {
400    OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
401    OUT_RING(ring, 0x00000000);
402 }
403 
404 static inline void
OUT_WFI5(struct fd_ringbuffer * ring)405 OUT_WFI5(struct fd_ringbuffer *ring)
406 {
407    OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
408 }
409 
410 #ifdef __cplusplus
411 } /* end of extern "C" */
412 #endif
413 
414 #endif /* FREEDRENO_RINGBUFFER_H_ */
415