• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #ifndef FREEDRENO_RINGBUFFER_H_
28 #define FREEDRENO_RINGBUFFER_H_
29 
30 #include <stdio.h>
31 #include "util/u_atomic.h"
32 #include "util/u_debug.h"
33 #include "util/u_queue.h"
34 
35 #include "adreno_common.xml.h"
36 #include "adreno_pm4.xml.h"
37 #include "freedreno_drmif.h"
38 #include "freedreno_pm4.h"
39 
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 
44 struct fd_submit;
45 struct fd_ringbuffer;
46 
enum fd_ringbuffer_flags {

   /* The top-level ringbuffer of a submit, ie. an IB1 level rb
    * for which the kernel must emit the RB->IB1 CP_INDIRECT_BRANCH
    * packets.
    */
   FD_RINGBUFFER_PRIMARY = 0x1,

   /* Hint that this stateobj holds streaming state: written once
    * (or a handful of times) and then thrown away.
    *
    * For sub-allocation, a non-streaming stateobj should come out
    * of a page-sized buffer, so a single long-lived stateobj does
    * not pin other pages and prevent them from being freed.  (That
    * is no worse than giving each small non-streaming stateobj its
    * own page-sized bo.)
    *
    * Streaming stateobjs, on the other hand, can be sub-allocated
    * from a larger buffer to cut down on alloc/free overhead.
    */
   FD_RINGBUFFER_STREAMING = 0x2,

   /* The cmdstream may "grow", ie. be backed by multiple physical
    * cmdstream buffers chained together.
    */
   FD_RINGBUFFER_GROWABLE = 0x4,

   /* Internal use only: */
   _FD_RINGBUFFER_OBJECT = 0x8,
};
77 
/* A submit object tracks all the state built up for a single "submit"
 * ioctl to the kernel.  With the exception of long-lived non-STREAMING
 * stateobj rb's, ringbuffers are allocated out of the submit.
 */
struct fd_submit *fd_submit_new(struct fd_pipe *pipe);

/* NOTE: every ringbuffer created from a submit must be unref'd before
 * the submit itself is destroyed.
 */
void fd_submit_del(struct fd_submit *submit);

struct fd_submit *fd_submit_ref(struct fd_submit *submit);

/* Allocate a new rb from the submit. */
struct fd_ringbuffer *fd_submit_new_ringbuffer(struct fd_submit *submit,
                                               uint32_t size,
                                               enum fd_ringbuffer_flags flags);
95 
96 /**
97  * Encapsulates submit out-fence(s), which consist of a 'timestamp' (per-
98  * pipe (submitqueue) sequence number) and optionally, if requested, an
99  * out-fence-fd
100  */
101 struct fd_submit_fence {
102    /**
103     * The ready fence is signaled once the submit is actually flushed down
104     * to the kernel, and fence/fence_fd are populated.  You must wait for
105     * this fence to be signaled before reading fence/fence_fd.
106     */
107    struct util_queue_fence ready;
108 
109    struct fd_fence fence;
110 
111    /**
112     * Optional dma_fence fd, returned by submit if use_fence_fd is true
113     */
114    int fence_fd;
115    bool use_fence_fd;
116 };
117 
/* in_fence_fd: pass -1 for no in-fence, otherwise a fence fd.
 * out_fence may be NULL when no output fence is needed.
 */
int fd_submit_flush(struct fd_submit *submit, int in_fence_fd,
                    struct fd_submit_fence *out_fence);
123 
struct fd_ringbuffer;
struct fd_reloc;

/* Backend vtable for ringbuffer implementations: */
struct fd_ringbuffer_funcs {
   void (*grow)(struct fd_ringbuffer *ring, uint32_t size);
   void (*emit_reloc)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc);
   uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
                               struct fd_ringbuffer *target, uint32_t cmd_idx);
   uint32_t (*cmd_count)(struct fd_ringbuffer *ring);
   bool (*check_size)(struct fd_ringbuffer *ring);
   void (*destroy)(struct fd_ringbuffer *ring);
};
136 
137 /* the ringbuffer object is not opaque so that OUT_RING() type stuff
138  * can be inlined.  Note that users should not make assumptions about
139  * the size of this struct.
140  */
141 struct fd_ringbuffer {
142    uint32_t *cur, *end, *start;
143    const struct fd_ringbuffer_funcs *funcs;
144 
145    // size or end coudl probably go away
146    int size;
147    int32_t refcnt;
148    enum fd_ringbuffer_flags flags;
149 };
150 
/* Allocate a new long-lived state object that is not tied to any
 * submit:
 */
struct fd_ringbuffer *fd_ringbuffer_new_object(struct fd_pipe *pipe,
                                               uint32_t size);
156 
157 static inline void
fd_ringbuffer_del(struct fd_ringbuffer * ring)158 fd_ringbuffer_del(struct fd_ringbuffer *ring)
159 {
160    if (!p_atomic_dec_zero(&ring->refcnt))
161       return;
162 
163    ring->funcs->destroy(ring);
164 }
165 
166 static inline struct fd_ringbuffer *
fd_ringbuffer_ref(struct fd_ringbuffer * ring)167 fd_ringbuffer_ref(struct fd_ringbuffer *ring)
168 {
169    p_atomic_inc(&ring->refcnt);
170    return ring;
171 }
172 
173 static inline void
fd_ringbuffer_grow(struct fd_ringbuffer * ring,uint32_t ndwords)174 fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords)
175 {
176    assert(ring->funcs->grow); /* unsupported on kgsl */
177 
178    /* there is an upper bound on IB size, which appears to be 0x0fffff */
179    ring->size = MIN2(ring->size << 1, 0x0fffff);
180 
181    ring->funcs->grow(ring, ring->size);
182 }
183 
184 static inline bool
fd_ringbuffer_check_size(struct fd_ringbuffer * ring)185 fd_ringbuffer_check_size(struct fd_ringbuffer *ring)
186 {
187    return ring->funcs->check_size(ring);
188 }
189 
190 static inline void
fd_ringbuffer_emit(struct fd_ringbuffer * ring,uint32_t data)191 fd_ringbuffer_emit(struct fd_ringbuffer *ring, uint32_t data)
192 {
193    (*ring->cur++) = data;
194 }
195 
struct fd_reloc {
   struct fd_bo *bo;
   uint64_t iova;
   uint64_t orval;
#define FD_RELOC_READ  0x0001
#define FD_RELOC_WRITE 0x0002
#define FD_RELOC_DUMP  0x0004
   uint32_t offset;
   int32_t shift;
};

/* BOs are always marked for write instead of tracking read/write per
 * reloc source in userspace.  Kernel-side this means a single excl
 * fence per BO rather than a set of read fences, which is cheaper.
 * The trade-off: a dmabuf-sharing device cannot read in parallel with
 * a read-only access by freedreno — but most other drivers have
 * likewise decided that usecase isn't important enough to justify the
 * extra tracking.
 */
#define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE)
216 
217 /* NOTE: relocs are 2 dwords on a5xx+ */
218 
219 static inline void
fd_ringbuffer_reloc(struct fd_ringbuffer * ring,const struct fd_reloc * reloc)220 fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
221 {
222    ring->funcs->emit_reloc(ring, reloc);
223 }
224 
225 static inline uint32_t
fd_ringbuffer_cmd_count(struct fd_ringbuffer * ring)226 fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
227 {
228    if (!ring->funcs->cmd_count)
229       return 1;
230    return ring->funcs->cmd_count(ring);
231 }
232 
233 static inline uint32_t
fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer * ring,struct fd_ringbuffer * target,uint32_t cmd_idx)234 fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
235                                    struct fd_ringbuffer *target,
236                                    uint32_t cmd_idx)
237 {
238    return ring->funcs->emit_reloc_ring(ring, target, cmd_idx);
239 }
240 
/* Byte distance between two pointers into the same buffer. */
static inline uint32_t
offset_bytes(void *end, void *start)
{
   return (uint32_t)((char *)end - (char *)start);
}
246 
247 static inline uint32_t
fd_ringbuffer_size(struct fd_ringbuffer * ring)248 fd_ringbuffer_size(struct fd_ringbuffer *ring)
249 {
250    /* only really needed for stateobj ringbuffers, and won't really
251     * do what you expect for growable rb's.. so lets just restrict
252     * this to stateobj's for now:
253     */
254    assert(!(ring->flags & FD_RINGBUFFER_GROWABLE));
255    return offset_bytes(ring->cur, ring->start);
256 }
257 
258 static inline bool
fd_ringbuffer_empty(struct fd_ringbuffer * ring)259 fd_ringbuffer_empty(struct fd_ringbuffer *ring)
260 {
261    return (fd_ringbuffer_cmd_count(ring) == 1) &&
262           (offset_bytes(ring->cur, ring->start) == 0);
263 }
264 
265 #define LOG_DWORDS 0
266 
267 static inline void
OUT_RING(struct fd_ringbuffer * ring,uint32_t data)268 OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
269 {
270    if (LOG_DWORDS) {
271       fprintf(stderr, "ring[%p]: OUT_RING   %04x:  %08x", ring,
272               (uint32_t)(ring->cur - ring->start), data);
273    }
274    fd_ringbuffer_emit(ring, data);
275 }
276 
277 /*
278  * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+
279  */
280 static inline void
OUT_RELOC(struct fd_ringbuffer * ring,struct fd_bo * bo,uint32_t offset,uint64_t orval,int32_t shift)281 OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset,
282           uint64_t orval, int32_t shift)
283 {
284    if (LOG_DWORDS) {
285       fprintf(stderr, "ring[%p]: OUT_RELOC   %04x:  %p+%u << %d", ring,
286               (uint32_t)(ring->cur - ring->start), bo, offset, shift);
287    }
288    assert(offset < fd_bo_size(bo));
289 
290    uint64_t iova = fd_bo_get_iova(bo) + offset;
291 
292    if (shift < 0)
293       iova >>= -shift;
294    else
295       iova <<= shift;
296 
297    iova |= orval;
298 
299    struct fd_reloc reloc = {
300          .bo = bo,
301          .iova = iova,
302          .orval = orval,
303          .offset = offset,
304          .shift = shift,
305    };
306 
307    fd_ringbuffer_reloc(ring, &reloc);
308 }
309 
/* Emit a reloc to the first cmd buffer of target. */
static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
}
315 
316 static inline void
BEGIN_RING(struct fd_ringbuffer * ring,uint32_t ndwords)317 BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
318 {
319    if (unlikely(ring->cur + ndwords > ring->end))
320       fd_ringbuffer_grow(ring, ndwords);
321 }
322 
/* Emit a type-0 packet header (pre-a5xx register writes). */
static inline void
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt0_hdr(regindx, cnt));
}
329 
330 static inline void
OUT_PKT2(struct fd_ringbuffer * ring)331 OUT_PKT2(struct fd_ringbuffer *ring)
332 {
333    BEGIN_RING(ring, 1);
334    OUT_RING(ring, CP_TYPE2_PKT);
335 }
336 
337 static inline void
OUT_PKT3(struct fd_ringbuffer * ring,uint8_t opcode,uint16_t cnt)338 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
339 {
340    BEGIN_RING(ring, cnt + 1);
341    OUT_RING(ring, CP_TYPE3_PKT | ((cnt - 1) << 16) | ((opcode & 0xFF) << 8));
342 }
343 
/*
 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
 */

/* Emit a type-4 packet header (a5xx+ register writes). */
static inline void
OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt4_hdr(regindx, cnt));
}
354 
/* Emit a type-7 packet header (a5xx+ opcode packets). */
static inline void
OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt7_hdr(opcode, cnt));
}
361 
362 static inline void
OUT_WFI(struct fd_ringbuffer * ring)363 OUT_WFI(struct fd_ringbuffer *ring)
364 {
365    OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
366    OUT_RING(ring, 0x00000000);
367 }
368 
369 static inline void
OUT_WFI5(struct fd_ringbuffer * ring)370 OUT_WFI5(struct fd_ringbuffer *ring)
371 {
372    OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
373 }
374 
375 #ifdef __cplusplus
376 } /* end of extern "C" */
377 #endif
378 
379 #endif /* FREEDRENO_RINGBUFFER_H_ */
380