1 /*
2 * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #ifndef FREEDRENO_RINGBUFFER_H_
28 #define FREEDRENO_RINGBUFFER_H_
29
30 #include <stdio.h>
31 #include "util/u_atomic.h"
32 #include "util/u_debug.h"
33 #include "util/u_queue.h"
34
35 #include "adreno_common.xml.h"
36 #include "adreno_pm4.xml.h"
37 #include "freedreno_drmif.h"
38 #include "freedreno_pm4.h"
39
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43
44 struct fd_submit;
45 struct fd_ringbuffer;
46
/* Flags for fd_submit_new_ringbuffer() / ringbuffer allocation; may be
 * OR'd together where that makes sense.
 */
enum fd_ringbuffer_flags {

   /* Primary ringbuffer for a submit, ie. an IB1 level rb
    * which kernel must setup RB->IB1 CP_INDIRECT_BRANCH
    * packets.
    */
   FD_RINGBUFFER_PRIMARY = 0x1,

   /* Hint that the stateobj will be used for streaming state
    * that is used once or a few times and then discarded.
    *
    * For sub-allocation, non streaming stateobj's should be
    * sub-allocated from a page size buffer, so one long lived
    * state obj doesn't prevent other pages from being freed.
    * (Ie. it would be no worse than allocating a page sized
    * bo for each small non-streaming stateobj).
    *
    * But streaming stateobj's could be sub-allocated from a
    * larger buffer to reduce the alloc/del overhead.
    */
   FD_RINGBUFFER_STREAMING = 0x2,

   /* Indicates that "growable" cmdstream can be used,
    * consisting of multiple physical cmdstream buffers
    */
   FD_RINGBUFFER_GROWABLE = 0x4,

   /* Internal use only: */
   _FD_RINGBUFFER_OBJECT = 0x8,
};
77
/* A submit object manages/tracks all the state buildup for a "submit"
 * ioctl to the kernel. Additionally, with the exception of long-lived
 * non-STREAMING stateobj rb's, rb's are allocated from the submit.
 */
struct fd_submit *fd_submit_new(struct fd_pipe *pipe);

/* Drop a reference to the submit.
 *
 * NOTE: all ringbuffer's created from the submit should be unref'd
 * before destroying the submit.
 */
void fd_submit_del(struct fd_submit *submit);

/* Take an additional reference on the submit. */
struct fd_submit * fd_submit_ref(struct fd_submit *submit);

/* Allocate a new rb from the submit.  'size' is in bytes; see
 * enum fd_ringbuffer_flags for 'flags'.
 */
struct fd_ringbuffer *fd_submit_new_ringbuffer(struct fd_submit *submit,
                                               uint32_t size,
                                               enum fd_ringbuffer_flags flags);
95
/**
 * Encapsulates submit out-fence(s), which consist of a 'timestamp' (per-
 * pipe (submitqueue) sequence number) and optionally, if requested, an
 * out-fence-fd
 */
struct fd_submit_fence {
   /**
    * The ready fence is signaled once the submit is actually flushed down
    * to the kernel, and fence/fence_fd are populated. You must wait for
    * this fence to be signaled before reading fence/fence_fd.
    */
   struct util_queue_fence ready;

   /** The per-pipe sequence-number fence; valid once 'ready' is signaled. */
   struct fd_fence fence;

   /**
    * Optional dma_fence fd, returned by submit if use_fence_fd is true
    */
   int fence_fd;
   /** Set by the caller to request that fence_fd be populated on flush. */
   bool use_fence_fd;
};
117
/* Flush the submit down to the kernel.
 *
 * in_fence_fd: -1 for no in-fence, else fence fd
 * out_fence can be NULL if no output fence is required
 */
int fd_submit_flush(struct fd_submit *submit, int in_fence_fd,
                    struct fd_submit_fence *out_fence);
123
124 struct fd_ringbuffer;
125 struct fd_reloc;
126
/* Backend vfunc table for a ringbuffer.  Some hooks are optional and
 * NULL-checked at the call site (see fd_ringbuffer_grow() and
 * fd_ringbuffer_cmd_count()).
 */
struct fd_ringbuffer_funcs {
   /* Attach a new physical cmdstream buffer of 'size' bytes to a
    * GROWABLE ring (optional; asserted non-NULL before use).
    */
   void (*grow)(struct fd_ringbuffer *ring, uint32_t size);
   /* Emit a relocation at the current write position. */
   void (*emit_reloc)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc);
   /* Emit a reference to cmd buffer 'cmd_idx' of 'target' into 'ring'. */
   uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
                               struct fd_ringbuffer *target, uint32_t cmd_idx);
   /* Number of physical cmd buffers (optional; 1 assumed when NULL). */
   uint32_t (*cmd_count)(struct fd_ringbuffer *ring);
   bool (*check_size)(struct fd_ringbuffer *ring);
   /* Free the ring; invoked when the last reference is dropped. */
   void (*destroy)(struct fd_ringbuffer *ring);
};
136
/* the ringbuffer object is not opaque so that OUT_RING() type stuff
 * can be inlined. Note that users should not make assumptions about
 * the size of this struct.
 */
struct fd_ringbuffer {
   /* cur is the write pointer; start/end bound the current buffer */
   uint32_t *cur, *end, *start;
   const struct fd_ringbuffer_funcs *funcs;

   // size or end could probably go away
   int size;
   int32_t refcnt;
   enum fd_ringbuffer_flags flags;
};
150
/* Allocate a new long-lived state object, not associated with
 * a submit:
 */
struct fd_ringbuffer *fd_ringbuffer_new_object(struct fd_pipe *pipe,
                                               uint32_t size);
156
157 static inline void
fd_ringbuffer_del(struct fd_ringbuffer * ring)158 fd_ringbuffer_del(struct fd_ringbuffer *ring)
159 {
160 if (!p_atomic_dec_zero(&ring->refcnt))
161 return;
162
163 ring->funcs->destroy(ring);
164 }
165
166 static inline struct fd_ringbuffer *
fd_ringbuffer_ref(struct fd_ringbuffer * ring)167 fd_ringbuffer_ref(struct fd_ringbuffer *ring)
168 {
169 p_atomic_inc(&ring->refcnt);
170 return ring;
171 }
172
/* Grow a GROWABLE ring: double its size (capped at the hw IB size
 * limit) and ask the backend to attach a new physical buffer.
 *
 * NOTE(review): 'ndwords' (the caller's immediate need, see
 * BEGIN_RING()) is not consulted here — presumably one new buffer is
 * always assumed large enough for a single packet; confirm against the
 * backend grow() implementations.
 */
static inline void
fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords)
{
   assert(ring->funcs->grow); /* unsupported on kgsl */

   /* there is an upper bound on IB size, which appears to be 0x0fffff */
   ring->size = MIN2(ring->size << 1, 0x0fffff);

   ring->funcs->grow(ring, ring->size);
}
183
184 static inline bool
fd_ringbuffer_check_size(struct fd_ringbuffer * ring)185 fd_ringbuffer_check_size(struct fd_ringbuffer *ring)
186 {
187 return ring->funcs->check_size(ring);
188 }
189
190 static inline void
fd_ringbuffer_emit(struct fd_ringbuffer * ring,uint32_t data)191 fd_ringbuffer_emit(struct fd_ringbuffer *ring, uint32_t data)
192 {
193 (*ring->cur++) = data;
194 }
195
/* Relocation entry: ties a position in the cmdstream to a buffer object
 * address.  'iova' is the fully computed (shifted and or'd) value to be
 * written; bo/offset/orval/shift record how it was built — see
 * OUT_RELOC().
 */
struct fd_reloc {
   struct fd_bo *bo;
   uint64_t iova;
   uint64_t orval;
#define FD_RELOC_READ 0x0001
#define FD_RELOC_WRITE 0x0002
#define FD_RELOC_DUMP 0x0004
   uint32_t offset;
   int32_t shift;
};

/* We always mark BOs for write, instead of tracking it across reloc
 * sources in userspace. On the kernel side, this means we track a single
 * excl fence in the BO instead of a set of read fences, which is cheaper.
 * The downside is that a dmabuf-shared device won't be able to read in
 * parallel with a read-only access by freedreno, but most other drivers
 * have decided that that usecase isn't important enough to do this
 * tracking, as well.
 */
#define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE)

/* NOTE: relocs are 2 dwords on a5xx+ */
218
219 static inline void
fd_ringbuffer_reloc(struct fd_ringbuffer * ring,const struct fd_reloc * reloc)220 fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
221 {
222 ring->funcs->emit_reloc(ring, reloc);
223 }
224
225 static inline uint32_t
fd_ringbuffer_cmd_count(struct fd_ringbuffer * ring)226 fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
227 {
228 if (!ring->funcs->cmd_count)
229 return 1;
230 return ring->funcs->cmd_count(ring);
231 }
232
233 static inline uint32_t
fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer * ring,struct fd_ringbuffer * target,uint32_t cmd_idx)234 fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
235 struct fd_ringbuffer *target,
236 uint32_t cmd_idx)
237 {
238 return ring->funcs->emit_reloc_ring(ring, target, cmd_idx);
239 }
240
241 static inline uint32_t
offset_bytes(void * end,void * start)242 offset_bytes(void *end, void *start)
243 {
244 return ((char *)end) - ((char *)start);
245 }
246
247 static inline uint32_t
fd_ringbuffer_size(struct fd_ringbuffer * ring)248 fd_ringbuffer_size(struct fd_ringbuffer *ring)
249 {
250 /* only really needed for stateobj ringbuffers, and won't really
251 * do what you expect for growable rb's.. so lets just restrict
252 * this to stateobj's for now:
253 */
254 assert(!(ring->flags & FD_RINGBUFFER_GROWABLE));
255 return offset_bytes(ring->cur, ring->start);
256 }
257
258 static inline bool
fd_ringbuffer_empty(struct fd_ringbuffer * ring)259 fd_ringbuffer_empty(struct fd_ringbuffer *ring)
260 {
261 return (fd_ringbuffer_cmd_count(ring) == 1) &&
262 (offset_bytes(ring->cur, ring->start) == 0);
263 }
264
265 #define LOG_DWORDS 0
266
267 static inline void
OUT_RING(struct fd_ringbuffer * ring,uint32_t data)268 OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
269 {
270 if (LOG_DWORDS) {
271 fprintf(stderr, "ring[%p]: OUT_RING %04x: %08x", ring,
272 (uint32_t)(ring->cur - ring->start), data);
273 }
274 fd_ringbuffer_emit(ring, data);
275 }
276
277 /*
278 * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+
279 */
280 static inline void
OUT_RELOC(struct fd_ringbuffer * ring,struct fd_bo * bo,uint32_t offset,uint64_t orval,int32_t shift)281 OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset,
282 uint64_t orval, int32_t shift)
283 {
284 if (LOG_DWORDS) {
285 fprintf(stderr, "ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring,
286 (uint32_t)(ring->cur - ring->start), bo, offset, shift);
287 }
288 assert(offset < fd_bo_size(bo));
289
290 uint64_t iova = fd_bo_get_iova(bo) + offset;
291
292 if (shift < 0)
293 iova >>= -shift;
294 else
295 iova <<= shift;
296
297 iova |= orval;
298
299 struct fd_reloc reloc = {
300 .bo = bo,
301 .iova = iova,
302 .orval = orval,
303 .offset = offset,
304 .shift = shift,
305 };
306
307 fd_ringbuffer_reloc(ring, &reloc);
308 }
309
/* Emit a reference to 'target' (using its first cmd buffer) into
 * 'ring'; the returned dword count is ignored.
 */
static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
}
315
/* Reserve space for 'ndwords' dwords, growing the ring if they would
 * not fit in the current buffer.
 */
static inline void
BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
{
   if (unlikely(ring->cur + ndwords > ring->end))
      fd_ringbuffer_grow(ring, ndwords);
}
322
323 static inline void
OUT_PKT0(struct fd_ringbuffer * ring,uint16_t regindx,uint16_t cnt)324 OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
325 {
326 BEGIN_RING(ring, cnt + 1);
327 OUT_RING(ring, pm4_pkt0_hdr(regindx, cnt));
328 }
329
/* Emit a (pre-a5xx) type-2 packet (a single no-op/padding dword). */
static inline void
OUT_PKT2(struct fd_ringbuffer *ring)
{
   BEGIN_RING(ring, 1);
   OUT_RING(ring, CP_TYPE2_PKT);
}
336
337 static inline void
OUT_PKT3(struct fd_ringbuffer * ring,uint8_t opcode,uint16_t cnt)338 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
339 {
340 BEGIN_RING(ring, cnt + 1);
341 OUT_RING(ring, CP_TYPE3_PKT | ((cnt - 1) << 16) | ((opcode & 0xFF) << 8));
342 }
343
344 /*
345 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
346 */
347
348 static inline void
OUT_PKT4(struct fd_ringbuffer * ring,uint16_t regindx,uint16_t cnt)349 OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
350 {
351 BEGIN_RING(ring, cnt + 1);
352 OUT_RING(ring, pm4_pkt4_hdr(regindx, cnt));
353 }
354
355 static inline void
OUT_PKT7(struct fd_ringbuffer * ring,uint8_t opcode,uint16_t cnt)356 OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
357 {
358 BEGIN_RING(ring, cnt + 1);
359 OUT_RING(ring, pm4_pkt7_hdr(opcode, cnt));
360 }
361
/* Emit a wait-for-idle, pre-a5xx (type-3) encoding. */
static inline void
OUT_WFI(struct fd_ringbuffer *ring)
{
   OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
   OUT_RING(ring, 0x00000000);
}
368
/* Emit a wait-for-idle, a5xx+ (type-7) encoding — no payload dwords. */
static inline void
OUT_WFI5(struct fd_ringbuffer *ring)
{
   OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
}
374
375 #ifdef __cplusplus
376 } /* end of extern "C" */
377 #endif
378
379 #endif /* FREEDRENO_RINGBUFFER_H_ */
380