1 /*
2 * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #ifndef FREEDRENO_RINGBUFFER_H_
28 #define FREEDRENO_RINGBUFFER_H_
29
30 #include <stdio.h>
31 #include "util/u_debug.h"
32 #include "util/u_dynarray.h"
33
34 #include "freedreno_drmif.h"
35 #include "adreno_common.xml.h"
36 #include "adreno_pm4.xml.h"
37
38 struct fd_submit;
39 struct fd_ringbuffer;
40
/* Bit flags describing how a ringbuffer is created/used. */
enum fd_ringbuffer_flags {

	/* The ring is the primary (IB1 level) ringbuffer of a submit, ie.
	 * the one for which the kernel must set up the RB->IB1
	 * CP_INDIRECT_BRANCH packets.
	 */
	FD_RINGBUFFER_PRIMARY = (1 << 0),

	/* Hint that the stateobj holds streaming state, ie. state that
	 * is used once or a few times and then thrown away.
	 *
	 * Regarding sub-allocation: non-streaming stateobj's should be
	 * sub-allocated from a page size buffer, so that a single long
	 * lived state obj does not keep other pages from being freed.
	 * (Ie. it would be no worse than allocating a page sized bo for
	 * each small non-streaming stateobj.)
	 *
	 * Streaming stateobj's, on the other hand, could be sub-allocated
	 * from a larger buffer to cut down on alloc/del overhead.
	 */
	FD_RINGBUFFER_STREAMING = (1 << 1),

	/* "Growable" cmdstream may be used, ie. cmdstream made up of
	 * multiple physical cmdstream buffers.
	 */
	FD_RINGBUFFER_GROWABLE = (1 << 2),

	/* Internal use only: */
	_FD_RINGBUFFER_OBJECT = (1 << 3),
};
71
/* A submit object manages/tracks all the state buildup for a "submit"
 * ioctl to the kernel.  Additionally, with the exception of long-lived
 * non-STREAMING stateobj rb's, rb's are allocated from the submit.
 */
struct fd_submit * fd_submit_new(struct fd_pipe *pipe);

/* NOTE: all ringbuffers created from the submit should be unref'd
 * before destroying the submit.
 */
void fd_submit_del(struct fd_submit *submit);

/* Allocate a new rb from the submit.
 *
 * flags is a mask of enum fd_ringbuffer_flags values.
 * NOTE(review): the unit of size (bytes vs. dwords) is not visible in
 * this header -- confirm against the implementation.
 */
struct fd_ringbuffer * fd_submit_new_ringbuffer(struct fd_submit *submit,
		uint32_t size, enum fd_ringbuffer_flags flags);

/* in_fence_fd: -1 for no in-fence, else fence fd
 * out_fence_fd: NULL for no output-fence requested, else ptr to return out-fence
 * out_fence: NOTE(review): presumably an optional out-parameter for a
 *    fence value of the submit; not documented here -- confirm against
 *    the implementation.
 */
int fd_submit_flush(struct fd_submit *submit,
		int in_fence_fd, int *out_fence_fd,
		uint32_t *out_fence);
93
94 struct fd_ringbuffer;
95 struct fd_reloc;
96
/* Backend callbacks for a ringbuffer.  The inline helpers in this header
 * dispatch through this table:
 *
 *  - grow: called by fd_ringbuffer_grow() with the ring's updated size.
 *    May be NULL on backends that cannot grow (fd_ringbuffer_grow()
 *    asserts that it is set).
 *  - emit_reloc: called by fd_ringbuffer_reloc().
 *  - emit_reloc_ring: called by fd_ringbuffer_emit_reloc_ring_full().
 *  - cmd_count: optional; fd_ringbuffer_cmd_count() returns 1 when it
 *    is NULL.
 *  - destroy: called by fd_ringbuffer_del() when the refcnt is no longer
 *    positive.
 */
struct fd_ringbuffer_funcs {
	void (*grow)(struct fd_ringbuffer *ring, uint32_t size);
	void (*emit_reloc)(struct fd_ringbuffer *ring,
			const struct fd_reloc *reloc);
	uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
			struct fd_ringbuffer *target, uint32_t cmd_idx);
	uint32_t (*cmd_count)(struct fd_ringbuffer *ring);
	void (*destroy)(struct fd_ringbuffer *ring);
};
106
/* the ringbuffer object is not opaque so that OUT_RING() type stuff
 * can be inlined.  Note that users should not make assumptions about
 * the size of this struct.
 */
struct fd_ringbuffer {
	/* cur:   next dword to write (advanced by fd_ringbuffer_emit())
	 * end:   limit that BEGIN_RING() checks cur against
	 * start: base that fd_ringbuffer_size() measures cur from
	 */
	uint32_t *cur, *end, *start;
	/* backend callbacks the inline helpers dispatch through */
	const struct fd_ringbuffer_funcs *funcs;

	// size or end could probably go away
	/* updated by fd_ringbuffer_grow() and passed on to funcs->grow() */
	int size;
	/* reference count, see fd_ringbuffer_ref() / fd_ringbuffer_del() */
	int32_t refcnt;
	enum fd_ringbuffer_flags flags;
};
120
/* Allocate a new long-lived state object, not associated with
 * a submit:
 *
 * NOTE(review): the unit of size (bytes vs. dwords) is not visible in
 * this header -- confirm against the implementation.
 */
struct fd_ringbuffer * fd_ringbuffer_new_object(struct fd_pipe *pipe,
		uint32_t size);
126
127 static inline void
fd_ringbuffer_del(struct fd_ringbuffer * ring)128 fd_ringbuffer_del(struct fd_ringbuffer *ring)
129 {
130 if (--ring->refcnt > 0)
131 return;
132
133 ring->funcs->destroy(ring);
134 }
135
136 static inline
137 struct fd_ringbuffer *
fd_ringbuffer_ref(struct fd_ringbuffer * ring)138 fd_ringbuffer_ref(struct fd_ringbuffer *ring)
139 {
140 ring->refcnt++;
141 return ring;
142 }
143
144 static inline void
fd_ringbuffer_grow(struct fd_ringbuffer * ring,uint32_t ndwords)145 fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords)
146 {
147 assert(ring->funcs->grow); /* unsupported on kgsl */
148
149 /* there is an upper bound on IB size, which appears to be 0x100000 */
150 if (ring->size < 0x100000)
151 ring->size *= 2;
152
153 ring->funcs->grow(ring, ring->size);
154 }
155
156 static inline void
fd_ringbuffer_emit(struct fd_ringbuffer * ring,uint32_t data)157 fd_ringbuffer_emit(struct fd_ringbuffer *ring,
158 uint32_t data)
159 {
160 (*ring->cur++) = data;
161 }
162
/* Access flags for a reloc'd bo: */
#define FD_RELOC_READ             0x0001
#define FD_RELOC_WRITE            0x0002
#define FD_RELOC_DUMP             0x0004

/* Description of a single relocation, as passed to the backend's
 * emit_reloc() callback.
 */
struct fd_reloc {
	struct fd_bo *bo;
	uint32_t offset;   /* OUT_RELOC() asserts this is < fd_bo_size(bo) */
	uint32_t or;       /* OUT_RELOC() stores the low 32b of its 'or' here */
	int32_t  shift;
	uint32_t orhi;     /* used for a5xx+ */
};

/* We always mark BOs for write, instead of tracking it across reloc
 * sources in userspace.  On the kernel side, this means we track a single
 * excl fence in the BO instead of a set of read fences, which is cheaper.
 * The downside is that a dmabuf-shared device won't be able to read in
 * parallel with a read-only access by freedreno, but most other drivers
 * have decided that that usecase isn't important enough to do this
 * tracking, as well.
 */
#define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE)
183
184 /* NOTE: relocs are 2 dwords on a5xx+ */
185
186 static inline void
fd_ringbuffer_reloc(struct fd_ringbuffer * ring,const struct fd_reloc * reloc)187 fd_ringbuffer_reloc(struct fd_ringbuffer *ring,
188 const struct fd_reloc *reloc)
189 {
190 ring->funcs->emit_reloc(ring, reloc);
191 }
192
193 static inline uint32_t
fd_ringbuffer_cmd_count(struct fd_ringbuffer * ring)194 fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
195 {
196 if (!ring->funcs->cmd_count)
197 return 1;
198 return ring->funcs->cmd_count(ring);
199 }
200
201 static inline uint32_t
fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer * ring,struct fd_ringbuffer * target,uint32_t cmd_idx)202 fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
203 struct fd_ringbuffer *target, uint32_t cmd_idx)
204 {
205 return ring->funcs->emit_reloc_ring(ring, target, cmd_idx);
206 }
207
/* Return the distance in bytes from start to end.
 *
 * Neither pointer is written through, so both are const-qualified.  The
 * result is truncated to 32 bits; callers are expected to pass pointers
 * into the same buffer with end >= start.
 */
static inline uint32_t
offset_bytes(const void *end, const void *start)
{
	return (uint32_t)((const char *)end - (const char *)start);
}
213
214 static inline uint32_t
fd_ringbuffer_size(struct fd_ringbuffer * ring)215 fd_ringbuffer_size(struct fd_ringbuffer *ring)
216 {
217 /* only really needed for stateobj ringbuffers, and won't really
218 * do what you expect for growable rb's.. so lets just restrict
219 * this to stateobj's for now:
220 */
221 debug_assert(!(ring->flags & FD_RINGBUFFER_GROWABLE));
222 return offset_bytes(ring->cur, ring->start);
223 }
224
225 #define LOG_DWORDS 0
226
227 static inline void
OUT_RING(struct fd_ringbuffer * ring,uint32_t data)228 OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
229 {
230 if (LOG_DWORDS) {
231 fprintf(stderr, "ring[%p]: OUT_RING %04x: %08x", ring,
232 (uint32_t)(ring->cur - ring->start), data);
233 }
234 fd_ringbuffer_emit(ring, data);
235 }
236
237 /*
238 * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+
239 */
240 static inline void
OUT_RELOC(struct fd_ringbuffer * ring,struct fd_bo * bo,uint32_t offset,uint64_t or,int32_t shift)241 OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
242 uint32_t offset, uint64_t or, int32_t shift)
243 {
244 if (LOG_DWORDS) {
245 fprintf(stderr, "ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring,
246 (uint32_t)(ring->cur - ring->start), bo, offset, shift);
247 }
248 debug_assert(offset < fd_bo_size(bo));
249 fd_ringbuffer_reloc(ring, &(struct fd_reloc){
250 .bo = bo,
251 .offset = offset,
252 .or = or,
253 .shift = shift,
254 .orhi = or >> 32,
255 });
256 }
257
/* Emit a reference to the first cmd (index 0) of the target ring into
 * ring.  The value returned by the backend is not used.
 */
static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
	(void)fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
}
263
BEGIN_RING(struct fd_ringbuffer * ring,uint32_t ndwords)264 static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
265 {
266 if (unlikely(ring->cur + ndwords > ring->end))
267 fd_ringbuffer_grow(ring, ndwords);
268 }
269
270 static inline void
OUT_PKT0(struct fd_ringbuffer * ring,uint16_t regindx,uint16_t cnt)271 OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
272 {
273 BEGIN_RING(ring, cnt+1);
274 OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
275 }
276
277 static inline void
OUT_PKT2(struct fd_ringbuffer * ring)278 OUT_PKT2(struct fd_ringbuffer *ring)
279 {
280 BEGIN_RING(ring, 1);
281 OUT_RING(ring, CP_TYPE2_PKT);
282 }
283
284 static inline void
OUT_PKT3(struct fd_ringbuffer * ring,uint8_t opcode,uint16_t cnt)285 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
286 {
287 BEGIN_RING(ring, cnt+1);
288 OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
289 }
290
291 /*
292 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
293 */
294
/* Return the odd-parity bit for val: 1 when val has an even number of set
 * bits, 0 when it has an odd number, so that val together with the
 * returned bit always has an odd number of set bits.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	/* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
	 * note that we want odd parity so 0x6996 is inverted.
	 *
	 * The lookup constant is unsigned so that the right shift below
	 * operates on a non-negative value (right-shifting a negative int
	 * is implementation-defined).
	 */
	val ^= val >> 16;
	val ^= val >> 8;
	val ^= val >> 4;
	val &= 0xf;
	return (~0x6996u >> val) & 1;
}
307
308 static inline void
OUT_PKT4(struct fd_ringbuffer * ring,uint16_t regindx,uint16_t cnt)309 OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
310 {
311 BEGIN_RING(ring, cnt+1);
312 OUT_RING(ring, CP_TYPE4_PKT | cnt |
313 (_odd_parity_bit(cnt) << 7) |
314 ((regindx & 0x3ffff) << 8) |
315 ((_odd_parity_bit(regindx) << 27)));
316 }
317
318 static inline void
OUT_PKT7(struct fd_ringbuffer * ring,uint8_t opcode,uint16_t cnt)319 OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
320 {
321 BEGIN_RING(ring, cnt+1);
322 OUT_RING(ring, CP_TYPE7_PKT | cnt |
323 (_odd_parity_bit(cnt) << 15) |
324 ((opcode & 0x7f) << 16) |
325 ((_odd_parity_bit(opcode) << 23)));
326 }
327
328 static inline void
OUT_WFI(struct fd_ringbuffer * ring)329 OUT_WFI(struct fd_ringbuffer *ring)
330 {
331 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
332 OUT_RING(ring, 0x00000000);
333 }
334
335 static inline void
OUT_WFI5(struct fd_ringbuffer * ring)336 OUT_WFI5(struct fd_ringbuffer *ring)
337 {
338 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
339 }
340
341 #endif /* FREEDRENO_RINGBUFFER_H_ */
342