/*
 * Copyright © 2023 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define FD_BO_NO_HARDPIN 1

#include "freedreno_batch.h"

#include "fd6_barrier.h"
#include "fd6_context.h"

/* TODO probably more of the various fd6_event_write() should be
 * consolidated here.
 */

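/* Emit a CP_EVENT_WRITE for the given event. For the *_TS events listed
 * below, a fresh seqno is also written back to the context's control
 * buffer and returned to the caller; all other events return zero.
 */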
static uint32_t
event_write(struct fd_context *ctx, struct fd_ringbuffer *ring,
            enum vgt_event_type evt)
{
   bool timestamp = false;
   switch (evt) {
   case CACHE_FLUSH_TS:
   case WT_DONE_TS:
   case RB_DONE_TS:
   case PC_CCU_FLUSH_DEPTH_TS:
   case PC_CCU_FLUSH_COLOR_TS:
   case PC_CCU_RESOLVE_TS:
      timestamp = true;
      break;
   default:
      break;
   }

   OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
   if (timestamp) {
      struct fd6_context *fd6_ctx = fd6_context(ctx);
      uint32_t seqno = ++fd6_ctx->seqno;
      OUT_RELOC(ring, control_ptr(fd6_ctx, seqno)); /* ADDR_LO/HI */
      OUT_RING(ring, seqno);

      return seqno;
   }

   return 0;
}

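/* Translate accumulated FD6_* flush/invalidate/wait bits into the
 * corresponding CP_EVENT_WRITE and CP_WAIT_* packets on the given ring.
 */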
void
fd6_emit_flushes(struct fd_context *ctx, struct fd_ringbuffer *ring,
                 unsigned flushes)
{
   /* Experiments show that invalidating CCU while it still has data in it
    * doesn't work, so make sure to always flush before invalidating in case
    * any data remains that hasn't yet been made available through a barrier.
    * However it does seem to work for UCHE.
    */
   if (flushes & (FD6_FLUSH_CCU_COLOR | FD6_INVALIDATE_CCU_COLOR))
      event_write(ctx, ring, PC_CCU_FLUSH_COLOR_TS);

   if (flushes & (FD6_FLUSH_CCU_DEPTH | FD6_INVALIDATE_CCU_DEPTH))
      event_write(ctx, ring, PC_CCU_FLUSH_DEPTH_TS);

   if (flushes & FD6_INVALIDATE_CCU_COLOR)
      event_write(ctx, ring, PC_CCU_INVALIDATE_COLOR);

   if (flushes & FD6_INVALIDATE_CCU_DEPTH)
      event_write(ctx, ring, PC_CCU_INVALIDATE_DEPTH);

   if (flushes & FD6_FLUSH_CACHE)
      event_write(ctx, ring, CACHE_FLUSH_TS);

   if (flushes & FD6_INVALIDATE_CACHE)
      event_write(ctx, ring, CACHE_INVALIDATE);

   if (flushes & FD6_WAIT_MEM_WRITES)
      OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

   if (flushes & FD6_WAIT_FOR_IDLE)
      OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);

   if (flushes & FD6_WAIT_FOR_ME)
      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
}

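/* Emit the batch's pending barrier flushes into its draw ring and clear
 * them.
 */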
void
fd6_barrier_flush(struct fd_batch *batch)
{
   fd6_emit_flushes(batch->ctx, batch->draw, batch->barrier);
   batch->barrier = 0;
}

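/* Accumulate flush bits on the current batch rather than emitting them
 * immediately; they are emitted later via fd6_barrier_flush().
 */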
static void
add_flushes(struct pipe_context *pctx, unsigned flushes)
   assert_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_batch *batch = NULL;

   /* If there is an active compute/nondraw batch, that is the one
    * we want to add the flushes to. Ie. last op was a launch_grid,
    * if the next one is a launch_grid then the barriers should come
    * between them. If the next op is a draw_vbo then the batch
    * switch is a sufficient barrier so it doesn't really matter.
    */
   fd_batch_reference(&batch, ctx->batch_nondraw);
   if (!batch)
      fd_batch_reference(&batch, ctx->batch);

   /* A batch flush is already a sufficient barrier: */
   if (!batch)
      return;

   batch->barrier |= flushes;

   fd_batch_reference(&batch, NULL);
}

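/* pipe_context::texture_barrier() hook. */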
static void
fd6_texture_barrier(struct pipe_context *pctx, unsigned flags)
   in_dt
{
   unsigned flushes = 0;

   if (flags & PIPE_TEXTURE_BARRIER_SAMPLER) {
      /* If we are sampling from the fb, we could get away with treating
       * this as a PIPE_TEXTURE_BARRIER_FRAMEBUFFER in sysmem mode, but
       * that won't work out in gmem mode because we don't patch the tex
       * state outside of the case that the frag shader tells us it is
       * an fb-read. And in particular, the fb-read case guarantees us
       * that the read will be from the same texel, but the fb-bound-as-
       * tex case does not.
       *
       * We could try to be clever here and detect if zsbuf/cbuf[n] is
       * bound as a texture, but that doesn't really help if it is bound
       * as a texture after the barrier without a lot of extra book-
       * keeping. So hopefully no one calls glTextureBarrierNV() just
       * for lolz.
       */
      pctx->flush(pctx, NULL, 0);
      return;
   }

   if (flags & PIPE_TEXTURE_BARRIER_FRAMEBUFFER) {
      flushes |= FD6_WAIT_FOR_IDLE | FD6_WAIT_FOR_ME |
                 FD6_FLUSH_CCU_COLOR | FD6_FLUSH_CCU_DEPTH |
                 FD6_FLUSH_CACHE | FD6_INVALIDATE_CACHE;
   }

   add_flushes(pctx, flushes);
}

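/* pipe_context::memory_barrier() hook: map PIPE_BARRIER_* bits onto the
 * FD6_* flush/wait bits that implement the barrier.
 */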
static void
fd6_memory_barrier(struct pipe_context *pctx, unsigned flags)
   in_dt
{
   unsigned flushes = 0;

   if (flags & (PIPE_BARRIER_SHADER_BUFFER |
                PIPE_BARRIER_CONSTANT_BUFFER |
                PIPE_BARRIER_VERTEX_BUFFER |
                PIPE_BARRIER_INDEX_BUFFER |
                PIPE_BARRIER_STREAMOUT_BUFFER)) {
      flushes |= FD6_WAIT_FOR_IDLE;
   }

   if (flags & (PIPE_BARRIER_TEXTURE |
                PIPE_BARRIER_IMAGE |
                PIPE_BARRIER_UPDATE_BUFFER |
                PIPE_BARRIER_UPDATE_TEXTURE)) {
      flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;
   }

   if (flags & PIPE_BARRIER_INDIRECT_BUFFER) {
      flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;

      /* Various firmware bugs/inconsistencies mean that some indirect draw opcodes
       * do not wait for WFI's to complete before executing. Add a WAIT_FOR_ME if
       * pending for these opcodes. This may result in a few extra WAIT_FOR_ME's
       * with these opcodes, but the alternative would add unnecessary WAIT_FOR_ME's
       * before draw opcodes that don't need it.
       */
      if (fd_context(pctx)->screen->info->a6xx.indirect_draw_wfm_quirk) {
         flushes |= FD6_WAIT_FOR_ME;
      }
   }

   if (flags & PIPE_BARRIER_FRAMEBUFFER) {
      fd6_texture_barrier(pctx, PIPE_TEXTURE_BARRIER_FRAMEBUFFER);
   }

   add_flushes(pctx, flushes);
}

void
fd6_barrier_init(struct pipe_context *pctx)
{
   pctx->texture_barrier = fd6_texture_barrier;
   pctx->memory_barrier = fd6_memory_barrier;
}