/*
 * Copyright © 2023 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23 
24 #define FD_BO_NO_HARDPIN 1
25 
26 #include "freedreno_batch.h"
27 
28 #include "fd6_barrier.h"
29 #include "fd6_context.h"
30 
31 /* TODO probably more of the various fd6_event_write() should be
32  * consolidated here.
33  */
34 
35 static uint32_t
event_write(struct fd_context * ctx,struct fd_ringbuffer * ring,enum vgt_event_type evt)36 event_write(struct fd_context *ctx, struct fd_ringbuffer *ring,
37             enum vgt_event_type evt)
38 {
39    bool timestamp = false;
40    switch (evt) {
41    case CACHE_FLUSH_TS:
42    case WT_DONE_TS:
43    case RB_DONE_TS:
44    case PC_CCU_FLUSH_DEPTH_TS:
45    case PC_CCU_FLUSH_COLOR_TS:
46    case PC_CCU_RESOLVE_TS:
47       timestamp = true;
48       break;
49    default:
50       break;
51    }
52 
53    OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
54    OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
55    if (timestamp) {
56       struct fd6_context *fd6_ctx = fd6_context(ctx);
57       uint32_t seqno = ++fd6_ctx->seqno;
58       OUT_RELOC(ring, control_ptr(fd6_ctx, seqno)); /* ADDR_LO/HI */
59       OUT_RING(ring, seqno);
60 
61       return seqno;
62    }
63 
64    return 0;
65 }
66 
67 void
fd6_emit_flushes(struct fd_context * ctx,struct fd_ringbuffer * ring,unsigned flushes)68 fd6_emit_flushes(struct fd_context *ctx, struct fd_ringbuffer *ring,
69                  unsigned flushes)
70 {
71    /* Experiments show that invalidating CCU while it still has data in it
72     * doesn't work, so make sure to always flush before invalidating in case
73     * any data remains that hasn't yet been made available through a barrier.
74     * However it does seem to work for UCHE.
75     */
76    if (flushes & (FD6_FLUSH_CCU_COLOR | FD6_INVALIDATE_CCU_COLOR))
77       event_write(ctx, ring, PC_CCU_FLUSH_COLOR_TS);
78 
79    if (flushes & (FD6_FLUSH_CCU_DEPTH | FD6_INVALIDATE_CCU_DEPTH))
80       event_write(ctx, ring, PC_CCU_FLUSH_DEPTH_TS);
81 
82    if (flushes & FD6_INVALIDATE_CCU_COLOR)
83       event_write(ctx, ring, PC_CCU_INVALIDATE_COLOR);
84 
85    if (flushes & FD6_INVALIDATE_CCU_DEPTH)
86       event_write(ctx, ring, PC_CCU_INVALIDATE_DEPTH);
87 
88    if (flushes & FD6_FLUSH_CACHE)
89       event_write(ctx, ring, CACHE_FLUSH_TS);
90 
91    if (flushes & FD6_INVALIDATE_CACHE)
92       event_write(ctx, ring, CACHE_INVALIDATE);
93 
94    if (flushes & FD6_WAIT_MEM_WRITES)
95       OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
96 
97    if (flushes & FD6_WAIT_FOR_IDLE)
98       OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
99 
100    if (flushes & FD6_WAIT_FOR_ME)
101       OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
102 }
103 
104 void
fd6_barrier_flush(struct fd_batch * batch)105 fd6_barrier_flush(struct fd_batch *batch)
106 {
107    fd6_emit_flushes(batch->ctx, batch->draw, batch->barrier);
108    batch->barrier = 0;
109 }
110 
111 static void
add_flushes(struct pipe_context * pctx,unsigned flushes)112 add_flushes(struct pipe_context *pctx, unsigned flushes)
113    assert_dt
114 {
115    struct fd_context *ctx = fd_context(pctx);
116    struct fd_batch *batch = NULL;
117 
118    /* If there is an active compute/nondraw batch, that is the one
119     * we want to add the flushes to.  Ie. last op was a launch_grid,
120     * if the next one is a launch_grid then the barriers should come
121     * between them.  If the next op is a draw_vbo then the batch
122     * switch is a sufficient barrier so it doesn't really matter.
123     */
124    fd_batch_reference(&batch, ctx->batch_nondraw);
125    if (!batch)
126       fd_batch_reference(&batch, ctx->batch);
127 
128    /* A batch flush is already a sufficient barrier: */
129    if (!batch)
130       return;
131 
132    batch->barrier |= flushes;
133 
134    fd_batch_reference(&batch, NULL);
135 }
136 
137 static void
fd6_texture_barrier(struct pipe_context * pctx,unsigned flags)138 fd6_texture_barrier(struct pipe_context *pctx, unsigned flags)
139    in_dt
140 {
141    unsigned flushes = 0;
142 
143    if (flags & PIPE_TEXTURE_BARRIER_SAMPLER) {
144       /* If we are sampling from the fb, we could get away with treating
145        * this as a PIPE_TEXTURE_BARRIER_FRAMEBUFFER in sysmem mode, but
146        * that won't work out in gmem mode because we don't patch the tex
147        * state outside of the case that the frag shader tells us it is
148        * an fb-read.  And in particular, the fb-read case guarantees us
149        * that the read will be from the same texel, but the fb-bound-as-
150        * tex case does not.
151        *
152        * We could try to be clever here and detect if zsbuf/cbuf[n] is
153        * bound as a texture, but that doesn't really help if it is bound
154        * as a texture after the barrier without a lot of extra book-
155        * keeping.  So hopefully no one calls glTextureBarrierNV() just
156        * for lolz.
157        */
158       pctx->flush(pctx, NULL, 0);
159       return;
160    }
161 
162    if (flags & PIPE_TEXTURE_BARRIER_FRAMEBUFFER) {
163       flushes |= FD6_WAIT_FOR_IDLE | FD6_WAIT_FOR_ME |
164             FD6_FLUSH_CCU_COLOR | FD6_FLUSH_CCU_DEPTH |
165             FD6_FLUSH_CACHE | FD6_INVALIDATE_CACHE;
166    }
167 
168    add_flushes(pctx, flushes);
169 }
170 
171 static void
fd6_memory_barrier(struct pipe_context * pctx,unsigned flags)172 fd6_memory_barrier(struct pipe_context *pctx, unsigned flags)
173    in_dt
174 {
175    unsigned flushes = 0;
176 
177    if (flags & (PIPE_BARRIER_SHADER_BUFFER |
178                 PIPE_BARRIER_CONSTANT_BUFFER |
179                 PIPE_BARRIER_VERTEX_BUFFER |
180                 PIPE_BARRIER_INDEX_BUFFER |
181                 PIPE_BARRIER_STREAMOUT_BUFFER)) {
182       flushes |= FD6_WAIT_FOR_IDLE;
183    }
184 
185    if (flags & (PIPE_BARRIER_TEXTURE |
186                 PIPE_BARRIER_IMAGE |
187                 PIPE_BARRIER_UPDATE_BUFFER |
188                 PIPE_BARRIER_UPDATE_TEXTURE)) {
189       flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;
190    }
191 
192    if (flags & PIPE_BARRIER_INDIRECT_BUFFER) {
193       flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;
194 
195      /* Various firmware bugs/inconsistencies mean that some indirect draw opcodes
196       * do not wait for WFI's to complete before executing. Add a WAIT_FOR_ME if
197       * pending for these opcodes. This may result in a few extra WAIT_FOR_ME's
198       * with these opcodes, but the alternative would add unnecessary WAIT_FOR_ME's
199       * before draw opcodes that don't need it.
200       */
201       if (fd_context(pctx)->screen->info->a6xx.indirect_draw_wfm_quirk) {
202          flushes |= FD6_WAIT_FOR_ME;
203       }
204    }
205 
206    if (flags & PIPE_BARRIER_FRAMEBUFFER) {
207       fd6_texture_barrier(pctx, PIPE_TEXTURE_BARRIER_FRAMEBUFFER);
208    }
209 
210    add_flushes(pctx, flushes);
211 }
212 
213 void
fd6_barrier_init(struct pipe_context * pctx)214 fd6_barrier_init(struct pipe_context *pctx)
215 {
216    pctx->texture_barrier = fd6_texture_barrier;
217    pctx->memory_barrier = fd6_memory_barrier;
218 }
219