1 /*
2  * Copyright © 2016 Rob Clark <robclark@freedesktop.org>
3  * SPDX-License-Identifier: MIT
4  *
5  * Authors:
6  *    Rob Clark <robclark@freedesktop.org>
7  */
8 
9 #ifndef FD5_EMIT_H
10 #define FD5_EMIT_H
11 
12 #include "pipe/p_context.h"
13 
14 #include "fd5_context.h"
15 #include "fd5_format.h"
16 #include "fd5_program.h"
17 #include "fd5_screen.h"
18 #include "freedreno_batch.h"
19 #include "freedreno_context.h"
20 #include "ir3_gallium.h"
21 
22 struct fd_ringbuffer;
23 
24 /* grouped together emit-state for prog/vertex/state emit: */
25 struct fd5_emit {
26    struct util_debug_callback *debug;
27    const struct fd_vertex_state *vtx;
28    const struct fd5_program_state *prog;
29    const struct pipe_draw_info *info;
30         unsigned drawid_offset;
31    const struct pipe_draw_indirect_info *indirect;
32 	const struct pipe_draw_start_count_bias *draw;
33    bool binning_pass;
34    struct ir3_cache_key key;
35    enum fd_dirty_3d_state dirty;
36 
37    uint32_t sprite_coord_enable; /* bitmask */
38    bool sprite_coord_mode;
39    bool rasterflat;
40 
41    /* in binning pass, we don't have real frag shader, so we
42     * don't know if real draw disqualifies lrz write.  So just
43     * figure that out up-front and stash it in the emit.
44     */
45    bool no_lrz_write;
46 
47    /* cached to avoid repeated lookups of same variants: */
48    const struct ir3_shader_variant *vs, *fs;
49    /* TODO: other shader stages.. */
50 
51    unsigned streamout_mask;
52 };
53 
54 static inline enum a5xx_color_fmt
fd5_emit_format(struct pipe_surface * surf)55 fd5_emit_format(struct pipe_surface *surf)
56 {
57    if (!surf)
58       return 0;
59    return fd5_pipe2color(surf->format);
60 }
61 
62 static inline const struct ir3_shader_variant *
fd5_emit_get_vp(struct fd5_emit * emit)63 fd5_emit_get_vp(struct fd5_emit *emit)
64 {
65    if (!emit->vs) {
66       /* We use nonbinning VS during binning when TFB is enabled because that
67        * is what has all the outputs that might be involved in TFB.
68        */
69       if (emit->binning_pass &&
70           !emit->prog->vs->stream_output.num_outputs)
71          emit->vs = emit->prog->bs;
72       else
73          emit->vs = emit->prog->vs;
74    }
75    return emit->vs;
76 }
77 
78 static inline const struct ir3_shader_variant *
fd5_emit_get_fp(struct fd5_emit * emit)79 fd5_emit_get_fp(struct fd5_emit *emit)
80 {
81    if (!emit->fs) {
82       if (emit->binning_pass) {
83          /* use dummy stateobj to simplify binning vs non-binning: */
84          static const struct ir3_shader_variant binning_fs = {};
85          emit->fs = &binning_fs;
86       } else {
87          emit->fs = emit->prog->fs;
88       }
89    }
90    return emit->fs;
91 }
92 
93 static inline void
fd5_cache_flush(struct fd_batch * batch,struct fd_ringbuffer * ring)94 fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
95 {
96    fd_reset_wfi(batch);
97    OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5);
98    OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */
99    OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */
100    OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */
101    OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */
102    OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */
103    fd_wfi(batch, ring);
104 }
105 
106 static inline void
fd5_set_render_mode(struct fd_context * ctx,struct fd_ringbuffer * ring,enum render_mode_cmd mode)107 fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring,
108                     enum render_mode_cmd mode)
109 {
110    /* TODO add preemption support, gmem bypass, etc */
111    emit_marker5(ring, 7);
112    OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
113    OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode));
114    OUT_RING(ring, 0x00000000); /* ADDR_LO */
115    OUT_RING(ring, 0x00000000); /* ADDR_HI */
116    OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) |
117                      COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE));
118    OUT_RING(ring, 0x00000000);
119    emit_marker5(ring, 7);
120 }
121 
122 static inline void
fd5_event_write(struct fd_batch * batch,struct fd_ringbuffer * ring,enum vgt_event_type evt,bool timestamp)123 fd5_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
124                 enum vgt_event_type evt, bool timestamp)
125 {
126    OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
127    OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
128    if (timestamp) {
129       OUT_RELOC(ring, fd5_context(batch->ctx)->blit_mem, 0, 0,
130                 0); /* ADDR_LO/HI */
131       OUT_RING(ring, 0x00000000);
132    }
133 }
134 
135 static inline void
fd5_emit_blit(struct fd_batch * batch,struct fd_ringbuffer * ring)136 fd5_emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring)
137 {
138    emit_marker5(ring, 7);
139    fd5_event_write(batch, ring, BLIT, true);
140    emit_marker5(ring, 7);
141 }
142 
143 static inline void
fd5_emit_render_cntl(struct fd_context * ctx,bool blit,bool binning)144 fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) assert_dt
145 {
146    struct fd_ringbuffer *ring =
147       binning ? ctx->batch->binning : ctx->batch->draw;
148 
149    /* TODO eventually this partially depends on the pfb state, ie.
150     * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part
151     * we could probably cache and just regenerate if framebuffer
152     * state is dirty (or something like that)..
153     *
154     * Other bits seem to depend on query state, like if samples-passed
155     * query is active.
156     */
157    bool samples_passed = (ctx->occlusion_queries_active > 0);
158    OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
159    OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */
160                      COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |
161                      COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) |
162                      COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) |
163                      COND(!blit, 0x8));
164 
165    OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
166    OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */
167                      COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) |
168                      COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED));
169 }
170 
171 static inline void
fd5_emit_lrz_flush(struct fd_batch * batch,struct fd_ringbuffer * ring)172 fd5_emit_lrz_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
173 {
174    /* TODO I think the extra writes to GRAS_LRZ_CNTL are probably
175     * a workaround and not needed on all a5xx.
176     */
177    OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
178    OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE);
179 
180    fd5_event_write(batch, ring, LRZ_FLUSH, false);
181 
182    OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
183    OUT_RING(ring, 0x0);
184 }
185 
186 void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring,
187                           struct fd5_emit *emit) assert_dt;
188 
189 void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
190                     struct fd5_emit *emit) assert_dt;
191 
192 void fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
193                        struct ir3_shader_variant *cp) assert_dt;
194 void fd5_emit_cs_consts(const struct ir3_shader_variant *v,
195                         struct fd_ringbuffer *ring, struct fd_context *ctx,
196                         const struct pipe_grid_info *info) assert_dt;
197 
198 void fd5_emit_restore(struct fd_batch *batch,
199                       struct fd_ringbuffer *ring) assert_dt;
200 
201 void fd5_emit_init_screen(struct pipe_screen *pscreen);
202 void fd5_emit_init(struct pipe_context *pctx);
203 
/* Emit an IB from ring to target, with a scratch6 marker on either side. */
static inline void
fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   /* Debug aid for after a lock up: a unique counter value is written
    * to scratch6 for each IB, which makes it easier to match register
    * dumps up with the cmdstream.  Together with the DRAW marker
    * (scratch7), that is enough to "triangulate" the particular draw
    * that caused the lockup.
    */
   const int marker_scratch = 6;

   emit_marker5(ring, marker_scratch);
   __OUT_IB5(ring, target);
   emit_marker5(ring, marker_scratch);
}
217 
218 #endif /* FD5_EMIT_H */
219