1 /*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 #ifndef FD6_EMIT_H
29 #define FD6_EMIT_H
30
31 #include "pipe/p_context.h"
32
33 #include "fd6_context.h"
34 #include "fd6_program.h"
35 #include "fdl/fd6_format_table.h"
36 #include "freedreno_context.h"
37 #include "ir3_gallium.h"
38
39 struct fd_ringbuffer;
40
/* To collect all the state objects to emit in a single CP_SET_DRAW_STATE
 * packet, the emit tracks a collection of however many state_groups need
 * to be emitted.
 */
enum fd6_state_id {
   FD6_GROUP_PROG_CONFIG,
   FD6_GROUP_PROG,
   FD6_GROUP_PROG_BINNING,
   FD6_GROUP_PROG_INTERP,
   FD6_GROUP_PROG_FB_RAST,
   FD6_GROUP_LRZ,
   FD6_GROUP_VTXSTATE,
   FD6_GROUP_VBO,
   FD6_GROUP_CONST,
   FD6_GROUP_DRIVER_PARAMS,
   FD6_GROUP_PRIMITIVE_PARAMS,
   FD6_GROUP_VS_TEX,
   FD6_GROUP_HS_TEX,
   FD6_GROUP_DS_TEX,
   FD6_GROUP_GS_TEX,
   FD6_GROUP_FS_TEX,
   FD6_GROUP_RASTERIZER,
   FD6_GROUP_ZSA,
   FD6_GROUP_BLEND,
   FD6_GROUP_SCISSOR,
   FD6_GROUP_BLEND_COLOR,
   FD6_GROUP_SAMPLE_LOCATIONS,
   FD6_GROUP_SO,
   FD6_GROUP_VS_BINDLESS,
   FD6_GROUP_HS_BINDLESS,
   FD6_GROUP_DS_BINDLESS,
   FD6_GROUP_GS_BINDLESS,
   FD6_GROUP_FS_BINDLESS,
   FD6_GROUP_PRIM_MODE_SYSMEM,
   FD6_GROUP_PRIM_MODE_GMEM,

   /*
    * Virtual state-groups, which don't turn into a CP_SET_DRAW_STATE group
    */

   FD6_GROUP_PROG_KEY,  /* Set for any state which could change shader key */
   FD6_GROUP_NON_GROUP, /* placeholder group for state emit in IB2, keep last */

   /*
    * Note that since we don't interleave draws and grids in the same batch,
    * the compute vs draw state groups can overlap:
    */
   FD6_GROUP_CS_TEX = FD6_GROUP_VS_TEX,
   FD6_GROUP_CS_BINDLESS = FD6_GROUP_VS_BINDLESS,
};
91
/**
 * Pipeline type, Ie. is just plain old VS+FS (which can be high draw rate and
 * should be a fast-path) or is it a pipeline that uses GS and/or tess to
 * amplify geometry.
 *
 * TODO split GS and TESS?
 */
enum fd6_pipeline_type {
   NO_TESS_GS,  /* Only has VS+FS */
   HAS_TESS_GS, /* Has tess and/or GS */
};
103
/* Enable-mask bits selecting which passes a CP_SET_DRAW_STATE group is
 * evaluated in (binning pass, and/or gmem/sysmem draw pass):
 */
#define ENABLE_ALL                                                             \
   (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_GMEM |                 \
    CP_SET_DRAW_STATE__0_SYSMEM)
#define ENABLE_DRAW (CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM)

/* A single CP_SET_DRAW_STATE group entry: */
struct fd6_state_group {
   struct fd_ringbuffer *stateobj;
   enum fd6_state_id group_id;
   /* enable_mask controls which states the stateobj is evaluated in,
    * b0 is binning pass b1 and/or b2 is draw pass
    */
   uint32_t enable_mask;
};

/* Collection of state groups, flushed into the ring by fd6_state_emit(): */
struct fd6_state {
   struct fd6_state_group groups[32];
   unsigned num_groups;
};
122
/* Emit all collected state groups as a single CP_SET_DRAW_STATE packet,
 * consuming (deleting) the stateobj reference held by each group.
 */
static inline void
fd6_state_emit(struct fd6_state *state, struct fd_ringbuffer *ring)
{
   if (!state->num_groups)
      return;

   /* Each group contributes exactly three dwords to the packet: */
   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * state->num_groups);
   for (unsigned i = 0; i < state->num_groups; i++) {
      struct fd6_state_group *g = &state->groups[i];
      /* Stateobj size in dwords (ringbuffer size is in bytes): */
      unsigned n = g->stateobj ? fd_ringbuffer_size(g->stateobj) / 4 : 0;

      assert((g->enable_mask & ~ENABLE_ALL) == 0);

      if (n == 0) {
         /* Empty stateobj: emit a DISABLE for this group-id instead of an
          * address:
          */
         OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                        CP_SET_DRAW_STATE__0_DISABLE | g->enable_mask |
                        CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      } else {
         OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(n) | g->enable_mask |
                        CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
         OUT_RB(ring, g->stateobj);
      }

      /* Ownership of the stateobj reference was transferred to us: */
      if (g->stateobj)
         fd_ringbuffer_del(g->stateobj);
   }
}
152
153 static inline unsigned
enable_mask(enum fd6_state_id group_id)154 enable_mask(enum fd6_state_id group_id)
155 {
156 switch (group_id) {
157 case FD6_GROUP_PROG: return ENABLE_DRAW;
158 case FD6_GROUP_PROG_BINNING: return CP_SET_DRAW_STATE__0_BINNING;
159 case FD6_GROUP_PROG_INTERP: return ENABLE_DRAW;
160 case FD6_GROUP_FS_TEX: return ENABLE_DRAW;
161 case FD6_GROUP_FS_BINDLESS: return ENABLE_DRAW;
162 case FD6_GROUP_PRIM_MODE_SYSMEM: return CP_SET_DRAW_STATE__0_SYSMEM | CP_SET_DRAW_STATE__0_BINNING;
163 case FD6_GROUP_PRIM_MODE_GMEM: return CP_SET_DRAW_STATE__0_GMEM;
164 default: return ENABLE_ALL;
165 }
166 }
167
168 static inline void
fd6_state_take_group(struct fd6_state * state,struct fd_ringbuffer * stateobj,enum fd6_state_id group_id)169 fd6_state_take_group(struct fd6_state *state, struct fd_ringbuffer *stateobj,
170 enum fd6_state_id group_id)
171 {
172 assert(state->num_groups < ARRAY_SIZE(state->groups));
173 struct fd6_state_group *g = &state->groups[state->num_groups++];
174 g->stateobj = stateobj;
175 g->group_id = group_id;
176 g->enable_mask = enable_mask(group_id);
177 }
178
179 static inline void
fd6_state_add_group(struct fd6_state * state,struct fd_ringbuffer * stateobj,enum fd6_state_id group_id)180 fd6_state_add_group(struct fd6_state *state, struct fd_ringbuffer *stateobj,
181 enum fd6_state_id group_id)
182 {
183 fd6_state_take_group(state, fd_ringbuffer_ref(stateobj), group_id);
184 }
185
/* grouped together emit-state for prog/vertex/state emit: */
struct fd6_emit {
   struct fd_context *ctx;
   const struct pipe_draw_info *info;
   const struct pipe_draw_indirect_info *indirect;
   const struct pipe_draw_start_count_bias *draw;
   /* Bitmask of state groups that need to be (re-)emitted for this draw: */
   uint32_t dirty_groups;

   uint32_t sprite_coord_enable; /* bitmask */
   bool sprite_coord_mode : 1;
   bool rasterflat : 1;
   bool primitive_restart : 1;
   uint8_t streamout_mask;
   uint32_t draw_id;

   /* cached to avoid repeated lookups: */
   const struct fd6_program_state *prog;

   /* Per-stage shader variants for the current draw: */
   const struct ir3_shader_variant *vs;
   const struct ir3_shader_variant *hs;
   const struct ir3_shader_variant *ds;
   const struct ir3_shader_variant *gs;
   const struct ir3_shader_variant *fs;

   struct fd6_state state;
};
212
/* Accessor for the cached program state of the current emit: */
static inline const struct fd6_program_state *
fd6_emit_get_prog(struct fd6_emit *emit)
{
   return emit->prog;
}
218
/* Emit a CP_EVENT_WRITE for 'evt'.  If 'timestamp' is requested, the packet
 * additionally asks the hw to write a fresh per-context seqno back to the
 * context's control buffer; that seqno is returned (0 otherwise) so the
 * caller can wait on it.
 */
static inline unsigned
fd6_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
                enum vgt_event_type evt, bool timestamp)
{
   unsigned seqno = 0;

   /* 4 dwords with timestamp write-back (event + addr lo/hi + value),
    * otherwise just the event dword:
    */
   OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
   if (timestamp) {
      struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
      seqno = ++fd6_ctx->seqno;
      OUT_RELOC(ring, control_ptr(fd6_ctx, seqno)); /* ADDR_LO/HI */
      OUT_RING(ring, seqno);
   }

   return seqno;
}
236
/* Emit events invalidating the color/depth CCUs and the unified cache: */
static inline void
fd6_cache_inv(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
   fd6_event_write(batch, ring, PC_CCU_INVALIDATE_DEPTH, false);
   fd6_event_write(batch, ring, CACHE_INVALIDATE, false);
}
244
/* Emit a full cache flush and make the CP wait until it has completed:
 * first wait for the RB_DONE_TS seqno to be written, then for the
 * CACHE_FLUSH_TS seqno.
 */
static inline void
fd6_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
   unsigned seqno;

   seqno = fd6_event_write(batch, ring, RB_DONE_TS, true);

   /* Poll memory until the RB_DONE_TS seqno lands: */
   OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
   OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
                  CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY));
   OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
   OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno));
   OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0));
   OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));

   seqno = fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);

   /* And wait (>=) for the CACHE_FLUSH_TS seqno: */
   OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
   OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0));
   OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
   OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno));
}
268
/* Emit a BLIT event, bracketed with debug markers: */
static inline void
fd6_emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   emit_marker6(ring, 7);
   fd6_event_write(batch, ring, BLIT, false);
   emit_marker6(ring, 7);
}
276
/* Emit an LRZ_FLUSH event: */
static inline void
fd6_emit_lrz_flush(struct fd_ringbuffer *ring)
{
   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, LRZ_FLUSH);
}
283
284 static inline bool
fd6_geom_stage(gl_shader_stage type)285 fd6_geom_stage(gl_shader_stage type)
286 {
287 switch (type) {
288 case MESA_SHADER_VERTEX:
289 case MESA_SHADER_TESS_CTRL:
290 case MESA_SHADER_TESS_EVAL:
291 case MESA_SHADER_GEOMETRY:
292 return true;
293 case MESA_SHADER_FRAGMENT:
294 case MESA_SHADER_COMPUTE:
295 case MESA_SHADER_KERNEL:
296 return false;
297 default:
298 unreachable("bad shader type");
299 }
300 }
301
302 static inline uint32_t
fd6_stage2opcode(gl_shader_stage type)303 fd6_stage2opcode(gl_shader_stage type)
304 {
305 return fd6_geom_stage(type) ? CP_LOAD_STATE6_GEOM : CP_LOAD_STATE6_FRAG;
306 }
307
308 static inline enum a6xx_state_block
fd6_stage2shadersb(gl_shader_stage type)309 fd6_stage2shadersb(gl_shader_stage type)
310 {
311 switch (type) {
312 case MESA_SHADER_VERTEX:
313 return SB6_VS_SHADER;
314 case MESA_SHADER_TESS_CTRL:
315 return SB6_HS_SHADER;
316 case MESA_SHADER_TESS_EVAL:
317 return SB6_DS_SHADER;
318 case MESA_SHADER_GEOMETRY:
319 return SB6_GS_SHADER;
320 case MESA_SHADER_FRAGMENT:
321 return SB6_FS_SHADER;
322 case MESA_SHADER_COMPUTE:
323 case MESA_SHADER_KERNEL:
324 return SB6_CS_SHADER;
325 default:
326 unreachable("bad shader type");
327 return (enum a6xx_state_block)~0;
328 }
329 }
330
331 static inline enum a6xx_tess_spacing
fd6_gl2spacing(enum gl_tess_spacing spacing)332 fd6_gl2spacing(enum gl_tess_spacing spacing)
333 {
334 switch (spacing) {
335 case TESS_SPACING_EQUAL:
336 return TESS_EQUAL;
337 case TESS_SPACING_FRACTIONAL_ODD:
338 return TESS_FRACTIONAL_ODD;
339 case TESS_SPACING_FRACTIONAL_EVEN:
340 return TESS_FRACTIONAL_EVEN;
341 case TESS_SPACING_UNSPECIFIED:
342 default:
343 unreachable("spacing must be specified");
344 }
345 }
346
/* Emit 3d draw state, specialized per chip generation and pipeline type: */
template <chip CHIP, fd6_pipeline_type PIPELINE>
void fd6_emit_3d_state(struct fd_ringbuffer *ring,
                       struct fd6_emit *emit) assert_dt;

struct fd6_compute_state;

/* Emit compute state, specialized per chip generation: */
template <chip CHIP>
void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                       struct fd6_compute_state *cs) assert_dt;

void fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem);

template <chip CHIP>
void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);

void fd6_emit_init_screen(struct pipe_screen *pscreen);
362
/* Emit an indirect-branch to 'target', bracketed with debug markers: */
static inline void
fd6_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   emit_marker6(ring, 6);
   __OUT_IB5(ring, target);
   emit_marker6(ring, 6);
}
370
/* Convenience macro to emit a single register write (expects 'ring' in
 * scope at the expansion site):
 */
#define WRITE(reg, val)                                                        \
   do {                                                                        \
      OUT_PKT4(ring, reg, 1);                                                  \
      OUT_RING(ring, val);                                                     \
   } while (0)
376
377 #endif /* FD6_EMIT_H */
378