/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_gmem.h"
#include "fd5_program.h"
#include "fd5_zsa.h"

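/* Emit RB_MRT[] state for each color attachment.  With a non-NULL gmem
 * state this programs tile-buffer (GMEM) base offsets with a tiled
 * layout; with gmem==NULL it programs the system-memory base address,
 * pitch and tile-mode of the underlying resource instead.
 */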
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
         struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
{
   enum a5xx_tile_mode tile_mode;
   unsigned i;

   for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
      enum a5xx_color_fmt format = 0;
      enum a3xx_color_swap swap = WZYX;
      bool srgb = false, sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      struct fdl_slice *slice = NULL;
      uint32_t stride = 0;
      uint32_t size = 0;
      uint32_t base = 0;
      uint32_t offset = 0;

      if (gmem) {
         tile_mode = TILE5_2;
      } else {
         tile_mode = TILE5_LINEAR;
      }

      if ((i < nr_bufs) && bufs[i]) {
         struct pipe_surface *psurf = bufs[i];
         enum pipe_format pformat = psurf->format;

         rsc = fd_resource(psurf->texture);

         slice = fd_resource_slice(rsc, psurf->u.tex.level);
         format = fd5_pipe2color(pformat);
         swap = fd5_pipe2swap(pformat);
         srgb = util_format_is_srgb(pformat);
         sint = util_format_is_pure_sint(pformat);
         uint = util_format_is_pure_uint(pformat);

         debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

         offset = fd_resource_offset(rsc, psurf->u.tex.level,
                                     psurf->u.tex.first_layer);

         if (gmem) {
            stride = gmem->bin_w * gmem->cbuf_cpp[i];
            size = stride * gmem->bin_h;
            base = gmem->cbuf_base[i];
         } else {
            stride = fd_resource_pitch(rsc, psurf->u.tex.level);
            size = slice->size0;

            tile_mode =
               fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
         }
      }

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
      OUT_RING(
         ring,
         A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
            A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
            A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
            COND(gmem,
                 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
            COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
      if (gmem || (i >= nr_bufs) || !bufs[i]) {
         OUT_RING(ring, base);       /* RB_MRT[i].BASE_LO */
         OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
      } else {
         debug_assert((offset + size) <= fd_bo_size(rsc->bo));
         OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
      }

      OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
      OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
                        COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
                        COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
                        COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

      /* when we support UBWC, these would be the system memory
       * addr/pitch/etc:
       */
      OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
   }
}

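/* Emit depth/stencil buffer state, plus LRZ (low-resolution-Z) buffer
 * state when the resource has one.  As with emit_mrt(), a non-NULL
 * gmem state selects tile-buffer base offsets, otherwise the
 * system-memory resource is programmed directly.
 */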
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
      uint32_t cpp = rsc->layout.cpp;
      uint32_t stride = 0;
      uint32_t size = 0;

      if (gmem) {
         stride = cpp * gmem->bin_w;
         size = stride * gmem->bin_h;
      } else {
         stride = fd_resource_pitch(rsc, 0);
         size = fd_resource_slice(rsc, 0)->size0;
      }

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
      if (gmem) {
         OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
      }
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      if (rsc->lrz) {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
         OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
      } else {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }

      if (rsc->stencil) {
         if (gmem) {
            stride = 1 * gmem->bin_w;
            size = stride * gmem->bin_h;
         } else {
            stride = fd_resource_pitch(rsc->stencil, 0);
            size = fd_resource_slice(rsc->stencil, 0)->size0;
         }

         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
         OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
         if (gmem) {
            OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
            OUT_RING(ring, 0x00000000);          /* RB_STENCIL_BASE_HI */
         } else {
            OUT_RELOC(ring, rsc->stencil->bo, 0, 0,
                      0); /* RB_STENCIL_BASE_LO/HI */
         }
         OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
         OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
      } else {
         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
         OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
      }
   } else {
      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
      OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
   }
}

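/* Decide whether this batch should use hw binning, ie. a binning pass
 * whose visibility streams let per-tile rendering skip non-visible
 * draws.  The checks below presumably reflect VSC hardware limits on
 * pipes/bins; binning is also skipped when disabled, when the render
 * covers only a couple bins, or when there is nothing to draw.
 */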
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   if ((gmem->maxpw > 15) || (gmem->maxph > 15))
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
          (batch->num_draws > 0);
}

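/* Draw packets are recorded before we know whether the batch will use
 * hw binning, so the visibility mode is left as a patch-point; fill in
 * the final vismode for each recorded draw here.
 */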
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
   unsigned i;
   for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
      *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
   }
   util_dynarray_clear(&batch->draw_patches);
}

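/* Configure the VSC (visibility stream compressor) pipes: which bins
 * each pipe covers, and the buffers that the binning pass writes the
 * per-bin visibility streams into (allocated lazily below).
 */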
static void
update_vsc_pipe(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
   OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
                     A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
   OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
   for (i = 0; i < 16; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
   for (i = 0; i < 16; i++) {
      if (!ctx->vsc_pipe_bo[i]) {
         ctx->vsc_pipe_bo[i] = fd_bo_new(
            ctx->dev, 0x20000, 0, "vsc_pipe[%u]", i);
      }
      OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
                0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
   for (i = 0; i < 16; i++) {
      OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
                        32); /* VSC_PIPE_DATA_LENGTH[i] */
   }
}

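/* Run the binning pass: replay the batch's draws in BINNING mode over
 * the full window so the hw records, per bin, which draws are actually
 * visible.  The resulting visibility streams are consumed via
 * CP_SET_BIN_DATA5 when each tile is rendered (see fd5_emit_tile_prep).
 */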
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   uint32_t x1 = gmem->minx;
   uint32_t y1 = gmem->miny;
   uint32_t x2 = gmem->minx + gmem->width - 1;
   uint32_t y2 = gmem->miny + gmem->height - 1;

   fd5_set_render_mode(batch->ctx, ring, BINNING);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);

   fd5_event_write(batch, ring, UNK_2C, false);

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   fd5_emit_ib(ring, batch->binning);

   fd_reset_wfi(batch);

   fd5_event_write(batch, ring, UNK_2D, false);

   fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);

   // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)

   fd_wfi(batch, ring);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0);
}

/* before first tile */
static void
fd5_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   fd5_emit_restore(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   fd5_emit_lrz_flush(batch, ring);

   OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
   OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);

   /* Enable stream output for the first pass (likely the binning). */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   if (use_hw_binning(batch)) {
      emit_binning_pass(batch);

      /* Disable stream output after binning, since each VS output should get
       * streamed out once.
       */
      OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
      OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);

      fd5_emit_lrz_flush(batch, ring);
      patch_draws(batch, USE_VISIBILITY);
   } else {
      patch_draws(batch, IGNORE_VISIBILITY);
   }

   fd5_set_render_mode(batch->ctx, ring, GMEM);

   /* XXX If we're in gmem mode but not doing HW binning, then after the first
    * tile we should disable stream output (fd6_gmem.c doesn't do that either).
    */
}


/* before mem2gmem */
static void
fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, pipe_bo, 0, 0, 0);      /* VSC_PIPE[p].DATA_ADDRESS */
      OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */
                (tile->p * 4), 0, 0);
   } else {
      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);
   }

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));
}


/*
 * transfer from system memory to gmem
 */

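/* Blit one surface from its system-memory resource into the tile
 * buffer, restoring previous contents before the tile is rendered.
 */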
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t stride, size;

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
      // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
      // know otherwise how to go from linear in sysmem to tiled in gmem.
      // possibly we want to flip this around gmem2mem and keep depth
      // tiled in sysmem (and fixup sampler state to assume tiled).. this
      // might be required for doing depth/stencil in bypass mode?
      struct fdl_slice *slice = fd_resource_slice(rsc, 0);
      enum a5xx_color_fmt format =
         fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
      OUT_RING(ring,
               A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, 0)));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0));
      OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* BASE_LO/HI */

      buf = BLIT_MRT0;
   }

   stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
   size = stride * gmem->bin_h;

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
   OUT_RING(ring, base);       /* RB_BLIT_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   fd5_emit_blit(batch, ring);
}


static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /*
    * setup mrt and zs with system memory base addresses:
    */

   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   // emit_zs(ring, pfb->zsbuf, NULL);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
                     A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }

   if (fd_gmem_needs_restore(batch, tile,
                             FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }
}


/* before IB to rendering cmds: */
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   emit_zs(ring, pfb->zsbuf, gmem);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);

   enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
}


/*
 * transfer from gmem to system memory (ie. normal RAM)
 */

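/* Resolve one surface from the tile buffer back out to its
 * system-memory resource once the tile has been rendered.
 */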
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   struct fdl_slice *slice;
   bool tiled;
   uint32_t offset, pitch;

   if (!rsc->valid)
      return;

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   slice = fd_resource_slice(rsc, psurf->u.tex.level);
   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   pitch = fd_resource_pitch(rsc, psurf->u.tex.level);

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
                     COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   // bool msaa_resolve = pfb->samples > 1;
   bool msaa_resolve = false;
   OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
   OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));

   fd5_emit_blit(batch, ring);
}


static void
fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }
}

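/* after the last tile: flush remaining state and drop back to BYPASS */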
static void
fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_cache_flush(batch, ring);
   fd5_set_render_mode(batch->ctx, ring, BYPASS);
}

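/* Setup for rendering directly to system memory (bypass mode),
 * skipping the GMEM tiling path; also used for nondraw (blit/compute)
 * batches, which skip the framebuffer-related setup below.
 */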
static void
fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   fd5_emit_restore(batch, ring);

   fd5_emit_lrz_flush(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |
                     A5XX_RB_CNTL_BYPASS);

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
                     A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* Enable stream output, since there's no binning pass to put it in. */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   patch_draws(batch, IGNORE_VISIBILITY);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
                     A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);

   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
   OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
                     A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
                     A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);
}

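/* Flush the CCU color/depth caches once the sysmem pass is done. */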
static void
fd5_emit_sysmem_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
}

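/* Hook up the a5xx tile/sysmem emit callbacks used by the core gmem
 * rendering code.
 */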
void
fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd5_emit_tile_init;
   ctx->emit_tile_prep = fd5_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
   ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd5_emit_tile_fini;
   ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
}