/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_gmem.h"
#include "fd5_program.h"
#include "fd5_zsa.h"

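/* Emit RB_MRT / SP_FS_MRT state for each of the A5XX_MAX_RENDER_TARGETS
 * render targets.  With a non-NULL gmem state the MRTs are pointed at
 * their per-tile position in GMEM; otherwise they are pointed at the
 * resource in system memory (linear or tiled per the resource layout).
 */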
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
         struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
{
   enum a5xx_tile_mode tile_mode;
   unsigned i;

   for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
      enum a5xx_color_fmt format = 0;
      enum a3xx_color_swap swap = WZYX;
      bool srgb = false, sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      uint32_t stride = 0;
      uint32_t size = 0;
      uint32_t base = 0;
      uint32_t offset = 0;

      if (gmem) {
         tile_mode = TILE5_2;
      } else {
         tile_mode = TILE5_LINEAR;
      }

      if ((i < nr_bufs) && bufs[i]) {
         struct pipe_surface *psurf = bufs[i];
         enum pipe_format pformat = psurf->format;

         rsc = fd_resource(psurf->texture);

         format = fd5_pipe2color(pformat);
         swap = fd5_pipe2swap(pformat);
         srgb = util_format_is_srgb(pformat);
         sint = util_format_is_pure_sint(pformat);
         uint = util_format_is_pure_uint(pformat);

         assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

         offset = fd_resource_offset(rsc, psurf->u.tex.level,
                                     psurf->u.tex.first_layer);

         if (gmem) {
            stride = gmem->bin_w * gmem->cbuf_cpp[i];
            size = stride * gmem->bin_h;
            base = gmem->cbuf_base[i];
         } else {
            stride = fd_resource_pitch(rsc, psurf->u.tex.level);
            size = fd_resource_layer_stride(rsc, psurf->u.tex.level);

            tile_mode =
               fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
         }
      }

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
      OUT_RING(
         ring,
         A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
            A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
            A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
            COND(gmem,
                 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
            COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
      if (gmem || (i >= nr_bufs) || !bufs[i]) {
         OUT_RING(ring, base);       /* RB_MRT[i].BASE_LO */
         OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
      }

      OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
      OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
                        COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
                        COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
                        COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

      /* when we support UBWC, these would be the system memory
       * addr/pitch/etc:
       */
      OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
   }
}

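/* Emit depth/stencil buffer state.  As with emit_mrt(), a non-NULL gmem
 * state points the buffers at their per-tile GMEM offsets, otherwise at
 * the resources in system memory.  Separate-stencil and LRZ buffers are
 * set up here as well (or zeroed out when absent).
 */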
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
      uint32_t cpp = rsc->layout.cpp;
      uint32_t stride = 0;
      uint32_t size = 0;

      if (gmem) {
         stride = cpp * gmem->bin_w;
         size = stride * gmem->bin_h;
      } else {
         stride = fd_resource_pitch(rsc, zsbuf->u.tex.level);
         size = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
      }

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
      if (gmem) {
         OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo,
                   fd_resource_offset(rsc, zsbuf->u.tex.level,
                                      zsbuf->u.tex.first_layer),
                   0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
      }
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      if (rsc->lrz) {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
         OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
      } else {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }

      if (rsc->stencil) {
         if (gmem) {
            stride = 1 * gmem->bin_w;
            size = stride * gmem->bin_h;
         } else {
            stride = fd_resource_pitch(rsc->stencil, zsbuf->u.tex.level);
            size = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
         }

         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
         OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
         if (gmem) {
            OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
            OUT_RING(ring, 0x00000000);          /* RB_STENCIL_BASE_HI */
         } else {
            OUT_RELOC(ring, rsc->stencil->bo,
                      fd_resource_offset(rsc->stencil, zsbuf->u.tex.level,
                                         zsbuf->u.tex.first_layer),
                      0, 0); /* RB_STENCIL_BASE_LO/HI */
         }
         OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
         OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
      } else {
         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
         OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
      }
   } else {
      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
      OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
   }
}

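/* Program the three RAS/DEST MSAA_CNTL register pairs (TPL1, RB, GRAS) to
 * a consistent sample count, explicitly disabling MSAA for the single
 * sample case.
 */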
static void
emit_msaa(struct fd_ringbuffer *ring, uint32_t nr_samples)
{
   enum a3xx_msaa_samples samples = fd_msaa_samples(nr_samples);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
}

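/* Decide whether the hw binning (visibility) pass is worth running for
 * this batch: skip it when the scissor optimization is in use, when the
 * VSC pipes would span too many bins, or when the batch has too few bins
 * or no draws to benefit.
 */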
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   /* workaround: Like on a3xx, hw binning and scissor optimization
    * don't play nice together.
    *
    * Disable binning if scissor optimization is used.
    */
   if (gmem->minx || gmem->miny)
      return false;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   if ((gmem->maxpw > 15) || (gmem->maxph > 15))
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
          (batch->num_draws > 0);
}

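/* Patch the previously recorded draw commands with the final visibility
 * mode, once we know whether the binning pass ran (USE_VISIBILITY) or
 * not (IGNORE_VISIBILITY).
 */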
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
   unsigned i;
   for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
      *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
   }
   util_dynarray_clear(&batch->draw_patches);
}

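/* Program the VSC (visibility stream) configuration: bin size, the 16
 * pipe configs, and the per-pipe buffers that the binning pass writes
 * visibility streams into (allocated here on first use).
 */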
static void
update_vsc_pipe(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
   OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
                     A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
   OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
   for (i = 0; i < 16; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
   for (i = 0; i < 16; i++) {
      if (!ctx->vsc_pipe_bo[i]) {
         ctx->vsc_pipe_bo[i] =
            fd_bo_new(ctx->dev, 0x20000, 0, "vsc_pipe[%u]", i);
      }
      OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
                0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
   for (i = 0; i < 16; i++) {
      OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
                        32); /* VSC_PIPE_DATA_LENGTH[i] */
   }
}

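/* Run the binning pass: set the window scissor to the full gmem area,
 * execute the recorded draws in BINNING mode to populate the visibility
 * streams, then flush and wait before the per-tile rendering consumes
 * them.
 */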
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   uint32_t x1 = gmem->minx;
   uint32_t y1 = gmem->miny;
   uint32_t x2 = gmem->minx + gmem->width - 1;
   uint32_t y2 = gmem->miny + gmem->height - 1;

   fd5_set_render_mode(batch->ctx, ring, BINNING);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);

   fd5_event_write(batch, ring, UNK_2C, false);

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   fd5_emit_ib(ring, batch->binning);

   fd_reset_wfi(batch);

   fd5_event_write(batch, ring, UNK_2D, false);

   fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);

   // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)

   fd_wfi(batch, ring);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0);
}

/* before first tile */
static void
fd5_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   fd5_emit_restore(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   fd5_emit_lrz_flush(batch, ring);

   OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
   OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);

   /* Enable stream output for the first pass (likely the binning). */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   if (use_hw_binning(batch)) {
      emit_binning_pass(batch);

      /* Disable stream output after binning, since each VS output should get
       * streamed out once.
       */
      OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
      OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);

      fd5_emit_lrz_flush(batch, ring);
      patch_draws(batch, USE_VISIBILITY);
   } else {
      patch_draws(batch, IGNORE_VISIBILITY);
   }

   fd5_set_render_mode(batch->ctx, ring, GMEM);

   /* XXX If we're in gmem mode but not doing HW binning, then after the first
    * tile we should disable stream output (fd6_gmem.c doesn't do that either).
    */
}

/* before mem2gmem */
static void
fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, pipe_bo, 0, 0, 0);      /* VSC_PIPE[p].DATA_ADDRESS */
      OUT_RELOC(ring, fd5_ctx->vsc_size_mem,  /* VSC_SIZE_ADDRESS + (p * 4) */
                (tile->p * 4), 0, 0);
   } else {
      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);
   }

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));
}

/*
 * transfer from system memory to gmem
 */

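/* Emit a blit that restores one surface from system memory into its gmem
 * position for the current tile.  Depth/stencil is imported via BLIT_MRT0
 * rather than BLIT_ZS (see the XXX below) to handle going from linear in
 * sysmem to tiled in gmem.
 */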
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t stride, size;

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
      // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
      // know otherwise how to go from linear in sysmem to tiled in gmem.
      // possibly we want to flip this around gmem2mem and keep depth
      // tiled in sysmem (and fixup sampler state to assume tiled).. this
      // might be required for doing depth/stencil in bypass mode?
      enum a5xx_color_fmt format =
         fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
      OUT_RING(ring,
               A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
      OUT_RING(ring,
               A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, psurf->u.tex.level)));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(
                        fd_resource_layer_stride(rsc, psurf->u.tex.level)));
      OUT_RELOC(ring, rsc->bo,
                fd_resource_offset(rsc, psurf->u.tex.level,
                                   psurf->u.tex.first_layer),
                0, 0); /* BASE_LO/HI */

      buf = BLIT_MRT0;
   }

   stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
   size = stride * gmem->bin_h;

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
   OUT_RING(ring, base);       /* RB_BLIT_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   fd5_emit_blit(batch, ring);
}

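/* Restore (mem2gmem) any buffers that the batch reads before writing,
 * blitting each needed color/depth/stencil surface from system memory
 * into gmem for the current tile.
 */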
static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /*
    * setup mrt and zs with system memory base addresses:
    */

   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   // emit_zs(ring, pfb->zsbuf, NULL);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
                     A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }

   if (fd_gmem_needs_restore(batch, tile,
                             FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }
}

/* before IB to rendering cmds: */
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   emit_zs(ring, pfb->zsbuf, gmem);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);
   emit_msaa(ring, pfb->samples);
}

/*
 * transfer from gmem to system memory (ie. normal RAM)
 */

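/* Emit a blit that resolves one surface from its gmem position for the
 * current tile back out to the resource in system memory.  Skipped if
 * the resource has no valid contents to resolve.
 */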
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   bool tiled;
   uint32_t offset, pitch;

   if (!rsc->valid)
      return;

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   pitch = fd_resource_pitch(rsc, psurf->u.tex.level);

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
                     COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(
                     fd_resource_layer_stride(rsc, psurf->u.tex.level)));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   // bool msaa_resolve = pfb->samples > 1;
   bool msaa_resolve = false;
   OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
   OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));

   fd5_emit_blit(batch, ring);
}

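/* Resolve (gmem2mem) each buffer that the batch marks for resolve,
 * blitting it from gmem back out to system memory after the tile's
 * rendering commands have run.
 */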
static void
fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }
}

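/* after the last tile: flush caches and drop back to BYPASS mode */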
static void
fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_cache_flush(batch, ring);
   fd5_set_render_mode(batch->ctx, ring, BYPASS);
}

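/* Setup for rendering directly to system memory (bypass), skipping the
 * tiling passes entirely: program the CCU for bypass mode and, for draw
 * batches, set the scissor/window to cover the whole framebuffer.
 */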
static void
fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   fd5_emit_restore(batch, ring);

   fd5_emit_lrz_flush(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |
                     A5XX_RB_CNTL_BYPASS);

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
                     A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* Enable stream output, since there's no binning pass to put it in. */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   patch_draws(batch, IGNORE_VISIBILITY);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   emit_msaa(ring, pfb->samples);
}

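/* after bypass rendering: flush LRZ and the color/depth CCU caches */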
static void
fd5_emit_sysmem_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
}

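/* hook up the a5xx gmem/sysmem emit callbacks at context creation */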
void
fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd5_emit_tile_init;
   ctx->emit_tile_prep = fd5_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
   ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd5_emit_tile_fini;
   ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
}