/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_gmem.h"
#include "fd5_program.h"
#include "fd5_zsa.h"

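/* Emit per-MRT render target state.  If a gmem state object is given, the
 * color buffers are programmed against their assigned offsets within the
 * tile buffer (GMEM); otherwise they point at the backing resources in
 * system memory.
 */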
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
         struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
{
   enum a5xx_tile_mode tile_mode;
   unsigned i;

   for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
      enum a5xx_color_fmt format = 0;
      enum a3xx_color_swap swap = WZYX;
      bool srgb = false, sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      uint32_t stride = 0;
      uint32_t size = 0;
      uint32_t base = 0;
      uint32_t offset = 0;

      if (gmem) {
         tile_mode = TILE5_2;
      } else {
         tile_mode = TILE5_LINEAR;
      }

      if ((i < nr_bufs) && bufs[i]) {
         struct pipe_surface *psurf = bufs[i];
         enum pipe_format pformat = psurf->format;

         rsc = fd_resource(psurf->texture);

         format = fd5_pipe2color(pformat);
         swap = fd5_pipe2swap(pformat);
         srgb = util_format_is_srgb(pformat);
         sint = util_format_is_pure_sint(pformat);
         uint = util_format_is_pure_uint(pformat);

         assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

         offset = fd_resource_offset(rsc, psurf->u.tex.level,
                                     psurf->u.tex.first_layer);

         if (gmem) {
            stride = gmem->bin_w * gmem->cbuf_cpp[i];
            size = stride * gmem->bin_h;
            base = gmem->cbuf_base[i];
         } else {
            stride = fd_resource_pitch(rsc, psurf->u.tex.level);
            size = fd_resource_layer_stride(rsc, psurf->u.tex.level);

            tile_mode =
               fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
         }
      }

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
      OUT_RING(
         ring,
         A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
            A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
            A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
            COND(gmem,
                 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
            COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
      if (gmem || (i >= nr_bufs) || !bufs[i]) {
         OUT_RING(ring, base);       /* RB_MRT[i].BASE_LO */
         OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
      }

      OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
      OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
                        COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
                        COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
                        COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

      /* when we support UBWC, these would be the system memory
       * addr/pitch/etc:
       */
      OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
   }
}

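/* Emit depth/stencil buffer state (including LRZ and, when present, the
 * separate stencil buffer), again either against the tile buffer offsets
 * in gmem or against the resources in system memory.
 */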
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
      uint32_t cpp = rsc->layout.cpp;
      uint32_t stride = 0;
      uint32_t size = 0;

      if (gmem) {
         stride = cpp * gmem->bin_w;
         size = stride * gmem->bin_h;
      } else {
         stride = fd_resource_pitch(rsc, zsbuf->u.tex.level);
         size = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
      }

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
      if (gmem) {
         OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo,
                   fd_resource_offset(rsc, zsbuf->u.tex.level,
                                      zsbuf->u.tex.first_layer),
                   0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
      }
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      if (rsc->lrz) {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
         OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
      } else {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }

      if (rsc->stencil) {
         if (gmem) {
            stride = 1 * gmem->bin_w;
            size = stride * gmem->bin_h;
         } else {
            stride = fd_resource_pitch(rsc->stencil, zsbuf->u.tex.level);
            size = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
         }

         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
         OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
         if (gmem) {
            OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
            OUT_RING(ring, 0x00000000);          /* RB_STENCIL_BASE_HI */
         } else {
            OUT_RELOC(ring, rsc->stencil->bo,
                      fd_resource_offset(rsc->stencil, zsbuf->u.tex.level,
                                         zsbuf->u.tex.first_layer),
                      0, 0); /* RB_STENCIL_BASE_LO/HI */
         }
         OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
         OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
      } else {
         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
         OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
      }
   } else {
      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
      OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
   }
}

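/* Program the per-block MSAA sample counts (TPL1, RB, GRAS), setting the
 * explicit MSAA-disable bit for the single-sampled case.
 */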
static void
emit_msaa(struct fd_ringbuffer *ring, uint32_t nr_samples)
{
   enum a3xx_msaa_samples samples = fd_msaa_samples(nr_samples);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
}

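/* Decide whether the hw binning pass is worthwhile (and safe) for this
 * batch: skip it when the scissor optimization is in use, when the bin
 * layout exceeds what the VSC pipes can cover, or when there are too few
 * bins or no draws for binning to pay off.
 */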
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   /* workaround: Like on a3xx, hw binning and scissor optimization
    * don't play nice together.
    *
    * Disable binning if scissor optimization is used.
    */
   if (gmem->minx || gmem->miny)
      return false;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   if ((gmem->maxpw > 15) || (gmem->maxph > 15))
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
          (batch->num_draws > 0);
}

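/* Draw commands are recorded with a placeholder visibility mode; now that
 * we know whether this batch uses hw binning, patch in the real one.
 */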
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
   unsigned i;
   for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
      *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
   }
   util_dynarray_clear(&batch->draw_patches);
}

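/* Program the visibility stream compressor (VSC): bin size, the geometry
 * of each of the 16 pipes, and the buffers that the binning pass writes
 * the per-pipe visibility streams into (allocated on first use).
 */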
static void
update_vsc_pipe(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
   OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
                     A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
   OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
   for (i = 0; i < 16; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
   for (i = 0; i < 16; i++) {
      if (!ctx->vsc_pipe_bo[i]) {
         ctx->vsc_pipe_bo[i] =
            fd_bo_new(ctx->dev, 0x20000, 0, "vsc_pipe[%u]", i);
      }
      OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
                0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
   for (i = 0; i < 16; i++) {
      OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
                        32); /* VSC_PIPE_DATA_LENGTH[i] */
   }
}

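/* Run the binning pass: replay the batch's draws in BINNING mode so the
 * VSC can record, per bin, which draws actually cover it.  The per-tile
 * rendering passes then use those visibility streams to skip dead draws.
 */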
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   uint32_t x1 = gmem->minx;
   uint32_t y1 = gmem->miny;
   uint32_t x2 = gmem->minx + gmem->width - 1;
   uint32_t y2 = gmem->miny + gmem->height - 1;

   fd5_set_render_mode(batch->ctx, ring, BINNING);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);

   fd5_event_write(batch, ring, UNK_2C, false);

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   fd5_emit_ib(ring, batch->binning);

   fd_reset_wfi(batch);

   fd5_event_write(batch, ring, UNK_2D, false);

   fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);

   // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)

   fd_wfi(batch, ring);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0);
}

/* before first tile */
static void
fd5_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   fd5_emit_restore(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   fd5_emit_lrz_flush(batch, ring);

   OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
   OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);

   /* Enable stream output for the first pass (likely the binning). */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   if (use_hw_binning(batch)) {
      emit_binning_pass(batch);

      /* Disable stream output after binning, since each VS output should get
       * streamed out once.
       */
      OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
      OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);

      fd5_emit_lrz_flush(batch, ring);
      patch_draws(batch, USE_VISIBILITY);
   } else {
      patch_draws(batch, IGNORE_VISIBILITY);
   }

   fd5_set_render_mode(batch->ctx, ring, GMEM);

   /* XXX If we're in gmem mode but not doing HW binning, then after the first
    * tile we should disable stream output (fd6_gmem.c doesn't do that either).
    */
}

/* before mem2gmem */
static void
fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, pipe_bo, 0, 0, 0);      /* VSC_PIPE[p].DATA_ADDRESS */
      OUT_RELOC(ring, fd5_ctx->vsc_size_mem,  /* VSC_SIZE_ADDRESS + (p * 4) */
                (tile->p * 4), 0, 0);
   } else {
      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);
   }

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));
}

/*
 * transfer from system memory to gmem
 */

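/* Blit one surface from its system memory resource into the tile buffer,
 * at the given base offset within gmem.  Note that z/s is routed through
 * BLIT_MRT0 (see the XXX below), since the blit has to convert from linear
 * in sysmem to tiled in gmem.
 */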
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t stride, size;

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
      // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
      // know otherwise how to go from linear in sysmem to tiled in gmem.
      // possibly we want to flip this around gmem2mem and keep depth
      // tiled in sysmem (and fixup sampler state to assume tiled).. this
      // might be required for doing depth/stencil in bypass mode?
      enum a5xx_color_fmt format =
         fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
      OUT_RING(ring,
               A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
      OUT_RING(ring,
               A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, psurf->u.tex.level)));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(
                        fd_resource_layer_stride(rsc, psurf->u.tex.level)));
      OUT_RELOC(ring, rsc->bo,
                fd_resource_offset(rsc, psurf->u.tex.level,
                                   psurf->u.tex.first_layer),
                0, 0); /* BASE_LO/HI */

      buf = BLIT_MRT0;
   }

   stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
   size = stride * gmem->bin_h;

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
   OUT_RING(ring, base);       /* RB_BLIT_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   fd5_emit_blit(batch, ring);
}

static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /*
    * setup mrt and zs with system memory base addresses:
    */

   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   // emit_zs(ring, pfb->zsbuf, NULL);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
                     A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }

   if (fd_gmem_needs_restore(batch, tile,
                             FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }
}

/* before IB to rendering cmds: */
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   emit_zs(ring, pfb->zsbuf, gmem);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);
   emit_msaa(ring, pfb->samples);
}

/*
 * transfer from gmem to system memory (ie. normal RAM)
 */

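/* Resolve one surface from the tile buffer back out to its system memory
 * resource, skipping resources that were never written (not valid).
 */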
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   bool tiled;
   uint32_t offset, pitch;

   if (!rsc->valid)
      return;

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   pitch = fd_resource_pitch(rsc, psurf->u.tex.level);

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
                     COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(
                     fd_resource_layer_stride(rsc, psurf->u.tex.level)));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   // bool msaa_resolve = pfb->samples > 1;
   bool msaa_resolve = false;
   OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
   OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));

   fd5_emit_blit(batch, ring);
}

static void
fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }
}

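/* After the last tile: flush LRZ and caches, and drop back to BYPASS mode. */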
static void
fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_cache_flush(batch, ring);
   fd5_set_render_mode(batch->ctx, ring, BYPASS);
}

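/* Setup for rendering directly to system memory (bypass mode), with no
 * tile buffer involved.  This also serves as the prep for nondraw
 * (blit/compute) batches, which bail out early below.
 */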
static void
fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;

   fd5_emit_restore(batch, ring);

   fd5_emit_lrz_flush(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |
                     A5XX_RB_CNTL_BYPASS);

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
                     A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* Enable stream output, since there's no binning pass to put it in. */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   patch_draws(batch, IGNORE_VISIBILITY);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   emit_msaa(ring, pfb->samples);
}

static void
fd5_emit_sysmem_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
}

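/* Wire the a5xx tiling/bypass entrypoints into the core context. */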
void
fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd5_emit_tile_init;
   ctx->emit_tile_prep = fd5_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
   ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd5_emit_tile_fini;
   ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
}