/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_gmem.h"
#include "fd5_program.h"
#include "fd5_zsa.h"

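/*
 * Emit per-MRT color buffer state.  With a non-NULL gmem state the MRTs
 * are pointed at their assigned base offsets in the tile buffer (with
 * bin-sized pitch and TILE5_2 tiling); with gmem==NULL they are pointed
 * at the backing BO in system memory, as used for bypass rendering and
 * for the restore (mem2gmem) path.
 */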
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
         struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
{
   enum a5xx_tile_mode tile_mode;
   unsigned i;

   for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
      enum a5xx_color_fmt format = 0;
      enum a3xx_color_swap swap = WZYX;
      bool srgb = false, sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      uint32_t stride = 0;
      uint32_t size = 0;
      uint32_t base = 0;
      uint32_t offset = 0;

      if (gmem) {
         tile_mode = TILE5_2;
      } else {
         tile_mode = TILE5_LINEAR;
      }

      if ((i < nr_bufs) && bufs[i]) {
         struct pipe_surface *psurf = bufs[i];
         enum pipe_format pformat = psurf->format;

         rsc = fd_resource(psurf->texture);

         format = fd5_pipe2color(pformat);
         swap = fd5_pipe2swap(pformat);
         srgb = util_format_is_srgb(pformat);
         sint = util_format_is_pure_sint(pformat);
         uint = util_format_is_pure_uint(pformat);

         assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

         offset = fd_resource_offset(rsc, psurf->u.tex.level,
                                     psurf->u.tex.first_layer);

         if (gmem) {
            stride = gmem->bin_w * gmem->cbuf_cpp[i];
            size = stride * gmem->bin_h;
            base = gmem->cbuf_base[i];
         } else {
            stride = fd_resource_pitch(rsc, psurf->u.tex.level);
            size = fd_resource_layer_stride(rsc, psurf->u.tex.level);

            tile_mode =
               fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
         }
      }

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
      OUT_RING(
         ring,
         A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
            A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
            A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
            COND(gmem,
                 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
            COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
      if (gmem || (i >= nr_bufs) || !bufs[i]) {
         OUT_RING(ring, base);       /* RB_MRT[i].BASE_LO */
         OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
      }

      OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
      OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
                        COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
                        COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
                        COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

      /* when we support UBWC, these would be the system memory
       * addr/pitch/etc:
       */
      OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
   }
}

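/*
 * Emit depth/stencil state, with the same gmem-vs-sysmem addressing split
 * as emit_mrt().  This also programs LRZ buffer state when the resource
 * has an LRZ allocation, and separate-stencil state when stencil lives in
 * its own resource (ie. Z32F_S8 style formats).
 */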
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
      uint32_t cpp = rsc->layout.cpp;
      uint32_t stride = 0;
      uint32_t size = 0;

      if (gmem) {
         stride = cpp * gmem->bin_w;
         size = stride * gmem->bin_h;
      } else {
         stride = fd_resource_pitch(rsc, zsbuf->u.tex.level);
         size = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
      }

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
      if (gmem) {
         OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo,
            fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer),
            0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
      }
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      if (rsc->lrz) {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
         OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
      } else {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }

      if (rsc->stencil) {
         if (gmem) {
            stride = 1 * gmem->bin_w;
            size = stride * gmem->bin_h;
         } else {
            stride = fd_resource_pitch(rsc->stencil, zsbuf->u.tex.level);
            size = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
         }

         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
         OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
         if (gmem) {
            OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
            OUT_RING(ring, 0x00000000);          /* RB_STENCIL_BASE_HI */
         } else {
            OUT_RELOC(ring, rsc->stencil->bo,
               fd_resource_offset(rsc->stencil, zsbuf->u.tex.level, zsbuf->u.tex.first_layer),
                      0, 0); /* RB_STENCIL_BASE_LO/HI */
         }
         OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
         OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
      } else {
         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
         OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
      }
   } else {
      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
      OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
   }
}

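/*
 * Program the sample count consistently across the TPL1, RB and GRAS
 * blocks; MSAA_DISABLE is set for the single-sampled case.
 */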
static void
emit_msaa(struct fd_ringbuffer *ring, uint32_t nr_samples)
{
   enum a3xx_msaa_samples samples = fd_msaa_samples(nr_samples);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
}

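/*
 * Decide whether HW binning is worth using for this batch.  Beyond the
 * scissor workaround below, binning is skipped when the bin grid exceeds
 * what the VSC pipes can cover (the gmem->maxpw/maxph limits appear to
 * come from the VSC pipe config field sizes), and when the batch is too
 * small for a binning pass to pay off.
 */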
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   /* workaround: Like on a3xx, hw binning and scissor optimization
    * don't play nice together.
    *
    * Disable binning if scissor optimization is used.
    */
   if (gmem->minx || gmem->miny)
      return false;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   if ((gmem->maxpw > 15) || (gmem->maxph > 15))
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
          (batch->num_draws > 0);
}

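/*
 * Draw packets are recorded before we know whether the batch will use HW
 * binning, so the visibility cull-mode is patched into each recorded draw
 * here at flush time.
 */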
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
   unsigned i;
   for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
      *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
   }
   util_dynarray_clear(&batch->draw_patches);
}

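/*
 * Program the Visibility Stream Compressor state: bin dimensions, the
 * screen region covered by each of the 16 VSC pipes, and the per-pipe
 * buffers (allocated on first use) that the binning pass writes the
 * visibility streams into.
 */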
static void
update_vsc_pipe(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
   OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
                     A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
   OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
   for (i = 0; i < 16; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
   for (i = 0; i < 16; i++) {
      if (!ctx->vsc_pipe_bo[i]) {
         ctx->vsc_pipe_bo[i] = fd_bo_new(
            ctx->dev, 0x20000, 0, "vsc_pipe[%u]", i);
      }
      OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
                0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
   for (i = 0; i < 16; i++) {
      OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
                        32); /* VSC_PIPE_DATA_LENGTH[i] */
   }
}

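/*
 * The binning pass: switch the render mode to BINNING, set up a scissor
 * covering the whole gmem region, and replay the recorded draws
 * (batch->binning) so the hw fills the VSC pipe buffers with per-bin
 * visibility streams for the per-tile rendering passes to consume.
 */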
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   uint32_t x1 = gmem->minx;
   uint32_t y1 = gmem->miny;
   uint32_t x2 = gmem->minx + gmem->width - 1;
   uint32_t y2 = gmem->miny + gmem->height - 1;

   fd5_set_render_mode(batch->ctx, ring, BINNING);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);

   fd5_event_write(batch, ring, UNK_2C, false);

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   fd5_emit_ib(ring, batch->binning);

   fd_reset_wfi(batch);

   fd5_event_write(batch, ring, UNK_2D, false);

   fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);

   // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)

   fd_wfi(batch, ring);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0);
}

/* before first tile */
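/*
 * One-time setup before the first tile: restore invariant context state,
 * program the CCU for GMEM mode, point zs/mrt state at the tile buffer,
 * and run the binning pass when use_hw_binning() allows it.
 */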
static void
fd5_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   fd5_emit_restore(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   fd5_emit_lrz_flush(batch, ring);

   OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
   OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);

   /* Enable stream output for the first pass (likely the binning). */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   if (use_hw_binning(batch)) {
      emit_binning_pass(batch);

      /* Disable stream output after binning, since each VS output should get
       * streamed out once.
       */
      OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
      OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);

      fd5_emit_lrz_flush(batch, ring);
      patch_draws(batch, USE_VISIBILITY);
   } else {
      patch_draws(batch, IGNORE_VISIBILITY);
   }

   fd5_set_render_mode(batch->ctx, ring, GMEM);

   /* XXX If we're in gmem mode but not doing HW binning, then after the first
    * tile we should disable stream output (fd6_gmem.c doesn't do that either).
    */
}

/* before mem2gmem */
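/*
 * Per-tile setup: program the window scissor / resolve extents for this
 * tile and, with HW binning, use CP_SET_BIN_DATA5 to point the CP at this
 * tile's slice of the visibility stream so invisible draws are skipped.
 */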
static void
fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, pipe_bo, 0, 0, 0);     /* VSC_PIPE[p].DATA_ADDRESS */
      OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */
                (tile->p * 4), 0, 0);
   } else {
      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);
   }

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));
}

/*
 * transfer from system memory to gmem
 */

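/*
 * Restore one surface from sysmem into the tile buffer with the blit
 * unit: the source is the MRT/depth state already pointing at sysmem (set
 * up by fd5_emit_tile_mem2gmem()), the destination is the surface's base
 * offset in gmem.  Depth/stencil is imported via BLIT_MRT0 (see below).
 */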
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t stride, size;

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
      // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
      // know otherwise how to go from linear in sysmem to tiled in gmem.
      // possibly we want to flip this around gmem2mem and keep depth
      // tiled in sysmem (and fixup sampler state to assume tiled).. this
      // might be required for doing depth/stencil in bypass mode?
      enum a5xx_color_fmt format =
         fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
      OUT_RING(ring,
               A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, psurf->u.tex.level)));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_resource_layer_stride(rsc, psurf->u.tex.level)));
      OUT_RELOC(ring, rsc->bo,
         fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer),
         0, 0); /* BASE_LO/HI */

      buf = BLIT_MRT0;
   }

   stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
   size = stride * gmem->bin_h;

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
   OUT_RING(ring, base);       /* RB_BLIT_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   fd5_emit_blit(batch, ring);
}

static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /*
    * setup mrt and zs with system memory base addresses:
    */

   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   //	emit_zs(ring, pfb->zsbuf, NULL);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
                     A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }

   if (fd_gmem_needs_restore(batch, tile,
                             FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }
}

/* before IB to rendering cmds: */
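/*
 * Note this re-emits zs/mrt state with gmem base addresses, since the
 * mem2gmem restore path above left them pointing at sysmem.
 */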
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   emit_zs(ring, pfb->zsbuf, gmem);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);
   emit_msaa(ring, pfb->samples);
}

/*
 * transfer from gmem to system memory (ie. normal RAM)
 */

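/*
 * Resolve one surface from the tile buffer back to its BO in system
 * memory.  The RESOLVE_CNTL_3 tiled bit must match the destination's
 * layout, and the blit is skipped when the resource isn't marked valid.
 */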
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   bool tiled;
   uint32_t offset, pitch;

   if (!rsc->valid)
      return;

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   pitch = fd_resource_pitch(rsc, psurf->u.tex.level);

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
                     COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(fd_resource_layer_stride(rsc, psurf->u.tex.level)));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   //	bool msaa_resolve = pfb->samples > 1;
   bool msaa_resolve = false;
   OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
   OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));

   fd5_emit_blit(batch, ring);
}

static void
fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }
}

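/* after the last tile: flush LRZ and caches, then return to BYPASS mode */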
static void
fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_cache_flush(batch, ring);
   fd5_set_render_mode(batch->ctx, ring, BYPASS);
}

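/*
 * Setup for rendering directly to system memory (bypass), used when
 * tiling isn't worthwhile: CCU in bypass mode, a full-framebuffer
 * scissor, and stream output left enabled since there is no binning pass.
 */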
static void
fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   fd5_emit_restore(batch, ring);

   fd5_emit_lrz_flush(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |
                     A5XX_RB_CNTL_BYPASS);

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
                     A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* Enable stream output, since there's no binning pass to put it in. */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   patch_draws(batch, IGNORE_VISIBILITY);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   emit_msaa(ring, pfb->samples);
}

static void
fd5_emit_sysmem_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
}

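/* hook up the a5xx tile/sysmem emit callbacks at context creation: */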
void
fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd5_emit_tile_init;
   ctx->emit_tile_prep = fd5_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
   ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd5_emit_tile_fini;
   ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
}