/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_gmem.h"
#include "fd5_program.h"
#include "fd5_zsa.h"

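/* emit per-MRT color buffer state.  If 'gmem' is non-NULL the MRTs are
 * set up to point at the tile buffer (gmem), otherwise they point at
 * the backing resources in system memory:
 */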
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
         struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
{
   enum a5xx_tile_mode tile_mode;
   unsigned i;

   for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
      enum a5xx_color_fmt format = 0;
      enum a3xx_color_swap swap = WZYX;
      bool srgb = false, sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      uint32_t stride = 0;
      uint32_t size = 0;
      uint32_t base = 0;
      uint32_t offset = 0;

      if (gmem) {
         tile_mode = TILE5_2;
      } else {
         tile_mode = TILE5_LINEAR;
      }

      if ((i < nr_bufs) && bufs[i]) {
         struct pipe_surface *psurf = bufs[i];
         enum pipe_format pformat = psurf->format;

         rsc = fd_resource(psurf->texture);

         format = fd5_pipe2color(pformat);
         swap = fd5_pipe2swap(pformat);
         srgb = util_format_is_srgb(pformat);
         sint = util_format_is_pure_sint(pformat);
         uint = util_format_is_pure_uint(pformat);

         assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

         offset = fd_resource_offset(rsc, psurf->u.tex.level,
                                     psurf->u.tex.first_layer);

         if (gmem) {
            stride = gmem->bin_w * gmem->cbuf_cpp[i];
            size = stride * gmem->bin_h;
            base = gmem->cbuf_base[i];
         } else {
            stride = fd_resource_pitch(rsc, psurf->u.tex.level);
            size = fd_resource_layer_stride(rsc, psurf->u.tex.level);

            tile_mode =
               fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
         }
      }

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
      OUT_RING(
         ring,
         A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
            A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
            A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
            COND(gmem,
                 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
            COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
      if (gmem || (i >= nr_bufs) || !bufs[i]) {
         OUT_RING(ring, base);       /* RB_MRT[i].BASE_LO */
         OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
      }

      OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
      OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
                        COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
                        COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
                        COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

      /* when we support UBWC, these would be the system memory
       * addr/pitch/etc:
       */
      OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
   }
}

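/* emit depth/stencil buffer state, for either the tile buffer (gmem) or
 * the system memory copy, including LRZ and separate-stencil setup:
 */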
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
      uint32_t cpp = rsc->layout.cpp;
      uint32_t stride = 0;
      uint32_t size = 0;

      if (gmem) {
         stride = cpp * gmem->bin_w;
         size = stride * gmem->bin_h;
      } else {
         stride = fd_resource_pitch(rsc, zsbuf->u.tex.level);
         size = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
      }

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
      if (gmem) {
         OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo,
            fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer),
            0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
      }
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      if (rsc->lrz) {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
         OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
      } else {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }

      if (rsc->stencil) {
         if (gmem) {
            stride = 1 * gmem->bin_w;
            size = stride * gmem->bin_h;
         } else {
            /* the separate stencil (S8) buffer has its own layout: */
            stride = fd_resource_pitch(rsc->stencil, zsbuf->u.tex.level);
            size = fd_resource_layer_stride(rsc->stencil, zsbuf->u.tex.level);
         }

         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
         OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
         if (gmem) {
            OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
            OUT_RING(ring, 0x00000000);          /* RB_STENCIL_BASE_HI */
         } else {
            OUT_RELOC(ring, rsc->stencil->bo,
               fd_resource_offset(rsc->stencil, zsbuf->u.tex.level, zsbuf->u.tex.first_layer),
               0, 0); /* RB_STENCIL_BASE_LO/HI */
         }
         OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
         OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
      } else {
         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
         OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
      }
   } else {
      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
      OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
   }
}

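/* program the MSAA sample count into each block (TPL1, RB, GRAS): */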
static void
emit_msaa(struct fd_ringbuffer *ring, uint32_t nr_samples)
{
   enum a3xx_msaa_samples samples = fd_msaa_samples(nr_samples);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
}

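/* decide whether the hw binning (visibility) pass is worthwhile for
 * this batch:
 */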
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   /* workaround: Like on a3xx, hw binning and scissor optimization
    * don't play nice together.
    *
    * Disable binning if scissor optimization is used.
    */
   if (gmem->minx || gmem->miny)
      return false;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   if ((gmem->maxpw > 15) || (gmem->maxph > 15))
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
          (batch->num_draws > 0);
}

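/* patch the visibility-cull mode into draw cmds that were recorded
 * before we knew whether hw binning would be used:
 */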
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
   unsigned i;
   for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
      *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
   }
   util_dynarray_clear(&batch->draw_patches);
}

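/* configure the VSC (visibility stream) pipes, allocating the per-pipe
 * buffers that the binning pass writes visibility data into:
 */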
static void
update_vsc_pipe(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
   OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
                     A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
   OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
   for (i = 0; i < 16; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
   for (i = 0; i < 16; i++) {
      if (!ctx->vsc_pipe_bo[i]) {
         ctx->vsc_pipe_bo[i] = fd_bo_new(
            ctx->dev, 0x20000, 0, "vsc_pipe[%u]", i);
      }
      OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
                0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
   for (i = 0; i < 16; i++) {
      OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
                        32); /* VSC_PIPE_DATA_LENGTH[i] */
   }
}

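/* run the binning pass, replaying the draw cmds (via IB) in binning
 * mode to produce the per-bin visibility streams:
 */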
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   uint32_t x1 = gmem->minx;
   uint32_t y1 = gmem->miny;
   uint32_t x2 = gmem->minx + gmem->width - 1;
   uint32_t y2 = gmem->miny + gmem->height - 1;

   fd5_set_render_mode(batch->ctx, ring, BINNING);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);

   fd5_event_write(batch, ring, UNK_2C, false);

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   fd5_emit_ib(ring, batch->binning);

   fd_reset_wfi(batch);

   fd5_event_write(batch, ring, UNK_2D, false);

   fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);

   // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)

   fd_wfi(batch, ring);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0);
}

/* before first tile */
static void
fd5_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   fd5_emit_restore(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   fd5_emit_lrz_flush(batch, ring);

   OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
   OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);

   /* Enable stream output for the first pass (likely the binning). */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   if (use_hw_binning(batch)) {
      emit_binning_pass(batch);

      /* Disable stream output after binning, since each VS output should get
       * streamed out once.
       */
      OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
      OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);

      fd5_emit_lrz_flush(batch, ring);
      patch_draws(batch, USE_VISIBILITY);
   } else {
      patch_draws(batch, IGNORE_VISIBILITY);
   }

   fd5_set_render_mode(batch->ctx, ring, GMEM);

   /* XXX If we're in gmem mode but not doing HW binning, then after the first
    * tile we should disable stream output (fd6_gmem.c doesn't do that either).
    */
}

/* before mem2gmem */
static void
fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, pipe_bo, 0, 0, 0);     /* VSC_PIPE[p].DATA_ADDRESS */
      OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */
                (tile->p * 4), 0, 0);
   } else {
      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);
   }

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));
}

/*
 * transfer from system memory to gmem
 */

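/* restore (blit) a single surface from system memory into the tile
 * buffer:
 */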
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t stride, size;

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
      // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
      // know otherwise how to go from linear in sysmem to tiled in gmem.
      // possibly we want to flip this around gmem2mem and keep depth
      // tiled in sysmem (and fixup sampler state to assume tiled).. this
      // might be required for doing depth/stencil in bypass mode?
      enum a5xx_color_fmt format =
         fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
      OUT_RING(ring,
               A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, psurf->u.tex.level)));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_resource_layer_stride(rsc, psurf->u.tex.level)));
      OUT_RELOC(ring, rsc->bo,
         fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer),
         0, 0); /* BASE_LO/HI */

      buf = BLIT_MRT0;
   }

   stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
   size = stride * gmem->bin_h;

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
   OUT_RING(ring, base);       /* RB_BLIT_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   fd5_emit_blit(batch, ring);
}

static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /*
    * setup mrt and zs with system memory base addresses:
    */

   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   //	emit_zs(ring, pfb->zsbuf, NULL);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
                     A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }

   if (fd_gmem_needs_restore(batch, tile,
                             FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }
}

/* before IB to rendering cmds: */
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   emit_zs(ring, pfb->zsbuf, gmem);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);
   emit_msaa(ring, pfb->samples);
}

/*
 * transfer from gmem to system memory (ie. normal RAM)
 */

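/* resolve (blit) a single surface from the tile buffer back out to
 * system memory:
 */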
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   bool tiled;
   uint32_t offset, pitch;

   if (!rsc->valid)
      return;

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   pitch = fd_resource_pitch(rsc, psurf->u.tex.level);

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
                     COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(fd_resource_layer_stride(rsc, psurf->u.tex.level)));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   //	bool msaa_resolve = pfb->samples > 1;
   bool msaa_resolve = false;
   OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
   OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));

   fd5_emit_blit(batch, ring);
}

static void
fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }
}

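/* after last tile */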
static void
fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_cache_flush(batch, ring);
   fd5_set_render_mode(batch->ctx, ring, BYPASS);
}

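/* setup for rendering directly to system memory (bypass), skipping the
 * tiling passes entirely:
 */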
static void
fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;

   fd5_emit_restore(batch, ring);

   fd5_emit_lrz_flush(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, ctx->screen->info->num_sp_cores - 1); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |
                     A5XX_RB_CNTL_BYPASS);

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
                     A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* Enable stream output, since there's no binning pass to put it in. */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   patch_draws(batch, IGNORE_VISIBILITY);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   emit_msaa(ring, pfb->samples);
}

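/* flush CCU color/depth caches after sysmem (bypass) rendering: */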
static void
fd5_emit_sysmem_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
}

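/* hook up the a5xx gmem/tiling entrypoints: */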
void
fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd5_emit_tile_init;
   ctx->emit_tile_prep = fd5_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
   ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd5_emit_tile_fini;
   ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
}