/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <stdio.h>

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "freedreno_tracepoints.h"

#include "fd6_blitter.h"
#include "fd6_context.h"
#include "fd6_draw.h"
#include "fd6_emit.h"
#include "fd6_format.h"
#include "fd6_gmem.h"
#include "fd6_pack.h"
#include "fd6_program.h"
#include "fd6_resource.h"
#include "fd6_zsa.h"

/**
 * Emits the flags registers, suitable for RB_MRT_FLAG_BUFFER,
 * RB_DEPTH_FLAG_BUFFER, SP_PS_2D_SRC_FLAGS, and RB_BLIT_FLAG_DST.
 */
void
fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,
                        int level, int layer)
{
   if (fd_resource_ubwc_enabled(rsc, level)) {
      OUT_RELOC(ring, rsc->bo, fd_resource_ubwc_offset(rsc, level, layer), 0,
                0);
      OUT_RING(ring, A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(
                        fdl_ubwc_pitch(&rsc->layout, level)) |
                        A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(
                           rsc->layout.ubwc_layer_size >> 2));
   } else {
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, 0x00000000);
   }
}

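/* Emit per-MRT color buffer state: RB_MRT buf info/pitch/base, the SP_FS
 * MRT register, and the UBWC flag buffer reference for each bound cbuf.
 * When a gmem state object is given, RB_MRT_BASE_GMEM gets that cbuf's
 * offset within the tile buffer (zero for sysmem rendering).
 */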
static void
emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,
         const struct fd_gmem_stateobj *gmem)
{
   unsigned srgb_cntl = 0;
   unsigned i;

   unsigned max_layer_index = 0;

   for (i = 0; i < pfb->nr_cbufs; i++) {
      enum a3xx_color_swap swap = WZYX;
      bool sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      struct fdl_slice *slice = NULL;
      uint32_t stride = 0;
      uint32_t array_stride = 0;
      uint32_t offset;

      if (!pfb->cbufs[i])
         continue;

      struct pipe_surface *psurf = pfb->cbufs[i];
      enum pipe_format pformat = psurf->format;
      rsc = fd_resource(psurf->texture);
      if (!rsc->bo)
         continue;

      uint32_t base = gmem ? gmem->cbuf_base[i] : 0;
      slice = fd_resource_slice(rsc, psurf->u.tex.level);
      uint32_t tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
      enum a6xx_format format = fd6_color_format(pformat, tile_mode);
      sint = util_format_is_pure_sint(pformat);
      uint = util_format_is_pure_uint(pformat);

      if (util_format_is_srgb(pformat))
         srgb_cntl |= (1 << i);

      offset =
         fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);

      stride = fd_resource_pitch(rsc, psurf->u.tex.level);
      array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
      swap = fd6_color_swap(pformat, rsc->layout.tile_mode);

      max_layer_index = psurf->u.tex.last_layer - psurf->u.tex.first_layer;

      debug_assert((offset + slice->size0) <= fd_bo_size(rsc->bo));

      OUT_REG(
         ring,
         A6XX_RB_MRT_BUF_INFO(i, .color_format = format,
                              .color_tile_mode = tile_mode, .color_swap = swap),
         A6XX_RB_MRT_PITCH(i, .a6xx_rb_mrt_pitch = stride),
         A6XX_RB_MRT_ARRAY_PITCH(i, .a6xx_rb_mrt_array_pitch = array_stride),
         A6XX_RB_MRT_BASE(i, .bo = rsc->bo, .bo_offset = offset),
         A6XX_RB_MRT_BASE_GMEM(i, .unknown = base));

      OUT_REG(ring, A6XX_SP_FS_MRT_REG(i, .color_format = format,
                                       .color_sint = sint, .color_uint = uint));

      OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
      fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
                              psurf->u.tex.first_layer);
   }

   OUT_REG(ring, A6XX_RB_SRGB_CNTL(.dword = srgb_cntl));
   OUT_REG(ring, A6XX_SP_SRGB_CNTL(.dword = srgb_cntl));

   OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index));
}

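/* Emit depth/stencil buffer state, LRZ buffer state (zeroed when there is
 * no zsbuf or no LRZ buffer), and separate-stencil state when the resource
 * has a separate stencil plane.
 */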
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format);
      uint32_t stride = fd_resource_pitch(rsc, 0);
      uint32_t array_stride = fd_resource_layer_stride(rsc, 0);
      uint32_t base = gmem ? gmem->zsbuf_base[0] : 0;
      uint32_t offset =
         fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);

      OUT_REG(
         ring, A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt),
         A6XX_RB_DEPTH_BUFFER_PITCH(.a6xx_rb_depth_buffer_pitch = stride),
         A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(.a6xx_rb_depth_buffer_array_pitch =
                                             array_stride),
         A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),
         A6XX_RB_DEPTH_BUFFER_BASE_GMEM(.dword = base));

      OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));

      OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, 3);
      fd6_emit_flag_reference(ring, rsc, zsbuf->u.tex.level,
                              zsbuf->u.tex.first_layer);

      if (rsc->lrz) {
         OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(.bo = rsc->lrz),
                 A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = rsc->lrz_pitch),
                 // XXX a6xx seems to use a different buffer here.. not sure
                 // what for..
                 A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
      } else {
         OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);
      }

      /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAS_LRZ_BUFFER_BASE
       * plus this CP_EVENT_WRITE at the end in its own IB..
       */
      OUT_PKT7(ring, CP_EVENT_WRITE, 1);
      OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25));

      if (rsc->stencil) {
         stride = fd_resource_pitch(rsc->stencil, 0);
         array_stride = fd_resource_layer_stride(rsc->stencil, 0);
         uint32_t base = gmem ? gmem->zsbuf_base[1] : 0;

         OUT_REG(ring, A6XX_RB_STENCIL_INFO(.separate_stencil = true),
                 A6XX_RB_STENCIL_BUFFER_PITCH(.a6xx_rb_stencil_buffer_pitch =
                                                 stride),
                 A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(
                       .a6xx_rb_stencil_buffer_array_pitch = array_stride),
                 A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo),
                 A6XX_RB_STENCIL_BUFFER_BASE_GMEM(.dword = base));
      } else {
         OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
      }
   } else {
      OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
      OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */

      OUT_REG(ring,
              A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));

      OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */

      OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
   }
}

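/* Decide whether to use hw binning: only worthwhile with at least two bins
 * and at least one draw, and apparently limited to at most 32 tiles per
 * VSC pipe.
 */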
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) >= 2) &&
          (batch->num_draws > 0);
}

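/* Patch previously emitted fb-read texture descriptors to sample from the
 * tile buffer: force TILE6_2 tile mode with no swap, and point the base
 * address at GMEM with the bin's pitch.
 */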
static void
patch_fb_read_gmem(struct fd_batch *batch)
{
   unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;

   struct fd_screen *screen = batch->ctx->screen;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];
   uint32_t texconst0 = fd6_tex_const_0(
      psurf->texture, psurf->u.tex.level, psurf->format, PIPE_SWIZZLE_X,
      PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);

   /* always TILE6_2 mode in GMEM.. which also means no swap: */
   texconst0 &=
      ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
   texconst0 |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);

   for (unsigned i = 0; i < num_patches; i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
      patch->cs[0] = texconst0;
      patch->cs[2] = A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]) |
                     A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
      patch->cs[4] = A6XX_TEX_CONST_4_BASE_LO(screen->gmem_base);
      patch->cs[5] = A6XX_TEX_CONST_5_BASE_HI(screen->gmem_base >> 32) |
                     A6XX_TEX_CONST_5_DEPTH(1);
   }
   util_dynarray_clear(&batch->fb_read_patches);
}

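/* Sysmem counterpart of patch_fb_read_gmem(): patch fb-read texture
 * descriptors to sample directly from the color buffer in memory,
 * including UBWC flag buffer state when compression is enabled.
 */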
static void
patch_fb_read_sysmem(struct fd_batch *batch)
{
   unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];
   if (!psurf)
      return;

   struct fd_resource *rsc = fd_resource(psurf->texture);
   unsigned lvl = psurf->u.tex.level;
   unsigned layer = psurf->u.tex.first_layer;
   bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, lvl);
   uint64_t iova = fd_bo_get_iova(rsc->bo) + fd_resource_offset(rsc, lvl, layer);
   uint64_t ubwc_iova = fd_bo_get_iova(rsc->bo) + fd_resource_ubwc_offset(rsc, lvl, layer);
   uint32_t texconst0 = fd6_tex_const_0(
      psurf->texture, psurf->u.tex.level, psurf->format, PIPE_SWIZZLE_X,
      PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);
   uint32_t block_width, block_height;
   fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);

   for (unsigned i = 0; i < num_patches; i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
      patch->cs[0] = texconst0;
      patch->cs[2] = A6XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)) |
                     A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
      /* This is cheating a bit, since we can't use OUT_RELOC() here.. but
       * the render target will already have a reloc emitted for RB_MRT state,
       * so we can get away with manually patching in the address here:
       */
      patch->cs[4] = A6XX_TEX_CONST_4_BASE_LO(iova);
      patch->cs[5] = A6XX_TEX_CONST_5_BASE_HI(iova >> 32) |
                     A6XX_TEX_CONST_5_DEPTH(1);

      if (!ubwc_enabled)
         continue;

      patch->cs[3] |= A6XX_TEX_CONST_3_FLAG;
      patch->cs[7] = A6XX_TEX_CONST_7_FLAG_LO(ubwc_iova);
      patch->cs[8] = A6XX_TEX_CONST_8_FLAG_HI(ubwc_iova >> 32);
      patch->cs[9] = A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(
            rsc->layout.ubwc_layer_size >> 2);
      patch->cs[10] =
            A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(
               fdl_ubwc_pitch(&rsc->layout, lvl)) |
            A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(
               DIV_ROUND_UP(u_minify(psurf->texture->width0, lvl), block_width))) |
            A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(
               DIV_ROUND_UP(u_minify(psurf->texture->height0, lvl), block_height)));
   }
   util_dynarray_clear(&batch->fb_read_patches);
}

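/* Emit RB_RENDER_CNTL, reflecting binning mode and which of depth/MRTs
 * have UBWC flag buffers enabled.  On parts with CP_REG_WRITE the value
 * goes through the CP with the RENDER_CNTL tracker.
 */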
static void
update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,
                   bool binning)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_screen *screen = batch->ctx->screen;
   uint32_t cntl = 0;
   bool depth_ubwc_enable = false;
   uint32_t mrts_ubwc_enable = 0;
   int i;

   if (pfb->zsbuf) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
      depth_ubwc_enable =
         fd_resource_ubwc_enabled(rsc, pfb->zsbuf->u.tex.level);
   }

   for (i = 0; i < pfb->nr_cbufs; i++) {
      if (!pfb->cbufs[i])
         continue;

      struct pipe_surface *psurf = pfb->cbufs[i];
      struct fd_resource *rsc = fd_resource(psurf->texture);
      if (!rsc->bo)
         continue;

      if (fd_resource_ubwc_enabled(rsc, psurf->u.tex.level))
         mrts_ubwc_enable |= 1 << i;
   }

   cntl |= A6XX_RB_RENDER_CNTL_CCUSINGLECACHELINESIZE(2);
   if (binning)
      cntl |= A6XX_RB_RENDER_CNTL_BINNING;

   if (screen->info->a6xx.has_cp_reg_write) {
      OUT_PKT7(ring, CP_REG_WRITE, 3);
      OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));
      OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL);
   } else {
      OUT_PKT4(ring, REG_A6XX_RB_RENDER_CNTL, 1);
   }
   OUT_RING(ring, cntl |
                     COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) |
                     A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable));
}

/* extra size to store VSC_DRAW_STRM_SIZE: */
#define VSC_DRAW_STRM_SIZE(pitch) ((pitch)*32 + 0x100)
#define VSC_PRIM_STRM_SIZE(pitch) ((pitch)*32)

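/* Set up the VSC (visibility stream) buffers and per-pipe config for the
 * binning pass, first re-allocating the draw/prim stream BOs if the
 * batch's recorded stream size outgrew the current pitch.
 */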
static void
update_vsc_pipe(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   if (batch->draw_strm_bits / 8 > fd6_ctx->vsc_draw_strm_pitch) {
      if (fd6_ctx->vsc_draw_strm)
         fd_bo_del(fd6_ctx->vsc_draw_strm);
      fd6_ctx->vsc_draw_strm = NULL;
      /* Note: probably only need to align to 0x40, but aligning stronger
       * reduces the odds that we will have to realloc again on the next
       * frame:
       */
      fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_draw_strm_pitch);
   }

   if (batch->prim_strm_bits / 8 > fd6_ctx->vsc_prim_strm_pitch) {
      if (fd6_ctx->vsc_prim_strm)
         fd_bo_del(fd6_ctx->vsc_prim_strm);
      fd6_ctx->vsc_prim_strm = NULL;
      fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_prim_strm_pitch);
   }

   if (!fd6_ctx->vsc_draw_strm) {
      fd6_ctx->vsc_draw_strm = fd_bo_new(
         ctx->screen->dev, VSC_DRAW_STRM_SIZE(fd6_ctx->vsc_draw_strm_pitch),
         0, "vsc_draw_strm");
   }

   if (!fd6_ctx->vsc_prim_strm) {
      fd6_ctx->vsc_prim_strm = fd_bo_new(
         ctx->screen->dev, VSC_PRIM_STRM_SIZE(fd6_ctx->vsc_prim_strm_pitch),
         0, "vsc_prim_strm");
   }

   OUT_REG(
      ring, A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
      A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = fd6_ctx->vsc_draw_strm,
                                      .bo_offset =
                                         32 * fd6_ctx->vsc_draw_strm_pitch));

   OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x, .ny = gmem->nbins_y));

   OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
   for (i = 0; i < 32; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_REG(
      ring, A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),
      A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),
      A6XX_VSC_PRIM_STRM_LIMIT(.dword = fd6_ctx->vsc_prim_strm_pitch - 64));

   OUT_REG(
      ring, A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),
      A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),
      A6XX_VSC_DRAW_STRM_LIMIT(.dword = fd6_ctx->vsc_draw_strm_pitch - 64));
}

/*
 * If overflow is detected, either 0x1 (VSC_DRAW_STRM overflow) or 0x3
 * (VSC_PRIM_STRM overflow) plus the size of the overflowed buffer is
 * written to control->vsc_overflow.  This allows the CPU to
 * detect which buffer overflowed (and, since the current size is
 * encoded as well, this protects against already-submitted but
 * not executed batches from fooling the CPU into increasing the
 * size again unnecessarily).
 */
static void
emit_vsc_overflow_test(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd6_context *fd6_ctx = fd6_context(batch->ctx);

   debug_assert((fd6_ctx->vsc_draw_strm_pitch & 0x3) == 0);
   debug_assert((fd6_ctx->vsc_prim_strm_pitch & 0x3) == 0);

   /* Check for overflow, write vsc_scratch if detected: */
   for (int i = 0; i < gmem->num_vsc_pipes; i++) {
      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                        CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));

      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                        CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));
   }

   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
}

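/* Read back the vsc_overflow value written by emit_vsc_overflow_test(),
 * and if an overflow happened at the current size, drop the affected
 * stream BO and double its pitch for the next allocation.
 */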
static void
check_vsc_overflow(struct fd_context *ctx)
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd6_control *control = fd_bo_map(fd6_ctx->control_mem);
   uint32_t vsc_overflow = control->vsc_overflow;

   if (!vsc_overflow)
      return;

   /* clear overflow flag: */
   control->vsc_overflow = 0;

   unsigned buffer = vsc_overflow & 0x3;
   unsigned size = vsc_overflow & ~0x3;

   if (buffer == 0x1) {
      /* VSC_DRAW_STRM overflow: */

      if (size < fd6_ctx->vsc_draw_strm_pitch) {
         /* we've already increased the size, this overflow is
          * from a batch submitted before resize, but executed
          * after
          */
         return;
      }

      fd_bo_del(fd6_ctx->vsc_draw_strm);
      fd6_ctx->vsc_draw_strm = NULL;
      fd6_ctx->vsc_draw_strm_pitch *= 2;

      mesa_logd("resized VSC_DRAW_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_draw_strm_pitch);

   } else if (buffer == 0x3) {
      /* VSC_PRIM_STRM overflow: */

      if (size < fd6_ctx->vsc_prim_strm_pitch) {
         /* we've already increased the size */
         return;
      }

      fd_bo_del(fd6_ctx->vsc_prim_strm);
      fd6_ctx->vsc_prim_strm = NULL;
      fd6_ctx->vsc_prim_strm_pitch *= 2;

      mesa_logd("resized VSC_PRIM_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_prim_strm_pitch);

   } else {
      /* NOTE: it's possible, for example, for overflow to corrupt the
       * control page.  I mostly just see this hit if I set initial VSC
       * buffer size extremely small.  Things still seem to recover,
       * but maybe we should pre-emptively realloc vsc_data/vsc_data2
       * and hope for different memory placement?
       */
      mesa_loge("invalid vsc_overflow value: 0x%08x", vsc_overflow);
   }
}

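/* If autotune results are being collected for this batch, emit the
 * ZPASS_DONE event that writes the starting sample-count into the batch's
 * autotune result slot.
 */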
static void
emit_common_init(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   if (!result)
      return;

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_start));

   fd6_event_write(batch, ring, ZPASS_DONE, false);
}

static void
emit_common_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   if (!result)
      return;

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_end));

   fd6_event_write(batch, ring, ZPASS_DONE, false);

   // TODO is there a better event to use.. a single ZPASS_DONE_TS would be nice
   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
   OUT_RELOC(ring, results_ptr(at, fence));
   OUT_RING(ring, result->fence);
}

/*
 * Emit conditional CP_INDIRECT_BRANCH based on VSC_STATE[p], ie. the IB
 * is skipped for tiles that have no visible geometry.
 */
static void
emit_conditional_ib(struct fd_batch *batch, const struct fd_tile *tile,
                    struct fd_ringbuffer *target)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (target->cur == target->start)
      return;

   emit_marker6(ring, 6);

   unsigned count = fd_ringbuffer_cmd_count(target);

   BEGIN_RING(ring, 5 + 4 * count); /* ensure conditional doesn't get split */

   OUT_PKT7(ring, CP_REG_TEST, 1);
   OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |
                     A6XX_CP_REG_TEST_0_BIT(tile->n) |
                     A6XX_CP_REG_TEST_0_WAIT_FOR_ME);

   OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
   OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
   OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(4 * count));

   for (unsigned i = 0; i < count; i++) {
      uint32_t dwords;
      OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
      dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
      assert(dwords > 0);
      OUT_RING(ring, dwords);
   }

   emit_marker6(ring, 6);
}

static void
set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2,
            uint32_t y2)
{
   OUT_REG(ring, A6XX_GRAS_SC_WINDOW_SCISSOR_TL(.x = x1, .y = y1),
           A6XX_GRAS_SC_WINDOW_SCISSOR_BR(.x = x2, .y = y2));

   OUT_REG(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = x1, .y = y1),
           A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = x2, .y = y2));
}

static void
set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag)
{
   OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
   OUT_REG(ring, A6XX_RB_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
   /* no flag for RB_BIN_CONTROL2... */
   OUT_REG(ring, A6XX_RB_BIN_CONTROL2(.binw = w, .binh = h));
}

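/* Run the binning (visibility) pass: program VSC state, replay the draw
 * IB in BINNING_PASS mode to generate the visibility streams, then test
 * for VSC overflow before the draw pass consumes them.
 */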
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   debug_assert(!batch->tessellation);

   set_scissor(ring, 0, 0, gmem->width - 1, gmem->height - 1);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
   emit_marker6(ring, 7);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x1);

   OUT_WFI5(ring);

   OUT_REG(ring, A6XX_VFD_MODE_CNTL(.render_mode = BINNING_PASS));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

   OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2C);

   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) | A6XX_RB_WINDOW_OFFSET_Y(0));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(0) | A6XX_SP_TP_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   trace_start_binning_ib(&batch->trace, ring);
   fd6_emit_ib(ring, batch->draw);
   trace_end_binning_ib(&batch->trace, ring);

   fd_reset_wfi(batch);

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2D);

   fd6_cache_inv(batch, ring);
   fd6_cache_flush(batch, ring);
   fd_wfi(batch, ring);

   OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

   trace_start_vsc_overflow_test(&batch->trace, batch->gmem);
   emit_vsc_overflow_test(batch);
   trace_end_vsc_overflow_test(&batch->trace, batch->gmem);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x0);

   OUT_WFI5(ring);

   OUT_REG(ring,
           A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
                            .gmem = true,
                            .unk2 = screen->info->a6xx.ccu_cntl_gmem_unk2));
}

static void
emit_msaa(struct fd_ringbuffer *ring, unsigned nr)
{
   enum a3xx_msaa_samples samples = fd_msaa_samples(nr);

   OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_MSAA_CNTL, 1);
   OUT_RING(ring, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
}

static void prepare_tile_setup_ib(struct fd_batch *batch);
static void prepare_tile_fini_ib(struct fd_batch *batch);

/* before first tile */
static void
fd6_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   fd6_emit_restore(batch, ring);

   fd6_emit_lrz_flush(ring);

   if (batch->prologue) {
      trace_start_prologue(&batch->trace, ring);
      fd6_emit_ib(ring, batch->prologue);
      trace_end_prologue(&batch->trace, ring);
   }

   fd6_cache_inv(batch, ring);

   prepare_tile_setup_ib(batch);
   prepare_tile_fini_ib(batch);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   fd_wfi(batch, ring);
   OUT_REG(ring,
           A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
                            .gmem = true,
                            .unk2 = screen->info->a6xx.ccu_cntl_gmem_unk2));

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb, batch->gmem_state);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_gmem(batch);

   if (use_hw_binning(batch)) {
      /* enable stream-out during binning pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

      set_bin_size(ring, gmem->bin_w, gmem->bin_h,
                   A6XX_RB_BIN_CONTROL_RENDER_MODE(BINNING_PASS) |
                   A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));
      update_render_cntl(batch, pfb, true);
      emit_binning_pass(batch);

      /* and disable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(true));

      /*
       * NOTE: even if we detect VSC overflow and disable use of
       * visibility stream in draw pass, it is still safe to execute
       * the rest of these cmds:
       */

      // NOTE a618 not setting .FORCE_LRZ_WRITE_DIS ..
      set_bin_size(ring, gmem->bin_w, gmem->bin_h,
                   A6XX_RB_BIN_CONTROL_FORCE_LRZ_WRITE_DIS |
                   A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));

      OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
      OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

      OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
      OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

      OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
      OUT_RING(ring, 0x1);
   } else {
      /* no binning pass, so enable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

      set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
   }

   update_render_cntl(batch, pfb, false);

   emit_common_init(batch);
}

static void
set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
{
   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));

   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));

   OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
}

/* before mem2gmem */
static void
fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
   emit_marker6(ring, 7);

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   set_scissor(ring, x1, y1, x2, y2);

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */
                (tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
      OUT_RELOC(ring,
                fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */
                (tile->p * 4) + (32 * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
      OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,
                (tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      set_window_offset(ring, x1, y1);

      const struct fd_gmem_stateobj *gmem = batch->gmem_state;
      set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);
   } else {
      set_window_offset(ring, x1, y1);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);
   }
}

static void
set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct pipe_scissor_state blit_scissor = batch->max_scissor;

   blit_scissor.minx = ROUND_DOWN_TO(blit_scissor.minx, 16);
   blit_scissor.miny = ROUND_DOWN_TO(blit_scissor.miny, 4);
   blit_scissor.maxx = ALIGN(blit_scissor.maxx, 16);
   blit_scissor.maxy = ALIGN(blit_scissor.maxy, 4);

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |
                     A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny));
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx - 1) |
                     A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy - 1));
}

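/* Emit a blit event for one surface: program RB_BLIT dst state (plus the
 * flag buffer reference when UBWC is enabled) and the gmem base, then
 * trigger the blit.  Shared by the restore (mem2gmem) and resolve
 * (gmem2mem) paths.
 */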
static void
emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base,
          struct pipe_surface *psurf, bool stencil)
{
   struct fd_resource *rsc = fd_resource(psurf->texture);
   enum pipe_format pfmt = psurf->format;
   uint32_t offset;
   bool ubwc_enabled;

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   /* separate stencil case: */
   if (stencil) {
      rsc = rsc->stencil;
      pfmt = rsc->b.b.format;
   }

   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   ubwc_enabled = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level);

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   uint32_t tile_mode = fd_resource_tile_mode(&rsc->b.b, psurf->u.tex.level);
   enum a6xx_format format = fd6_color_format(pfmt, tile_mode);
   uint32_t stride = fd_resource_pitch(rsc, psurf->u.tex.level);
   uint32_t size = fd_resource_slice(rsc, psurf->u.tex.level)->size0;
   enum a3xx_color_swap swap = fd6_color_swap(pfmt, rsc->layout.tile_mode);
   enum a3xx_msaa_samples samples = fd_msaa_samples(rsc->b.b.nr_samples);

   OUT_REG(ring,
           A6XX_RB_BLIT_DST_INFO(.tile_mode = tile_mode, .samples = samples,
                                 .color_format = format, .color_swap = swap,
                                 .flags = ubwc_enabled),
           A6XX_RB_BLIT_DST(.bo = rsc->bo, .bo_offset = offset),
           A6XX_RB_BLIT_DST_PITCH(.a6xx_rb_blit_dst_pitch = stride),
           A6XX_RB_BLIT_DST_ARRAY_PITCH(.a6xx_rb_blit_dst_array_pitch = size));

   OUT_REG(ring, A6XX_RB_BLIT_BASE_GMEM(.dword = base));

   if (ubwc_enabled) {
      OUT_PKT4(ring, REG_A6XX_RB_BLIT_FLAG_DST, 3);
      fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
                              psurf->u.tex.first_layer);
   }

   fd6_emit_blit(batch, ring);
}

static void
emit_restore_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
                  uint32_t base, struct pipe_surface *psurf, unsigned buffer)
{
   bool stencil = (buffer == FD_BUFFER_STENCIL);

   OUT_REG(ring, A6XX_RB_BLIT_INFO(.gmem = true, .unk0 = true,
                                   .depth = (buffer == FD_BUFFER_DEPTH),
                                   .sample_0 = util_format_is_pure_integer(
                                      psurf->format)));

   emit_blit(batch, ring, base, psurf, stencil);
}

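/* Emit fast-clears into the tile buffer via blit events: the packed clear
 * color (with component swap applied) for each fast-cleared cbuf, then
 * depth and/or stencil, with a separate-stencil buffer cleared on its own.
 */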
static void
emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

   uint32_t buffers = batch->fast_cleared;

   if (buffers & PIPE_CLEAR_COLOR) {

      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union *color = &batch->clear_color[i];
         union util_color uc = {0};

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         enum pipe_format pfmt = pfb->cbufs[i]->format;

         // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
         union pipe_color_union swapped;
         switch (fd6_color_swap(pfmt, TILE6_LINEAR)) {
         case WZYX:
            swapped.ui[0] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[2] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case WXYZ:
            swapped.ui[2] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[0] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case ZYXW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[0] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[2] = color->ui[3];
            break;
         case XYZW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[2] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[0] = color->ui[3];
            break;
         }

         util_pack_color_union(pfmt, &uc, &swapped);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
         OUT_RING(ring,
                  A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
         OUT_RING(ring,
                  A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
         OUT_RING(ring, gmem->cbuf_base[i]);

         OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
         OUT_RING(ring, 0);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
         OUT_RING(ring, uc.ui[0]);
         OUT_RING(ring, uc.ui[1]);
         OUT_RING(ring, uc.ui[2]);
         OUT_RING(ring, uc.ui[3]);

         fd6_emit_blit(batch, ring);
      }
   }

   const bool has_depth = pfb->zsbuf;
   const bool has_separate_stencil =
      has_depth && fd_resource(pfb->zsbuf->texture)->stencil;

   /* First clear depth or combined depth/stencil. */
   if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
       (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
      enum pipe_format pfmt = pfb->zsbuf->format;
      uint32_t clear_value;
      uint32_t mask = 0;

      if (has_separate_stencil) {
         pfmt = util_format_get_depth_only(pfb->zsbuf->format);
         clear_value = util_pack_z(pfmt, batch->clear_depth);
      } else {
         pfmt = pfb->zsbuf->format;
         clear_value =
            util_pack_z_stencil(pfmt, batch->clear_depth, batch->clear_stencil);
      }

      if (buffers & PIPE_CLEAR_DEPTH)
         mask |= 0x1;

      if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
         mask |= 0x2;

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring,
               A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                  A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                  A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
                        // XXX UNK0 for separate stencil ??
                        A6XX_RB_BLIT_INFO_DEPTH |
                        A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[0]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, clear_value);

      fd6_emit_blit(batch, ring);
   }

   /* Then clear the separate stencil buffer in case of 32 bit depth
    * formats with separate stencil. */
   if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                        A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                        A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(FMT6_8_UINT));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
                        // A6XX_RB_BLIT_INFO_UNK0 |
                        A6XX_RB_BLIT_INFO_DEPTH |
                        A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[1]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, batch->clear_stencil & 0xff);

      fd6_emit_blit(batch, ring);
   }
}

/*
 * transfer from system memory to gmem
 */
static void
emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->restore & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_restore_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
                           FD_BUFFER_COLOR);
      }
   }

   if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) {
         emit_restore_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
                           FD_BUFFER_DEPTH);
      }
      if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) {
         emit_restore_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
                           FD_BUFFER_STENCIL);
      }
   }
}

static void
prepare_tile_setup_ib(struct fd_batch *batch)
{
   if (!(batch->restore || batch->fast_cleared))
      return;

   batch->tile_setup =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

   set_blit_scissor(batch, batch->tile_setup);

   emit_restore_blits(batch, batch->tile_setup);
   emit_clears(batch, batch->tile_setup);
}

/*
 * transfer from system memory to gmem
 */
static void
fd6_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
}

/* before IB to rendering cmds: */
static void
fd6_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   if (!batch->tile_setup)
      return;

   trace_start_clear_restore(&batch->trace, batch->gmem, batch->fast_cleared);
   if (batch->fast_cleared || !use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->tile_setup);
   } else {
      emit_conditional_ib(batch, tile, batch->tile_setup);
   }
   trace_end_clear_restore(&batch->trace, batch->gmem);
}

static bool
blit_can_resolve(enum pipe_format format)
{
   const struct util_format_description *desc = util_format_description(format);

   /* blit event can only do resolve for simple cases:
    * averaging samples as unsigned integers or choosing only one sample
    */
   if (util_format_is_snorm(format) || util_format_is_srgb(format))
      return false;

   /* can't do formats with larger channel sizes
    * note: this includes all float formats
    * note2: single channel integer formats seem OK
    */
   if (desc->channel[0].size > 10)
      return false;

   switch (format) {
   /* for unknown reasons blit event can't msaa resolve these formats when tiled
    * likely related to these formats having different layout from other cpp=2
    * formats
    */
   case PIPE_FORMAT_R8G8_UNORM:
   case PIPE_FORMAT_R8G8_UINT:
   case PIPE_FORMAT_R8G8_SINT:
   /* TODO: this one should be able to work? */
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return false;
   default:
      break;
   }

   return true;
}

static bool
needs_resolve(struct pipe_surface *psurf)
{
   return psurf->nr_samples &&
          (psurf->nr_samples != psurf->texture->nr_samples);
}

/**
 * Returns the UNKNOWN_8C01 value for handling partial depth/stencil
 * clear/stores to Z24S8.
 */
static uint32_t
fd6_unknown_8c01(enum pipe_format format, unsigned buffers)
{
   if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
      if (buffers == FD_BUFFER_DEPTH)
         return 0x08000041;
      else if (buffers == FD_BUFFER_STENCIL)
         return 0x00084001;
   }
   return 0;
}

static void
emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
                  uint32_t base, struct pipe_surface *psurf,
                  unsigned buffer) assert_dt
{
   uint32_t info = 0;
   bool stencil = false;

   if (!fd_resource(psurf->texture)->valid)
      return;

   /* if we need to resolve, but cannot with BLIT event, we instead need
    * to generate per-tile CP_BLIT (r2d) commands:
    *
    * The separate-stencil is a special case, we might need to use CP_BLIT
    * for depth, but we can still resolve stencil with a BLIT event
    */
   if (needs_resolve(psurf) && !blit_can_resolve(psurf->format) &&
       (buffer != FD_BUFFER_STENCIL)) {
      /* We could potentially use fd6_unknown_8c01() to handle partial z/s
       * resolve to packed z/s, but we would need a corresponding ability in the
       * !resolve case below, so batch_draw_tracking_for_dirty_bits() has us
       * just do a restore of the other channel for partial packed z/s writes.
       */
      fd6_resolve_tile(batch, ring, base, psurf, 0);
      return;
   }

   switch (buffer) {
   case FD_BUFFER_COLOR:
      break;
   case FD_BUFFER_STENCIL:
      info |= A6XX_RB_BLIT_INFO_UNK0;
      stencil = true;
      break;
   case FD_BUFFER_DEPTH:
      info |= A6XX_RB_BLIT_INFO_DEPTH;
      break;
   }

   if (util_format_is_pure_integer(psurf->format) ||
       util_format_is_depth_or_stencil(psurf->format))
      info |= A6XX_RB_BLIT_INFO_SAMPLE_0;

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
   OUT_RING(ring, info);

   emit_blit(batch, ring, base, psurf, stencil);
}

/*
 * transfer from gmem to system memory (ie. normal RAM)
 */

static void
prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct fd_ringbuffer *ring;

   batch->tile_fini =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
   ring = batch->tile_fini;

   set_blit_scissor(batch, ring);

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {
         emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
                           FD_BUFFER_DEPTH);
      }
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {
         emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
                           FD_BUFFER_STENCIL);
      }
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
                           FD_BUFFER_COLOR);
      }
   }
}

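/**
 * Emit the per-tile draw IB, plus any epilogue IB.  With hardware binning
 * the draw IB is executed conditionally, so tiles with no visible
 * geometry can be skipped.
 */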
static void
fd6_emit_tile(struct fd_batch *batch, const struct fd_tile *tile)
{
   if (!use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->draw);
   } else {
      emit_conditional_ib(batch, tile, batch->draw);
   }

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);
}

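/**
 * Per-tile gmem->mem resolve: reset draw state and IB2 skipping, switch
 * the CP into RESOLVE mode, and replay the tile_fini IB.  With hw binning
 * the resolve is conditional, like the draws, unless there were fast
 * clears, presumably because fast-cleared tiles must be resolved even
 * when nothing drew to them.
 */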
static void
fd6_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (use_hw_binning(batch)) {
      OUT_PKT7(ring, CP_SET_MARKER, 1);
      OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
   }

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x0);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
   emit_marker6(ring, 7);

   trace_start_resolve(&batch->trace, batch->gmem);
   if (batch->fast_cleared || !use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->tile_fini);
   } else {
      emit_conditional_ib(batch, tile, batch->tile_fini);
   }
   trace_end_resolve(&batch->trace, batch->gmem);
}

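/**
 * Runs once after the last tile: re-enable LRZ writes (presumably left
 * disabled by the tile passes), flush LRZ, and emit the CCU resolve
 * timestamp; with hw binning, also check for VSC (visibility stream)
 * overflow.
 */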
static void
fd6_emit_tile_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini(batch);

   OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);

   fd6_emit_lrz_flush(ring);

   fd6_event_write(batch, ring, PC_CCU_RESOLVE_TS, true);

   if (use_hw_binning(batch)) {
      check_vsc_overflow(batch->ctx);
   }
}

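/**
 * In sysmem (bypass) rendering there is no per-tile clear, so any "fast
 * cleared" buffers are instead cleared up front with 2D blits.  Packed
 * z24s8 passes fd6_unknown_8c01() values, which appear to let a
 * depth-only or stencil-only clear leave the other channel intact.
 */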
static void
emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   uint32_t buffers = batch->fast_cleared;

   if (!buffers)
      return;

   trace_start_clear_restore(&batch->trace, ring, buffers);

   if (buffers & PIPE_CLEAR_COLOR) {
      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union color = batch->clear_color[i];

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         fd6_clear_surface(ctx, ring, pfb->cbufs[i], pfb->width, pfb->height,
                           &color, 0);
      }
   }
   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      union pipe_color_union value = {};

      const bool has_depth = pfb->zsbuf;
      struct pipe_resource *separate_stencil =
         has_depth && fd_resource(pfb->zsbuf->texture)->stencil
            ? &fd_resource(pfb->zsbuf->texture)->stencil->b.b
            : NULL;

      if ((buffers & PIPE_CLEAR_DEPTH) ||
          (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
         value.f[0] = batch->clear_depth;
         value.ui[1] = batch->clear_stencil;
         fd6_clear_surface(ctx, ring, pfb->zsbuf, pfb->width, pfb->height,
                           &value, fd6_unknown_8c01(pfb->zsbuf->format, buffers));
      }

      if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
         value.ui[0] = batch->clear_stencil;

         struct pipe_surface stencil_surf = *pfb->zsbuf;
         stencil_surf.format = PIPE_FORMAT_S8_UINT;
         stencil_surf.texture = separate_stencil;

         fd6_clear_surface(ctx, ring, &stencil_surf, pfb->width, pfb->height,
                           &value, 0);
      }
   }

   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd_wfi(batch, ring);

   trace_end_clear_restore(&batch->trace, ring);
}

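/**
 * Allocate the per-batch tessellation factor and parameter BOs, point
 * PC_TESSFACTOR_ADDR at the factor BO, and rewind the tess consts state
 * object so both BO addresses can be patched into it.
 */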
static void
setup_tess_buffers(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct fd_context *ctx = batch->ctx;

   batch->tessfactor_bo = fd_bo_new(ctx->screen->dev, batch->tessfactor_size,
                                    0, "tessfactor");

   batch->tessparam_bo = fd_bo_new(ctx->screen->dev, batch->tessparam_size,
                                   0, "tessparam");

   OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2);
   OUT_RELOC(ring, batch->tessfactor_bo, 0, 0, 0);

   batch->tess_addrs_constobj->cur = batch->tess_addrs_constobj->start;
   OUT_RELOC(batch->tess_addrs_constobj, batch->tessparam_bo, 0, 0, 0);
   OUT_RELOC(batch->tess_addrs_constobj, batch->tessfactor_bo, 0, 0, 0);
}

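/**
 * Set up for rendering directly to the system memory framebuffer
 * ("bypass" mode, no tiling): full-framebuffer scissor, zero window
 * offset, CCU configured with the bypass offset, and a single pass over
 * the geometry (so stream-out can stay enabled unconditionally).
 */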
static void
fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_screen *screen = batch->ctx->screen;

   fd6_emit_restore(batch, ring);
   fd6_emit_lrz_flush(ring);

   if (batch->prologue) {
      if (!batch->nondraw) {
         trace_start_prologue(&batch->trace, ring);
      }
      fd6_emit_ib(ring, batch->prologue);
      if (!batch->nondraw) {
         trace_end_prologue(&batch->trace, ring);
      }
   }

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (pfb->width > 0 && pfb->height > 0)
      set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1);
   else
      set_scissor(ring, 0, 0, 0, 0);

   set_window_offset(ring, 0, 0);

   set_bin_size(ring, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */

   emit_sysmem_clears(batch, ring);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
   emit_marker6(ring, 7);

   if (batch->tessellation)
      setup_tess_buffers(batch, ring);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
   fd6_cache_inv(batch, ring);

   fd_wfi(batch, ring);
   OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));

   /* enable stream-out; with sysmem there is only one pass: */
   OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb, NULL);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_sysmem(batch);

   update_render_cntl(batch, pfb, false);

   emit_common_init(batch);
}

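/**
 * Finish a sysmem batch: run the epilogue IB if there is one, then flush
 * LRZ and both the color and depth CCU caches so the results land in
 * memory.
 */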
static void
fd6_emit_sysmem_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini(batch);

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd6_emit_lrz_flush(ring);

   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
   fd_wfi(batch, ring);
}

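/**
 * Wire up the a6xx tiled-rendering (GMEM) and bypass (sysmem) hooks into
 * the context.
 */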
void
fd6_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd6_emit_tile_init;
   ctx->emit_tile_prep = fd6_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;
   ctx->emit_tile = fd6_emit_tile;
   ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd6_emit_tile_fini;
   ctx->emit_sysmem_prep = fd6_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd6_emit_sysmem_fini;
}