/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <stdio.h>

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "freedreno_tracepoints.h"

#include "fd6_blitter.h"
#include "fd6_context.h"
#include "fd6_draw.h"
#include "fd6_emit.h"
#include "fd6_format.h"
#include "fd6_gmem.h"
#include "fd6_pack.h"
#include "fd6_program.h"
#include "fd6_resource.h"
#include "fd6_zsa.h"

/**
 * Emits the flags registers, suitable for RB_MRT_FLAG_BUFFER,
 * RB_DEPTH_FLAG_BUFFER, SP_PS_2D_SRC_FLAGS, and RB_BLIT_FLAG_DST.
 */
void
fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,
                        int level, int layer)
{
   if (fd_resource_ubwc_enabled(rsc, level)) {
      OUT_RELOC(ring, rsc->bo, fd_resource_ubwc_offset(rsc, level, layer), 0,
                0);
      OUT_RING(ring, A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(
                        fdl_ubwc_pitch(&rsc->layout, level)) |
                        A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(
                           rsc->layout.ubwc_layer_size >> 2));
   } else {
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].PITCH */
   }
}

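/*
 * Emit the color MRT state: per-RT buffer info, pitch, array pitch, and
 * base address (plus the GMEM base when a gmem state is given), along
 * with the sRGB enables and UBWC flag buffer references.
 */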
static void
emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,
         const struct fd_gmem_stateobj *gmem)
{
   unsigned srgb_cntl = 0;
   unsigned i;

   unsigned max_layer_index = 0;

   for (i = 0; i < pfb->nr_cbufs; i++) {
      enum a3xx_color_swap swap = WZYX;
      bool sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      struct fdl_slice *slice = NULL;
      uint32_t stride = 0;
      uint32_t array_stride = 0;
      uint32_t offset;

      if (!pfb->cbufs[i])
         continue;

      struct pipe_surface *psurf = pfb->cbufs[i];
      enum pipe_format pformat = psurf->format;
      rsc = fd_resource(psurf->texture);
      if (!rsc->bo)
         continue;

      uint32_t base = gmem ? gmem->cbuf_base[i] : 0;
      slice = fd_resource_slice(rsc, psurf->u.tex.level);
      uint32_t tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
      enum a6xx_format format = fd6_color_format(pformat, tile_mode);
      sint = util_format_is_pure_sint(pformat);
      uint = util_format_is_pure_uint(pformat);

      if (util_format_is_srgb(pformat))
         srgb_cntl |= (1 << i);

      offset =
         fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);

      stride = fd_resource_pitch(rsc, psurf->u.tex.level);
      array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
      swap = fd6_color_swap(pformat, rsc->layout.tile_mode);

      max_layer_index = psurf->u.tex.last_layer - psurf->u.tex.first_layer;

      debug_assert((offset + slice->size0) <= fd_bo_size(rsc->bo));

      OUT_REG(
         ring,
         A6XX_RB_MRT_BUF_INFO(i, .color_format = format,
                              .color_tile_mode = tile_mode, .color_swap = swap),
         A6XX_RB_MRT_PITCH(i, .a6xx_rb_mrt_pitch = stride),
         A6XX_RB_MRT_ARRAY_PITCH(i, .a6xx_rb_mrt_array_pitch = array_stride),
         A6XX_RB_MRT_BASE(i, .bo = rsc->bo, .bo_offset = offset),
         A6XX_RB_MRT_BASE_GMEM(i, .unknown = base));

      OUT_REG(ring, A6XX_SP_FS_MRT_REG(i, .color_format = format,
                                       .color_sint = sint, .color_uint = uint));

      OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
      fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
                              psurf->u.tex.first_layer);
   }

   OUT_REG(ring, A6XX_RB_SRGB_CNTL(.dword = srgb_cntl));
   OUT_REG(ring, A6XX_SP_SRGB_CNTL(.dword = srgb_cntl));

   OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index));
}

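/*
 * Emit the depth/stencil buffer state, including the LRZ buffers and,
 * for separate-stencil formats, the standalone stencil buffer.
 */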
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format);
      uint32_t stride = fd_resource_pitch(rsc, 0);
      uint32_t array_stride = fd_resource_layer_stride(rsc, 0);
      uint32_t base = gmem ? gmem->zsbuf_base[0] : 0;
      uint32_t offset =
         fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);

      OUT_REG(
         ring, A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt),
         A6XX_RB_DEPTH_BUFFER_PITCH(.a6xx_rb_depth_buffer_pitch = stride),
         A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(.a6xx_rb_depth_buffer_array_pitch =
                                             array_stride),
         A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),
         A6XX_RB_DEPTH_BUFFER_BASE_GMEM(.dword = base));

      OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));

      OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, 3);
      fd6_emit_flag_reference(ring, rsc, zsbuf->u.tex.level,
                              zsbuf->u.tex.first_layer);

      if (rsc->lrz) {
         OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(.bo = rsc->lrz),
                 A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = rsc->lrz_pitch),
                 // XXX a6xx seems to use a different buffer here.. not sure
                 // what for..
                 A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
      } else {
         OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);
      }

      /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAS_LRZ_BUFFER_BASE
       * plus this CP_EVENT_WRITE at the end in its own IB..
       */
      OUT_PKT7(ring, CP_EVENT_WRITE, 1);
      OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25));

      if (rsc->stencil) {
         stride = fd_resource_pitch(rsc->stencil, 0);
         array_stride = fd_resource_layer_stride(rsc->stencil, 0);
         uint32_t base = gmem ? gmem->zsbuf_base[1] : 0;

         OUT_REG(ring, A6XX_RB_STENCIL_INFO(.separate_stencil = true),
                 A6XX_RB_STENCIL_BUFFER_PITCH(.a6xx_rb_stencil_buffer_pitch =
                                                 stride),
                 A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(
                    .a6xx_rb_stencil_buffer_array_pitch = array_stride),
                 A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo),
                 A6XX_RB_STENCIL_BUFFER_BASE_GMEM(.dword = base));
      } else {
         OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
      }
   } else {
      OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
      OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */

      OUT_REG(ring,
              A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));

      OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */

      OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
   }
}

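/*
 * Decide whether the batch should use hardware binning.  Rough summary of
 * the constraints encoded below: a VSC pipe can cover at most 32 bins
 * (maxpw * maxph), binning only pays off with two or more bins, and an
 * empty batch has nothing to bin.
 */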
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) >= 2) &&
          (batch->num_draws > 0);
}

static void
patch_fb_read_gmem(struct fd_batch *batch)
{
   unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;

   struct fd_screen *screen = batch->ctx->screen;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];
   uint32_t texconst0 = fd6_tex_const_0(
      psurf->texture, psurf->u.tex.level, psurf->format, PIPE_SWIZZLE_X,
      PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);

   /* always TILE6_2 mode in GMEM.. which also means no swap: */
   texconst0 &=
      ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
   texconst0 |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);

   for (unsigned i = 0; i < num_patches; i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
      patch->cs[0] = texconst0;
      patch->cs[2] = A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]) |
                     A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
      patch->cs[4] = A6XX_TEX_CONST_4_BASE_LO(screen->gmem_base);
      patch->cs[5] = A6XX_TEX_CONST_5_BASE_HI(screen->gmem_base >> 32) |
                     A6XX_TEX_CONST_5_DEPTH(1);
   }
   util_dynarray_clear(&batch->fb_read_patches);
}

static void
patch_fb_read_sysmem(struct fd_batch *batch)
{
   unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];
   if (!psurf)
      return;

   struct fd_resource *rsc = fd_resource(psurf->texture);
   unsigned lvl = psurf->u.tex.level;
   unsigned layer = psurf->u.tex.first_layer;
   bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, lvl);
   uint64_t iova = fd_bo_get_iova(rsc->bo) + fd_resource_offset(rsc, lvl, layer);
   uint64_t ubwc_iova = fd_bo_get_iova(rsc->bo) + fd_resource_ubwc_offset(rsc, lvl, layer);
   uint32_t texconst0 = fd6_tex_const_0(
      psurf->texture, psurf->u.tex.level, psurf->format, PIPE_SWIZZLE_X,
      PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);
   uint32_t block_width, block_height;
   fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);

   for (unsigned i = 0; i < num_patches; i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
      patch->cs[0] = texconst0;
      patch->cs[2] = A6XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)) |
                     A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
      /* This is cheating a bit, since we can't use OUT_RELOC() here.. but
       * the render target will already have a reloc emitted for RB_MRT state,
       * so we can get away with manually patching in the address here:
       */
      patch->cs[4] = A6XX_TEX_CONST_4_BASE_LO(iova);
      patch->cs[5] = A6XX_TEX_CONST_5_BASE_HI(iova >> 32) |
                     A6XX_TEX_CONST_5_DEPTH(1);

      if (!ubwc_enabled)
         continue;

      patch->cs[3] |= A6XX_TEX_CONST_3_FLAG;
      patch->cs[7] = A6XX_TEX_CONST_7_FLAG_LO(ubwc_iova);
      patch->cs[8] = A6XX_TEX_CONST_8_FLAG_HI(ubwc_iova >> 32);
      patch->cs[9] = A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(
         rsc->layout.ubwc_layer_size >> 2);
      patch->cs[10] =
         A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(
            fdl_ubwc_pitch(&rsc->layout, lvl)) |
         A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(
            DIV_ROUND_UP(u_minify(psurf->texture->width0, lvl), block_width))) |
         A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(
            DIV_ROUND_UP(u_minify(psurf->texture->height0, lvl), block_height)));
   }
   util_dynarray_clear(&batch->fb_read_patches);
}

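/*
 * Emit RB_RENDER_CNTL, which carries the binning-pass flag and the
 * per-MRT/depth UBWC ("flag buffer") enables.  On GPUs with CP_REG_WRITE
 * support the write goes through the tracked-register path
 * (TRACK_RENDER_CNTL) instead of a plain register write.
 */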
static void
update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,
                   bool binning)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_screen *screen = batch->ctx->screen;
   uint32_t cntl = 0;
   bool depth_ubwc_enable = false;
   uint32_t mrts_ubwc_enable = 0;
   int i;

   if (pfb->zsbuf) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
      depth_ubwc_enable =
         fd_resource_ubwc_enabled(rsc, pfb->zsbuf->u.tex.level);
   }

   for (i = 0; i < pfb->nr_cbufs; i++) {
      if (!pfb->cbufs[i])
         continue;

      struct pipe_surface *psurf = pfb->cbufs[i];
      struct fd_resource *rsc = fd_resource(psurf->texture);
      if (!rsc->bo)
         continue;

      if (fd_resource_ubwc_enabled(rsc, psurf->u.tex.level))
         mrts_ubwc_enable |= 1 << i;
   }

   cntl |= A6XX_RB_RENDER_CNTL_CCUSINGLECACHELINESIZE(2);
   if (binning)
      cntl |= A6XX_RB_RENDER_CNTL_BINNING;

   if (screen->info->a6xx.has_cp_reg_write) {
      OUT_PKT7(ring, CP_REG_WRITE, 3);
      OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));
      OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL);
   } else {
      OUT_PKT4(ring, REG_A6XX_RB_RENDER_CNTL, 1);
   }
   OUT_RING(ring, cntl |
                     COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) |
                     A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable));
}

/* extra size to store VSC_DRAW_STRM_SIZE: */
#define VSC_DRAW_STRM_SIZE(pitch) ((pitch)*32 + 0x100)
#define VSC_PRIM_STRM_SIZE(pitch) ((pitch)*32)
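
/*
 * Worked example of the sizing above (illustrative numbers only): with
 * the initial 0x4000 byte per-pipe pitch, the draw stream BO is
 * 0x4000 * 32 + 0x100 bytes; the extra 0x100 bytes hold the per-pipe
 * VSC_DRAW_STRM_SIZE words at offset 32 * pitch, which is where
 * VSC_DRAW_STRM_SIZE_ADDRESS points below.
 */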

static void
update_vsc_pipe(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   if (batch->draw_strm_bits / 8 > fd6_ctx->vsc_draw_strm_pitch) {
      if (fd6_ctx->vsc_draw_strm)
         fd_bo_del(fd6_ctx->vsc_draw_strm);
      fd6_ctx->vsc_draw_strm = NULL;
      /* Note: probably only need to align to 0x40, but aligning stronger
       * reduces the odds that we will have to realloc again on the next
       * frame:
       */
      fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_draw_strm_pitch);
   }

   if (batch->prim_strm_bits / 8 > fd6_ctx->vsc_prim_strm_pitch) {
      if (fd6_ctx->vsc_prim_strm)
         fd_bo_del(fd6_ctx->vsc_prim_strm);
      fd6_ctx->vsc_prim_strm = NULL;
      fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_prim_strm_pitch);
   }

   if (!fd6_ctx->vsc_draw_strm) {
      fd6_ctx->vsc_draw_strm = fd_bo_new(
         ctx->screen->dev, VSC_DRAW_STRM_SIZE(fd6_ctx->vsc_draw_strm_pitch),
         0, "vsc_draw_strm");
   }

   if (!fd6_ctx->vsc_prim_strm) {
      fd6_ctx->vsc_prim_strm = fd_bo_new(
         ctx->screen->dev, VSC_PRIM_STRM_SIZE(fd6_ctx->vsc_prim_strm_pitch),
         0, "vsc_prim_strm");
   }

   OUT_REG(
      ring, A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
      A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = fd6_ctx->vsc_draw_strm,
                                      .bo_offset =
                                         32 * fd6_ctx->vsc_draw_strm_pitch));

   OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x, .ny = gmem->nbins_y));

   OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
   for (i = 0; i < 32; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_REG(
      ring, A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),
      A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),
      A6XX_VSC_PRIM_STRM_LIMIT(.dword = fd6_ctx->vsc_prim_strm_pitch - 64));

   OUT_REG(
      ring, A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),
      A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),
      A6XX_VSC_DRAW_STRM_LIMIT(.dword = fd6_ctx->vsc_draw_strm_pitch - 64));
}

/*
 * If overflow is detected, either 0x1 (VSC_DRAW_STRM overflow) or 0x3
 * (VSC_PRIM_STRM overflow) plus the size of the overflowed buffer is
 * written to control->vsc_overflow.  This lets the CPU detect which
 * buffer overflowed (and, since the current size is encoded as well,
 * prevents batches that were submitted before a resize but executed
 * after it from fooling the CPU into growing the buffer again
 * unnecessarily).
 */
static void
emit_vsc_overflow_test(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd6_context *fd6_ctx = fd6_context(batch->ctx);

   debug_assert((fd6_ctx->vsc_draw_strm_pitch & 0x3) == 0);
   debug_assert((fd6_ctx->vsc_prim_strm_pitch & 0x3) == 0);

   /* Check for overflow, write vsc_scratch if detected: */
   for (int i = 0; i < gmem->num_vsc_pipes; i++) {
      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                        CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));

      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                        CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));
   }

   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
}
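
/*
 * A rough sketch of what each CP_COND_WRITE5 pair above asks the CP to
 * do (pseudocode, not driver code):
 *
 *    if (VSC_DRAW_STRM_SIZE_REG[i] >= vsc_draw_strm_pitch - 64)
 *       control->vsc_overflow = 1 + vsc_draw_strm_pitch;
 *    if (VSC_PRIM_STRM_SIZE_REG[i] >= vsc_prim_strm_pitch - 64)
 *       control->vsc_overflow = 3 + vsc_prim_strm_pitch;
 */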

static void
check_vsc_overflow(struct fd_context *ctx)
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd6_control *control = fd_bo_map(fd6_ctx->control_mem);
   uint32_t vsc_overflow = control->vsc_overflow;

   if (!vsc_overflow)
      return;

   /* clear overflow flag: */
   control->vsc_overflow = 0;

   unsigned buffer = vsc_overflow & 0x3;
   unsigned size = vsc_overflow & ~0x3;

   if (buffer == 0x1) {
      /* VSC_DRAW_STRM overflow: */

      if (size < fd6_ctx->vsc_draw_strm_pitch) {
         /* we've already increased the size, this overflow is
          * from a batch submitted before resize, but executed
          * after
          */
         return;
      }

      fd_bo_del(fd6_ctx->vsc_draw_strm);
      fd6_ctx->vsc_draw_strm = NULL;
      fd6_ctx->vsc_draw_strm_pitch *= 2;

      mesa_logd("resized VSC_DRAW_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_draw_strm_pitch);

   } else if (buffer == 0x3) {
      /* VSC_PRIM_STRM overflow: */

      if (size < fd6_ctx->vsc_prim_strm_pitch) {
         /* we've already increased the size */
         return;
      }

      fd_bo_del(fd6_ctx->vsc_prim_strm);
      fd6_ctx->vsc_prim_strm = NULL;
      fd6_ctx->vsc_prim_strm_pitch *= 2;

      mesa_logd("resized VSC_PRIM_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_prim_strm_pitch);

   } else {
      /* NOTE: it's possible, for example, for overflow to corrupt the
       * control page.  I mostly just see this hit if I set initial VSC
       * buffer size extremely small.  Things still seem to recover,
       * but maybe we should pre-emptively realloc vsc_draw_strm/vsc_prim_strm
       * and hope for different memory placement?
       */
      mesa_loge("invalid vsc_overflow value: 0x%08x", vsc_overflow);
   }
}

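/*
 * Start-of-pass bookkeeping for the autotuner: snapshot the ZPASS sample
 * counter into the batch's autotune result buffer, so emit_common_fini()
 * can compute how many samples the pass actually produced.
 */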
static void
emit_common_init(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   if (!result)
      return;

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_start));

   fd6_event_write(batch, ring, ZPASS_DONE, false);
}

static void
emit_common_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   if (!result)
      return;

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_end));

   fd6_event_write(batch, ring, ZPASS_DONE, false);

   // TODO is there a better event to use.. a single ZPASS_DONE_TS would be nice
   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
   OUT_RELOC(ring, results_ptr(at, fence));
   OUT_RING(ring, result->fence);
}

/*
 * Emit a conditional CP_INDIRECT_BUFFER, predicated on VSC_STATE[p], ie.
 * the IB is skipped for tiles that have no visible geometry.
 */
static void
emit_conditional_ib(struct fd_batch *batch, const struct fd_tile *tile,
                    struct fd_ringbuffer *target)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (target->cur == target->start)
      return;

   emit_marker6(ring, 6);

   unsigned count = fd_ringbuffer_cmd_count(target);

   BEGIN_RING(ring, 5 + 4 * count); /* ensure conditional doesn't get split */

   OUT_PKT7(ring, CP_REG_TEST, 1);
   OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |
                     A6XX_CP_REG_TEST_0_BIT(tile->n) |
                     A6XX_CP_REG_TEST_0_WAIT_FOR_ME);

   OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
   OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
   OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(4 * count));

   for (unsigned i = 0; i < count; i++) {
      uint32_t dwords;
      OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
      dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
      assert(dwords > 0);
      OUT_RING(ring, dwords);
   }

   emit_marker6(ring, 6);
}
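
/*
 * In rough pseudocode, the CP_REG_TEST/CP_COND_REG_EXEC pair above
 * behaves like:
 *
 *    if (VSC_STATE_REG[tile->p] & (1 << tile->n))
 *       execute the following 4 * count dwords (the CP_INDIRECT_BUFFER
 *       packets);
 *    else
 *       skip over them;
 */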

static void
set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2,
            uint32_t y2)
{
   OUT_REG(ring, A6XX_GRAS_SC_WINDOW_SCISSOR_TL(.x = x1, .y = y1),
           A6XX_GRAS_SC_WINDOW_SCISSOR_BR(.x = x2, .y = y2));

   OUT_REG(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = x1, .y = y1),
           A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = x2, .y = y2));
}

static void
set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag)
{
   OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
   OUT_REG(ring, A6XX_RB_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
   /* no flag for RB_BIN_CONTROL2... */
   OUT_REG(ring, A6XX_RB_BIN_CONTROL2(.binw = w, .binh = h));
}

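/*
 * Emit the visibility ("binning") pass: run the draw IB once at bin
 * granularity so the VSC writes out, per bin, which draws produce
 * visible geometry.  The per-tile draw pass then uses
 * emit_conditional_ib() to skip bins with nothing visible.
 */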
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   debug_assert(!batch->tessellation);

   set_scissor(ring, 0, 0, gmem->width - 1, gmem->height - 1);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
   emit_marker6(ring, 7);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x1);

   OUT_WFI5(ring);

   OUT_REG(ring, A6XX_VFD_MODE_CNTL(.render_mode = BINNING_PASS));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

   OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2C);

   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) | A6XX_RB_WINDOW_OFFSET_Y(0));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(0) | A6XX_SP_TP_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   trace_start_binning_ib(&batch->trace, ring);
   fd6_emit_ib(ring, batch->draw);
   trace_end_binning_ib(&batch->trace, ring);

   fd_reset_wfi(batch);

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2D);

   fd6_cache_inv(batch, ring);
   fd6_cache_flush(batch, ring);
   fd_wfi(batch, ring);

   OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

   trace_start_vsc_overflow_test(&batch->trace, batch->gmem);
   emit_vsc_overflow_test(batch);
   trace_end_vsc_overflow_test(&batch->trace, batch->gmem);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x0);

   OUT_WFI5(ring);

   OUT_REG(ring,
           A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
                            .gmem = true,
                            .unk2 = screen->info->a6xx.ccu_cntl_gmem_unk2));
}

static void
emit_msaa(struct fd_ringbuffer *ring, unsigned nr)
{
   enum a3xx_msaa_samples samples = fd_msaa_samples(nr);

   OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_MSAA_CNTL, 1);
   OUT_RING(ring, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
}

static void prepare_tile_setup_ib(struct fd_batch *batch);
static void prepare_tile_fini_ib(struct fd_batch *batch);

/* before first tile */
static void
fd6_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   fd6_emit_restore(batch, ring);

   fd6_emit_lrz_flush(ring);

   if (batch->prologue) {
      trace_start_prologue(&batch->trace, ring);
      fd6_emit_ib(ring, batch->prologue);
      trace_end_prologue(&batch->trace, ring);
   }

   fd6_cache_inv(batch, ring);

   prepare_tile_setup_ib(batch);
   prepare_tile_fini_ib(batch);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   fd_wfi(batch, ring);
   OUT_REG(ring,
           A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
                            .gmem = true,
                            .unk2 = screen->info->a6xx.ccu_cntl_gmem_unk2));

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb, batch->gmem_state);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_gmem(batch);

   if (use_hw_binning(batch)) {
      /* enable stream-out during binning pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

      set_bin_size(ring, gmem->bin_w, gmem->bin_h,
                   A6XX_RB_BIN_CONTROL_RENDER_MODE(BINNING_PASS) |
                      A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));
      update_render_cntl(batch, pfb, true);
      emit_binning_pass(batch);

      /* and disable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(true));

      /*
       * NOTE: even if we detect VSC overflow and disable use of
       * visibility stream in draw pass, it is still safe to execute
       * the rest of these cmds:
       */

      // NOTE: a618 does not set .FORCE_LRZ_WRITE_DIS ..
      set_bin_size(ring, gmem->bin_w, gmem->bin_h,
                   A6XX_RB_BIN_CONTROL_FORCE_LRZ_WRITE_DIS |
                      A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));

      OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
      OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

      OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
      OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

      OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
      OUT_RING(ring, 0x1);
   } else {
      /* no binning pass, so enable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

      set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
   }

   update_render_cntl(batch, pfb, false);

   emit_common_init(batch);
}

static void
set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
{
   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));

   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));

   OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
}

/* before mem2gmem */
static void
fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
   emit_marker6(ring, 7);

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   set_scissor(ring, x1, y1, x2, y2);

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */
                (tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
      OUT_RELOC(ring,
                fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */
                (tile->p * 4) + (32 * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
      OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,
                (tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      set_window_offset(ring, x1, y1);

      const struct fd_gmem_stateobj *gmem = batch->gmem_state;
      set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);
   } else {
      set_window_offset(ring, x1, y1);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);
   }
}

static void
set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct pipe_scissor_state blit_scissor = batch->max_scissor;

   blit_scissor.minx = ROUND_DOWN_TO(blit_scissor.minx, 16);
   blit_scissor.miny = ROUND_DOWN_TO(blit_scissor.miny, 4);
   blit_scissor.maxx = ALIGN(blit_scissor.maxx, 16);
   blit_scissor.maxy = ALIGN(blit_scissor.maxy, 4);

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |
                     A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny));
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx - 1) |
                     A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy - 1));
}
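
/*
 * Example of the alignment above (illustrative numbers only): a
 * max_scissor of (3,5)..(100,10) becomes a blit scissor of
 * (0,4)..(112,12), emitted as TL=(0,4) BR=(111,11) since BR is
 * inclusive.
 */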

static void
emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base,
          struct pipe_surface *psurf, bool stencil)
{
   struct fd_resource *rsc = fd_resource(psurf->texture);
   enum pipe_format pfmt = psurf->format;
   uint32_t offset;
   bool ubwc_enabled;

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   /* separate stencil case: */
   if (stencil) {
      rsc = rsc->stencil;
      pfmt = rsc->b.b.format;
   }

   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   ubwc_enabled = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level);

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   uint32_t tile_mode = fd_resource_tile_mode(&rsc->b.b, psurf->u.tex.level);
   enum a6xx_format format = fd6_color_format(pfmt, tile_mode);
   uint32_t stride = fd_resource_pitch(rsc, psurf->u.tex.level);
   uint32_t size = fd_resource_slice(rsc, psurf->u.tex.level)->size0;
   enum a3xx_color_swap swap = fd6_color_swap(pfmt, rsc->layout.tile_mode);
   enum a3xx_msaa_samples samples = fd_msaa_samples(rsc->b.b.nr_samples);

   OUT_REG(ring,
           A6XX_RB_BLIT_DST_INFO(.tile_mode = tile_mode, .samples = samples,
                                 .color_format = format, .color_swap = swap,
                                 .flags = ubwc_enabled),
           A6XX_RB_BLIT_DST(.bo = rsc->bo, .bo_offset = offset),
           A6XX_RB_BLIT_DST_PITCH(.a6xx_rb_blit_dst_pitch = stride),
           A6XX_RB_BLIT_DST_ARRAY_PITCH(.a6xx_rb_blit_dst_array_pitch = size));

   OUT_REG(ring, A6XX_RB_BLIT_BASE_GMEM(.dword = base));

   if (ubwc_enabled) {
      OUT_PKT4(ring, REG_A6XX_RB_BLIT_FLAG_DST, 3);
      fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
                              psurf->u.tex.first_layer);
   }

   fd6_emit_blit(batch, ring);
}

static void
emit_restore_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
                  uint32_t base, struct pipe_surface *psurf, unsigned buffer)
{
   bool stencil = (buffer == FD_BUFFER_STENCIL);

   OUT_REG(ring, A6XX_RB_BLIT_INFO(.gmem = true, .unk0 = true,
                                   .depth = (buffer == FD_BUFFER_DEPTH),
                                   .sample_0 = util_format_is_pure_integer(
                                      psurf->format)));

   emit_blit(batch, ring, base, psurf, stencil);
}

static void
emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

   uint32_t buffers = batch->fast_cleared;

   if (buffers & PIPE_CLEAR_COLOR) {

      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union *color = &batch->clear_color[i];
         union util_color uc = {0};

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         enum pipe_format pfmt = pfb->cbufs[i]->format;

         // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
         union pipe_color_union swapped;
         switch (fd6_color_swap(pfmt, TILE6_LINEAR)) {
         case WZYX:
            swapped.ui[0] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[2] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case WXYZ:
            swapped.ui[2] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[0] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case ZYXW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[0] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[2] = color->ui[3];
            break;
         case XYZW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[2] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[0] = color->ui[3];
            break;
         }

         util_pack_color_union(pfmt, &uc, &swapped);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
         OUT_RING(ring,
                  A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
         OUT_RING(ring,
                  A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
         OUT_RING(ring, gmem->cbuf_base[i]);

         OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
         OUT_RING(ring, 0);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
         OUT_RING(ring, uc.ui[0]);
         OUT_RING(ring, uc.ui[1]);
         OUT_RING(ring, uc.ui[2]);
         OUT_RING(ring, uc.ui[3]);

         fd6_emit_blit(batch, ring);
      }
   }

   const bool has_depth = pfb->zsbuf;
   const bool has_separate_stencil =
      has_depth && fd_resource(pfb->zsbuf->texture)->stencil;

   /* First clear depth or combined depth/stencil. */
   if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
       (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
      enum pipe_format pfmt = pfb->zsbuf->format;
      uint32_t clear_value;
      uint32_t mask = 0;

      if (has_separate_stencil) {
         pfmt = util_format_get_depth_only(pfb->zsbuf->format);
         clear_value = util_pack_z(pfmt, batch->clear_depth);
      } else {
         pfmt = pfb->zsbuf->format;
         clear_value =
            util_pack_z_stencil(pfmt, batch->clear_depth, batch->clear_stencil);
      }

      if (buffers & PIPE_CLEAR_DEPTH)
         mask |= 0x1;

      if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
         mask |= 0x2;

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring,
               A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                  A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                  A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
                        // XXX UNK0 for separate stencil ??
                        A6XX_RB_BLIT_INFO_DEPTH |
                        A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[0]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, clear_value);

      fd6_emit_blit(batch, ring);
   }

   /* Then clear the separate stencil buffer in case of 32 bit depth
    * formats with separate stencil. */
   if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                        A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                        A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(FMT6_8_UINT));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
                        // A6XX_RB_BLIT_INFO_UNK0 |
                        A6XX_RB_BLIT_INFO_DEPTH |
                        A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[1]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, batch->clear_stencil & 0xff);

      fd6_emit_blit(batch, ring);
   }
}

/*
 * transfer from system memory to gmem
 */
static void
emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->restore & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_restore_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
                           FD_BUFFER_COLOR);
      }
   }

   if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) {
         emit_restore_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
                           FD_BUFFER_DEPTH);
      }
      if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) {
         emit_restore_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
                           FD_BUFFER_STENCIL);
      }
   }
}

static void
prepare_tile_setup_ib(struct fd_batch *batch)
{
   if (!(batch->restore || batch->fast_cleared))
      return;

   batch->tile_setup =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

   set_blit_scissor(batch, batch->tile_setup);

   emit_restore_blits(batch, batch->tile_setup);
   emit_clears(batch, batch->tile_setup);
}

/*
 * transfer from system memory to gmem
 */
static void
fd6_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
   /* no-op: the restore blits live in the tile_setup IB, which is emitted
    * per-tile from fd6_emit_tile_renderprep()
    */
}

/* before IB to rendering cmds: */
static void
fd6_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   if (!batch->tile_setup)
      return;

   trace_start_clear_restore(&batch->trace, batch->gmem, batch->fast_cleared);
   if (batch->fast_cleared || !use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->tile_setup);
   } else {
      emit_conditional_ib(batch, tile, batch->tile_setup);
   }
   trace_end_clear_restore(&batch->trace, batch->gmem);
}

static bool
blit_can_resolve(enum pipe_format format)
{
   const struct util_format_description *desc = util_format_description(format);

   /* blit event can only do resolve for simple cases:
    * averaging samples as unsigned integers or choosing only one sample
    */
   if (util_format_is_snorm(format) || util_format_is_srgb(format))
      return false;

   /* can't do formats with larger channel sizes
    * note: this includes all float formats
    * note2: single channel integer formats seem OK
    */
   if (desc->channel[0].size > 10)
      return false;

   switch (format) {
   /* for unknown reasons blit event can't msaa resolve these formats when tiled
    * likely related to these formats having different layout from other cpp=2
    * formats
    */
   case PIPE_FORMAT_R8G8_UNORM:
   case PIPE_FORMAT_R8G8_UINT:
   case PIPE_FORMAT_R8G8_SINT:
   /* TODO: this one should be able to work? */
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return false;
   default:
      break;
   }

   return true;
}

static bool
needs_resolve(struct pipe_surface *psurf)
{
   return psurf->nr_samples &&
          (psurf->nr_samples != psurf->texture->nr_samples);
}

/**
 * Returns the UNKNOWN_8C01 value for handling partial depth/stencil
 * clear/stores to Z24S8.
 */
static uint32_t
fd6_unknown_8c01(enum pipe_format format, unsigned buffers)
{
   if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
      if (buffers == FD_BUFFER_DEPTH)
         return 0x08000041;
      else if (buffers == FD_BUFFER_STENCIL)
         return 0x00084001;
   }
   return 0;
}

static void
emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
                  uint32_t base, struct pipe_surface *psurf,
                  unsigned buffer) assert_dt
{
   uint32_t info = 0;
   bool stencil = false;

   if (!fd_resource(psurf->texture)->valid)
      return;

   /* if we need to resolve, but cannot with BLIT event, we instead need
    * to generate per-tile CP_BLIT (r2d) commands:
    *
    * The separate-stencil is a special case, we might need to use CP_BLIT
    * for depth, but we can still resolve stencil with a BLIT event
    */
   if (needs_resolve(psurf) && !blit_can_resolve(psurf->format) &&
       (buffer != FD_BUFFER_STENCIL)) {
      /* We could potentially use fd6_unknown_8c01() to handle partial z/s
       * resolve to packed z/s, but we would need a corresponding ability in the
       * !resolve case below, so batch_draw_tracking_for_dirty_bits() has us
       * just do a restore of the other channel for partial packed z/s writes.
       */
      fd6_resolve_tile(batch, ring, base, psurf, 0);
      return;
   }

   switch (buffer) {
   case FD_BUFFER_COLOR:
      break;
   case FD_BUFFER_STENCIL:
      info |= A6XX_RB_BLIT_INFO_UNK0;
      stencil = true;
      break;
   case FD_BUFFER_DEPTH:
      info |= A6XX_RB_BLIT_INFO_DEPTH;
      break;
   }

   if (util_format_is_pure_integer(psurf->format) ||
       util_format_is_depth_or_stencil(psurf->format))
      info |= A6XX_RB_BLIT_INFO_SAMPLE_0;

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
   OUT_RING(ring, info);

   emit_blit(batch, ring, base, psurf, stencil);
}

/*
 * transfer from gmem to system memory (ie. normal RAM)
 */

static void
prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct fd_ringbuffer *ring;

   batch->tile_fini =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
   ring = batch->tile_fini;

   set_blit_scissor(batch, ring);

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {
         emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
                           FD_BUFFER_DEPTH);
      }
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {
         emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
                           FD_BUFFER_STENCIL);
      }
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
                           FD_BUFFER_COLOR);
      }
   }
}

static void
fd6_emit_tile(struct fd_batch *batch, const struct fd_tile *tile)
{
   if (!use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->draw);
   } else {
      emit_conditional_ib(batch, tile, batch->draw);
   }

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);
}

static void
fd6_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (use_hw_binning(batch)) {
      OUT_PKT7(ring, CP_SET_MARKER, 1);
      OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
   }

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x0);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
   emit_marker6(ring, 7);

   trace_start_resolve(&batch->trace, batch->gmem);
   if (batch->fast_cleared || !use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->tile_fini);
   } else {
      emit_conditional_ib(batch, tile, batch->tile_fini);
   }
   trace_end_resolve(&batch->trace, batch->gmem);
}

static void
fd6_emit_tile_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini(batch);

   OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);

   fd6_emit_lrz_flush(ring);

   fd6_event_write(batch, ring, PC_CCU_RESOLVE_TS, true);

   if (use_hw_binning(batch)) {
      check_vsc_overflow(batch->ctx);
   }
}

static void
emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   uint32_t buffers = batch->fast_cleared;

   if (!buffers)
      return;

   trace_start_clear_restore(&batch->trace, ring, buffers);

   if (buffers & PIPE_CLEAR_COLOR) {
      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union color = batch->clear_color[i];

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         fd6_clear_surface(ctx, ring, pfb->cbufs[i], pfb->width, pfb->height,
                           &color, 0);
      }
   }
   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      union pipe_color_union value = {};

      const bool has_depth = pfb->zsbuf;
      struct pipe_resource *separate_stencil =
         has_depth && fd_resource(pfb->zsbuf->texture)->stencil
            ? &fd_resource(pfb->zsbuf->texture)->stencil->b.b
            : NULL;

      if ((buffers & PIPE_CLEAR_DEPTH) || (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
         value.f[0] = batch->clear_depth;
         value.ui[1] = batch->clear_stencil;
         fd6_clear_surface(ctx, ring, pfb->zsbuf, pfb->width, pfb->height,
                           &value, fd6_unknown_8c01(pfb->zsbuf->format, buffers));
      }

      if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
         value.ui[0] = batch->clear_stencil;

         struct pipe_surface stencil_surf = *pfb->zsbuf;
         stencil_surf.format = PIPE_FORMAT_S8_UINT;
         stencil_surf.texture = separate_stencil;

         fd6_clear_surface(ctx, ring, &stencil_surf, pfb->width, pfb->height,
                           &value, 0);
      }
   }

   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd_wfi(batch, ring);

   trace_end_clear_restore(&batch->trace, ring);
}

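/*
 * Allocate the per-batch tessellation factor/param BOs and patch their
 * addresses into both PC_TESSFACTOR_ADDR and the tess_addrs_constobj
 * ring (which is rewound and rewritten here).
 */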
static void
setup_tess_buffers(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct fd_context *ctx = batch->ctx;

   batch->tessfactor_bo = fd_bo_new(ctx->screen->dev, batch->tessfactor_size,
                                    0, "tessfactor");

   batch->tessparam_bo = fd_bo_new(ctx->screen->dev, batch->tessparam_size,
                                   0, "tessparam");

   OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2);
   OUT_RELOC(ring, batch->tessfactor_bo, 0, 0, 0);

   batch->tess_addrs_constobj->cur = batch->tess_addrs_constobj->start;
   OUT_RELOC(batch->tess_addrs_constobj, batch->tessparam_bo, 0, 0, 0);
   OUT_RELOC(batch->tess_addrs_constobj, batch->tessfactor_bo, 0, 0, 0);
}

static void
fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_screen *screen = batch->ctx->screen;

   fd6_emit_restore(batch, ring);
   fd6_emit_lrz_flush(ring);

   if (batch->prologue) {
      if (!batch->nondraw) {
         trace_start_prologue(&batch->trace, ring);
      }
      fd6_emit_ib(ring, batch->prologue);
      if (!batch->nondraw) {
         trace_end_prologue(&batch->trace, ring);
      }
   }

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (pfb->width > 0 && pfb->height > 0)
      set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1);
   else
      set_scissor(ring, 0, 0, 0, 0);

   set_window_offset(ring, 0, 0);

   set_bin_size(ring, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */

   emit_sysmem_clears(batch, ring);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
   emit_marker6(ring, 7);

   if (batch->tessellation)
      setup_tess_buffers(batch, ring);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
   fd6_cache_inv(batch, ring);

   fd_wfi(batch, ring);
   OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));

   /* enable stream-out, with sysmem there is only one pass: */
   OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb, NULL);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_sysmem(batch);

   update_render_cntl(batch, pfb, false);

   emit_common_init(batch);
}

static void
fd6_emit_sysmem_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini(batch);

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd6_emit_lrz_flush(ring);

   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
   fd_wfi(batch, ring);
}

void
fd6_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd6_emit_tile_init;
   ctx->emit_tile_prep = fd6_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;
   ctx->emit_tile = fd6_emit_tile;
   ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd6_emit_tile_fini;
   ctx->emit_sysmem_prep = fd6_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd6_emit_sysmem_fini;
}