/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"

#include "freedreno_draw.h"
#include "freedreno_state.h"
#include "freedreno_resource.h"

#include "fd5_gmem.h"
#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_program.h"
#include "fd5_format.h"
#include "fd5_zsa.h"

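/* Program the color MRT state (RB_MRT[i] / SP_FS_MRT_REG[i]) for all
 * A5XX_MAX_RENDER_TARGETS slots.  With a gmem state-object, the color
 * buffers live at fixed offsets in the on-chip tile buffer (tiled
 * layout, bin-sized pitch); with gmem==NULL they point directly at the
 * resource's bo in system memory, which is used for bypass rendering
 * and for the restore/resolve blit paths below.
 */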
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
		struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
{
	enum a5xx_tile_mode tile_mode;
	unsigned i;

	for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
		enum a5xx_color_fmt format = 0;
		enum a3xx_color_swap swap = WZYX;
		bool srgb = false, sint = false, uint = false;
		struct fd_resource *rsc = NULL;
		struct fdl_slice *slice = NULL;
		uint32_t stride = 0;
		uint32_t size = 0;
		uint32_t base = 0;
		uint32_t offset = 0;

		if (gmem) {
			tile_mode = TILE5_2;
		} else {
			tile_mode = TILE5_LINEAR;
		}

		if ((i < nr_bufs) && bufs[i]) {
			struct pipe_surface *psurf = bufs[i];
			enum pipe_format pformat = psurf->format;

			rsc = fd_resource(psurf->texture);

			slice = fd_resource_slice(rsc, psurf->u.tex.level);
			format = fd5_pipe2color(pformat);
			swap = fd5_pipe2swap(pformat);
			srgb = util_format_is_srgb(pformat);
			sint = util_format_is_pure_sint(pformat);
			uint = util_format_is_pure_uint(pformat);

			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

			offset = fd_resource_offset(rsc, psurf->u.tex.level,
					psurf->u.tex.first_layer);

			if (gmem) {
				stride = gmem->bin_w * gmem->cbuf_cpp[i];
				size = stride * gmem->bin_h;
				base = gmem->cbuf_base[i];
			} else {
				stride = fd_resource_pitch(rsc, psurf->u.tex.level);
				size = slice->size0;

				tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
			}
		}

		OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
		OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
				A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
				A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
				COND(gmem, 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
				COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
		OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
		OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
		if (gmem || (i >= nr_bufs) || !bufs[i]) {
			OUT_RING(ring, base);           /* RB_MRT[i].BASE_LO */
			OUT_RING(ring, 0x00000000);     /* RB_MRT[i].BASE_HI */
		} else {
			debug_assert((offset + size) <= fd_bo_size(rsc->bo));
			OUT_RELOC(ring, rsc->bo, offset, 0, 0);  /* BASE_LO/HI */
		}

		OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
		OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
				COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
				COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
				COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

		/* when we support UBWC, these would be the system memory
		 * addr/pitch/etc:
		 */
		OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
		OUT_RING(ring, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
		OUT_RING(ring, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
		OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
		OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
	}
}

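/* Program depth/stencil buffer state, including the LRZ (low-resolution
 * Z) buffers when the resource has one, and separate-stencil buffers.
 * As with emit_mrt(), gmem!=NULL targets the on-chip tile buffer and
 * gmem==NULL targets system memory.
 */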
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
		const struct fd_gmem_stateobj *gmem)
{
	if (zsbuf) {
		struct fd_resource *rsc = fd_resource(zsbuf->texture);
		enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
		uint32_t cpp = rsc->layout.cpp;
		uint32_t stride = 0;
		uint32_t size = 0;

		if (gmem) {
			stride = cpp * gmem->bin_w;
			size = stride * gmem->bin_h;
		} else {
			stride = fd_resource_pitch(rsc, 0);
			size = fd_resource_slice(rsc, 0)->size0;
		}

		OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
		OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
		if (gmem) {
			OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
			OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
		} else {
			OUT_RELOC(ring, rsc->bo, 0, 0, 0);  /* RB_DEPTH_BUFFER_BASE_LO/HI */
		}
		OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
		OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));

		OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
		OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));

		OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_PITCH */

		if (rsc->lrz) {
			OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
			OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
			OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));

			OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
			OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
		} else {
			OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
			OUT_RING(ring, 0x00000000);
			OUT_RING(ring, 0x00000000);
			OUT_RING(ring, 0x00000000);     /* GRAS_LRZ_BUFFER_PITCH */

			OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
			OUT_RING(ring, 0x00000000);
			OUT_RING(ring, 0x00000000);
		}

		if (rsc->stencil) {
			if (gmem) {
				stride = 1 * gmem->bin_w;
				size = stride * gmem->bin_h;
			} else {
				stride = fd_resource_pitch(rsc->stencil, 0);
				size = fd_resource_slice(rsc->stencil, 0)->size0;
			}

			OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
			OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
			if (gmem) {
				OUT_RING(ring, gmem->zsbuf_base[1]);  /* RB_STENCIL_BASE_LO */
				OUT_RING(ring, 0x00000000);           /* RB_STENCIL_BASE_HI */
			} else {
				OUT_RELOC(ring, rsc->stencil->bo, 0, 0, 0);  /* RB_STENCIL_BASE_LO/HI */
			}
			OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
			OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
		} else {
			OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
			OUT_RING(ring, 0x00000000);     /* RB_STENCIL_INFO */
		}
	} else {
		OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
		OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_BASE_LO */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_BASE_HI */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_PITCH */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_BUFFER_ARRAY_PITCH */

		OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
		OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));

		OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
		OUT_RING(ring, 0x00000000);    /* RB_DEPTH_FLAG_BUFFER_PITCH */

		OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
		OUT_RING(ring, 0x00000000);     /* RB_STENCIL_INFO */
	}
}

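/* Decide whether the hw binning (visibility stream) pass is worthwhile.
 * The first two checks appear to be VSC limits on bins per pipe; beyond
 * that, binning is only a win when there are enough bins and at least
 * one draw to make the extra pass pay for itself.
 */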
static bool
use_hw_binning(struct fd_batch *batch)
{
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;

	if ((gmem->maxpw * gmem->maxph) > 32)
		return false;

	if ((gmem->maxpw > 15) || (gmem->maxph > 15))
		return false;

	return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
			(batch->num_draws > 0);
}

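/* Draw commands are emitted with a placeholder visibility mode, and
 * patched here once we know whether the binning pass ran (so draws can
 * use the visibility stream) or whether visibility should be ignored.
 */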
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
	unsigned i;
	for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
		struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
		*patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
	}
	util_dynarray_clear(&batch->draw_patches);
}

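/* Configure the VSC (visibility stream compressor): bin size, the
 * position/size of each of the 16 VSC pipes, and the buffers the hw
 * writes per-bin visibility data into.  The pipe buffers are allocated
 * lazily and shared across batches via the context.
 */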
static void
update_vsc_pipe(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd5_context *fd5_ctx = fd5_context(ctx);
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct fd_ringbuffer *ring = batch->gmem;
	int i;

	OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
	OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
			A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
	OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */

	OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
	OUT_RING(ring, 0x00000000);   /* UNKNOWN_0BC5 */
	OUT_RING(ring, 0x00000000);   /* UNKNOWN_0BC6 */

	OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
	for (i = 0; i < 16; i++) {
		const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
		OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
				A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
				A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
				A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
	}

	OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
	for (i = 0; i < 16; i++) {
		if (!ctx->vsc_pipe_bo[i]) {
			ctx->vsc_pipe_bo[i] = fd_bo_new(ctx->dev, 0x20000,
					DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
		}
		OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0, 0);     /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
	}

	OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
	for (i = 0; i < 16; i++) {
		OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
	}
}

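/* Run the binning pass: replay the batch's draws once in BINNING mode
 * with the scissor covering the full gmem area, so the VSC can record
 * which bins each draw touches.  The resulting visibility streams are
 * consumed per-tile via CP_SET_BIN_DATA5 in fd5_emit_tile_prep().
 */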
static void
emit_binning_pass(struct fd_batch *batch)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_ringbuffer *ring = batch->gmem;
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;

	uint32_t x1 = gmem->minx;
	uint32_t y1 = gmem->miny;
	uint32_t x2 = gmem->minx + gmem->width - 1;
	uint32_t y2 = gmem->miny + gmem->height - 1;

	fd5_set_render_mode(batch->ctx, ring, BINNING);

	OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
	OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
			A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

	OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

	OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) |
			A5XX_RB_RESOLVE_CNTL_1_Y(y1));
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) |
			A5XX_RB_RESOLVE_CNTL_2_Y(y2));

	update_vsc_pipe(batch);

	OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
	OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, UNK_2C);

	OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
	OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) |
			A5XX_RB_WINDOW_OFFSET_Y(0));

	/* emit IB to binning drawcmds: */
	fd5_emit_ib(ring, batch->binning);

	fd_reset_wfi(batch);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, UNK_2D);

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RELOC(ring, fd5_context(ctx)->blit_mem, 0, 0, 0);  /* ADDR_LO/HI */
	OUT_RING(ring, 0x00000000);

	// TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)

	fd_wfi(batch, ring);

	OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
	OUT_RING(ring, 0x0);
}

/* before first tile */
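/* Per-batch setup before the first tile: restore context state,
 * configure the CCU for gmem rendering, point zs/mrt state at the tile
 * buffer, and optionally run the binning pass before switching to GMEM
 * render mode.
 */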
static void
fd5_emit_tile_init(struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	fd5_emit_restore(batch, ring);

	if (batch->prologue)
		fd5_emit_ib(ring, batch->prologue);

	fd5_emit_lrz_flush(ring);

	OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
	OUT_RING(ring, 0x00000080);   /* GRAS_CL_CNTL */

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* PC_POWER_CNTL */

	OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* VFD_POWER_CNTL */

	/* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
	fd_wfi(batch, ring);
	OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
	OUT_RING(ring, 0x7c13c080);   /* RB_CCU_CNTL */

	emit_zs(ring, pfb->zsbuf, batch->gmem_state);
	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);

	if (use_hw_binning(batch)) {
		emit_binning_pass(batch);
		fd5_emit_lrz_flush(ring);
		patch_draws(batch, USE_VISIBILITY);
	} else {
		patch_draws(batch, IGNORE_VISIBILITY);
	}

	fd5_set_render_mode(batch->ctx, ring, GMEM);
}

/* before mem2gmem */
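/* Per-tile setup: set the window scissor/resolve rect to the current
 * tile, then either point the CP at this tile's visibility stream (hw
 * binning) or override visibility so all draws are executed.
 */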
static void
fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
	struct fd_context *ctx = batch->ctx;
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct fd5_context *fd5_ctx = fd5_context(ctx);
	struct fd_ringbuffer *ring = batch->gmem;

	uint32_t x1 = tile->xoff;
	uint32_t y1 = tile->yoff;
	uint32_t x2 = tile->xoff + tile->bin_w - 1;
	uint32_t y2 = tile->yoff + tile->bin_h - 1;

	OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

	OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) |
			A5XX_RB_RESOLVE_CNTL_1_Y(y1));
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) |
			A5XX_RB_RESOLVE_CNTL_2_Y(y2));

	if (use_hw_binning(batch)) {
		const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
		struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

		OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

		OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
		OUT_RING(ring, 0x0);

		OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
		OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
				CP_SET_BIN_DATA5_0_VSC_N(tile->n));
		OUT_RELOC(ring, pipe_bo, 0, 0, 0);       /* VSC_PIPE[p].DATA_ADDRESS */
		OUT_RELOC(ring, fd5_ctx->vsc_size_mem,   /* VSC_SIZE_ADDRESS + (p * 4) */
				(tile->p * 4), 0, 0);
	} else {
		OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
		OUT_RING(ring, 0x1);
	}

	OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
	OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) |
			A5XX_RB_WINDOW_OFFSET_Y(y1));
}


/*
 * transfer from system memory to gmem
 */

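/* Restore ("unresolve") one surface from system memory into its gmem
 * location via the blit path: the RB_BLIT dst is an offset into the
 * tile buffer, while the src comes from the MRT/ZS state that
 * fd5_emit_tile_mem2gmem() programmed with sysmem addresses.
 */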
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
		struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
	struct fd_ringbuffer *ring = batch->gmem;
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct fd_resource *rsc = fd_resource(psurf->texture);
	uint32_t stride, size;

	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

	if (buf == BLIT_S)
		rsc = rsc->stencil;

	if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
		// XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
		// know otherwise how to go from linear in sysmem to tiled in gmem.
		// possibly we want to flip this around gmem2mem and keep depth
		// tiled in sysmem (and fixup sampler state to assume tiled).. this
		// might be required for doing depth/stencil in bypass mode?
		struct fdl_slice *slice = fd_resource_slice(rsc, 0);
		enum a5xx_color_fmt format =
			fd5_pipe2color(fd_gmem_restore_format(rsc->base.format));

		OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
		OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
				A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
				A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
		OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, 0)));
		OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0));
		OUT_RELOC(ring, rsc->bo, 0, 0, 0);  /* BASE_LO/HI */

		buf = BLIT_MRT0;
	}

	stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
	size = stride * gmem->bin_h;

	OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_LO */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_HI */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_PITCH */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

	OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
	OUT_RING(ring, 0x00000000);   /* RB_RESOLVE_CNTL_3 */
	OUT_RING(ring, base);         /* RB_BLIT_DST_LO */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_DST_HI */
	OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
	OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));

	OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
	OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

	fd5_emit_blit(batch->ctx, ring);
}

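/* Per-tile restore: point the MRTs at system memory and blit in any
 * buffers whose previous contents the batch needs (batch->restore).
 */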
static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
	struct fd_ringbuffer *ring = batch->gmem;
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	/*
	 * setup mrt and zs with system memory base addresses:
	 */

	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
//	emit_zs(ring, pfb->zsbuf, NULL);

	OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
	OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
			A5XX_RB_CNTL_HEIGHT(gmem->bin_h) |
			A5XX_RB_CNTL_BYPASS);

	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
		unsigned i;
		for (i = 0; i < pfb->nr_cbufs; i++) {
			if (!pfb->cbufs[i])
				continue;
			if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
				continue;
			emit_mem2gmem_surf(batch, gmem->cbuf_base[i],
					pfb->cbufs[i], BLIT_MRT0 + i);
		}
	}

	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

		if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
			emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
		if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
			emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
	}
}


/* before IB to rendering cmds: */
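/* Re-point zs/mrt state at the tile buffer (mem2gmem left it pointing
 * at system memory) and program MSAA state before the tile's draw
 * commands run.
 */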
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
	struct fd_ringbuffer *ring = batch->gmem;
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
	OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
			A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

	emit_zs(ring, pfb->zsbuf, gmem);
	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);

	enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

	OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
	OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
			COND(samples == MSAA_ONE, A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

	OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
	OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
			COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

	OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
	OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
			COND(samples == MSAA_ONE, A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
}


/*
 * transfer from gmem to system memory (ie. normal RAM)
 */

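/* Resolve one surface from its gmem offset back out to the resource's
 * bo in system memory.  Skipped when the resource has no valid
 * contents yet.
 */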
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
		struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
	struct fd_ringbuffer *ring = batch->gmem;
	struct fd_resource *rsc = fd_resource(psurf->texture);
	struct fdl_slice *slice;
	bool tiled;
	uint32_t offset, pitch;

	if (!rsc->valid)
		return;

	if (buf == BLIT_S)
		rsc = rsc->stencil;

	slice = fd_resource_slice(rsc, psurf->u.tex.level);
	offset = fd_resource_offset(rsc, psurf->u.tex.level,
			psurf->u.tex.first_layer);
	pitch = fd_resource_pitch(rsc, psurf->u.tex.level);

	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

	OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_LO */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_HI */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_PITCH */
	OUT_RING(ring, 0x00000000);   /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

	tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);

	OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
	OUT_RING(ring, 0x00000004 |   /* XXX RB_RESOLVE_CNTL_3 */
			COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
	OUT_RELOC(ring, rsc->bo, offset, 0, 0);     /* RB_BLIT_DST_LO/HI */
	OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
	OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));

	OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
	OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

//	bool msaa_resolve = pfb->samples > 1;
	bool msaa_resolve = false;
	OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
	OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));

	fd5_emit_blit(batch->ctx, ring);
}

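/* Per-tile resolve: write back whichever buffers the batch marked for
 * resolve (batch->resolve) from gmem to system memory.
 */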
static void
fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

		if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
			emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
		if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
			emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
	}

	if (batch->resolve & FD_BUFFER_COLOR) {
		unsigned i;
		for (i = 0; i < pfb->nr_cbufs; i++) {
			if (!pfb->cbufs[i])
				continue;
			if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
				continue;
			emit_gmem2mem_surf(batch, gmem->cbuf_base[i],
					pfb->cbufs[i], BLIT_MRT0 + i);
		}
	}
}

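/* After the last tile: flush LRZ and caches, and drop back to BYPASS
 * render mode.
 */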
static void
fd5_emit_tile_fini(struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->gmem;

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	fd5_emit_lrz_flush(ring);

	fd5_cache_flush(batch, ring);
	fd5_set_render_mode(batch->ctx, ring, BYPASS);
}

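/* Setup for rendering directly to system memory (bypass mode), when
 * tiling is not used: no binning, zero window offset, scissor covering
 * the whole framebuffer, and zs/mrt state pointing at sysmem.
 */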
static void
fd5_emit_sysmem_prep(struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->gmem;

	fd5_emit_restore(batch, ring);

	fd5_emit_lrz_flush(ring);

	if (batch->prologue)
		fd5_emit_ib(ring, batch->prologue);

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, PC_CCU_INVALIDATE_COLOR);

	OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* PC_POWER_CNTL */

	OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* VFD_POWER_CNTL */

	/* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
	fd_wfi(batch, ring);
	OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
	OUT_RING(ring, 0x10000000);   /* RB_CCU_CNTL */

	OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
	OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) |
			A5XX_RB_CNTL_HEIGHT(0) |
			A5XX_RB_CNTL_BYPASS);

	/* remaining setup below here does not apply to blit/compute: */
	if (batch->nondraw)
		return;

	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

	OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) |
			A5XX_RB_RESOLVE_CNTL_1_Y(0));
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
			A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

	OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
	OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) |
			A5XX_RB_WINDOW_OFFSET_Y(0));

	OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
	OUT_RING(ring, 0x1);

	patch_draws(batch, IGNORE_VISIBILITY);

	emit_zs(ring, pfb->zsbuf, NULL);
	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);

	OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);

	OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);

	OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);
}

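/* Finish a bypass-mode batch: flush LRZ and the color CCU (the event
 * presumably writes a timestamp to blit_mem on completion).
 */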
static void
fd5_emit_sysmem_fini(struct fd_batch *batch)
{
	struct fd5_context *fd5_ctx = fd5_context(batch->ctx);
	struct fd_ringbuffer *ring = batch->gmem;

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	fd5_emit_lrz_flush(ring);

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, PC_CCU_FLUSH_COLOR_TS);
	OUT_RELOC(ring, fd5_ctx->blit_mem, 0, 0, 0);  /* ADDR_LO/HI */
	OUT_RING(ring, 0x00000000);
}

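/* Hook the a5xx tile/sysmem emit functions into the per-generation
 * vtable on the context.
 */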
void
fd5_gmem_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	ctx->emit_tile_init = fd5_emit_tile_init;
	ctx->emit_tile_prep = fd5_emit_tile_prep;
	ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
	ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
	ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
	ctx->emit_tile_fini = fd5_emit_tile_fini;
	ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
	ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
}