• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #include "pipe/p_state.h"
28 #include "util/u_string.h"
29 #include "util/u_memory.h"
30 #include "util/u_inlines.h"
31 
32 #include "freedreno_draw.h"
33 #include "freedreno_state.h"
34 #include "freedreno_resource.h"
35 
36 #include "fd2_gmem.h"
37 #include "fd2_context.h"
38 #include "fd2_emit.h"
39 #include "fd2_program.h"
40 #include "fd2_util.h"
41 #include "fd2_zsa.h"
42 #include "fd2_draw.h"
43 #include "ir2/instr-a2xx.h"
44 
fmt2swap(enum pipe_format format)45 static uint32_t fmt2swap(enum pipe_format format)
46 {
47 	switch (format) {
48 	case PIPE_FORMAT_B8G8R8A8_UNORM:
49 	case PIPE_FORMAT_B8G8R8X8_UNORM:
50 	case PIPE_FORMAT_B5G6R5_UNORM:
51 	case PIPE_FORMAT_B5G5R5A1_UNORM:
52 	case PIPE_FORMAT_B5G5R5X1_UNORM:
53 	case PIPE_FORMAT_B4G4R4A4_UNORM:
54 	case PIPE_FORMAT_B4G4R4X4_UNORM:
55 	case PIPE_FORMAT_B2G3R3_UNORM:
56 		return 1;
57 	default:
58 		return 0;
59 	}
60 }
61 
62 static bool
use_hw_binning(struct fd_batch * batch)63 use_hw_binning(struct fd_batch *batch)
64 {
65 	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
66 
67 	/* we hardcoded a limit of 8 "pipes", we can increase this limit
68 	 * at the cost of a slightly larger command stream
69 	 * however very few cases will need more than 8
70 	 * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?)
71 	 */
72 	if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes)
73 		return false;
74 
75 	/* only a20x hw binning is implement
76 	 * a22x is more like a3xx, but perhaps the a20x works? (TODO)
77 	 */
78 	if (!is_a20x(batch->ctx->screen))
79 		return false;
80 
81 	return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
82 }
83 
84 /* transfer from gmem to system memory (ie. normal RAM) */
85 
86 static void
emit_gmem2mem_surf(struct fd_batch * batch,uint32_t base,struct pipe_surface * psurf)87 emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
88 		struct pipe_surface *psurf)
89 {
90 	struct fd_ringbuffer *ring = batch->tile_fini;
91 	struct fd_resource *rsc = fd_resource(psurf->texture);
92 	uint32_t offset =
93 		fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
94 	enum pipe_format format = fd_gmem_restore_format(psurf->format);
95 	uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
96 
97 	assert((pitch & 31) == 0);
98 	assert((offset & 0xfff) == 0);
99 
100 	if (!rsc->valid)
101 		return;
102 
103 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
104 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
105 	OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
106 			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
107 
108 	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
109 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
110 	OUT_RING(ring, 0x00000000);             /* RB_COPY_CONTROL */
111 	OUT_RELOC(ring, rsc->bo, offset, 0, 0);     /* RB_COPY_DEST_BASE */
112 	OUT_RING(ring, pitch >> 5); /* RB_COPY_DEST_PITCH */
113 	OUT_RING(ring,                          /* RB_COPY_DEST_INFO */
114 			A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(format)) |
115 			COND(!rsc->layout.tile_mode, A2XX_RB_COPY_DEST_INFO_LINEAR) |
116 			A2XX_RB_COPY_DEST_INFO_WRITE_RED |
117 			A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
118 			A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
119 			A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
120 
121 	if (!is_a20x(batch->ctx->screen)) {
122 		OUT_WFI (ring);
123 
124 		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
125 		OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
126 		OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
127 		OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
128 	}
129 
130 	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
131 			DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
132 }
133 
134 static void
prepare_tile_fini_ib(struct fd_batch * batch)135 prepare_tile_fini_ib(struct fd_batch *batch)
136 {
137 	struct fd_context *ctx = batch->ctx;
138 	struct fd2_context *fd2_ctx = fd2_context(ctx);
139 	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
140 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
141 	struct fd_ringbuffer *ring;
142 
143 	batch->tile_fini = fd_submit_new_ringbuffer(batch->submit, 0x1000,
144 			FD_RINGBUFFER_STREAMING);
145 	ring = batch->tile_fini;
146 
147 	fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
148 			{ .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
149 		}, 1);
150 
151 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
152 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
153 	OUT_RING(ring, 0x00000000);          /* PA_SC_WINDOW_OFFSET */
154 
155 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
156 	OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
157 	OUT_RING(ring, 0);
158 
159 	if (!is_a20x(ctx->screen)) {
160 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
161 		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
162 		OUT_RING(ring, 0x0000028f);
163 	}
164 
165 	fd2_program_emit(ctx, ring, &ctx->solid_prog);
166 
167 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
168 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
169 	OUT_RING(ring, 0x0000ffff);
170 
171 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
172 	OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
173 	OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
174 
175 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
176 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
177 	OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |  /* PA_SU_SC_MODE_CNTL */
178 			A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
179 			A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
180 
181 	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
182 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
183 	OUT_RING(ring, xy2d(0, 0));                       /* PA_SC_WINDOW_SCISSOR_TL */
184 	OUT_RING(ring, xy2d(pfb->width, pfb->height));    /* PA_SC_WINDOW_SCISSOR_BR */
185 
186 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
187 	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
188 	OUT_RING(ring, 0x00000000);
189 
190 	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
191 	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
192 	OUT_RING(ring, fui((float) gmem->bin_w / 2.0)); /* XSCALE */
193 	OUT_RING(ring, fui((float) gmem->bin_w / 2.0)); /* XOFFSET */
194 	OUT_RING(ring, fui((float) gmem->bin_h / 2.0)); /* YSCALE */
195 	OUT_RING(ring, fui((float) gmem->bin_h / 2.0)); /* YOFFSET */
196 
197 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
198 	OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
199 	OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));
200 
201 	if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
202 		emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
203 
204 	if (batch->resolve & FD_BUFFER_COLOR)
205 		emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
206 
207 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
208 	OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
209 	OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
210 
211 	if (!is_a20x(ctx->screen)) {
212 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
213 		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
214 		OUT_RING(ring, 0x0000003b);
215 	}
216 }
217 
218 static void
fd2_emit_tile_gmem2mem(struct fd_batch * batch,const struct fd_tile * tile)219 fd2_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
220 {
221 	fd2_emit_ib(batch->gmem, batch->tile_fini);
222 }
223 
224 /* transfer from system memory to gmem */
225 
226 static void
emit_mem2gmem_surf(struct fd_batch * batch,uint32_t base,struct pipe_surface * psurf)227 emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
228 		struct pipe_surface *psurf)
229 {
230 	struct fd_ringbuffer *ring = batch->gmem;
231 	struct fd_resource *rsc = fd_resource(psurf->texture);
232 	uint32_t offset =
233 		fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
234 	enum pipe_format format = fd_gmem_restore_format(psurf->format);
235 
236 
237 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
238 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
239 	OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
240 			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
241 
242 	/* emit fb as a texture: */
243 	OUT_PKT3(ring, CP_SET_CONSTANT, 7);
244 	OUT_RING(ring, 0x00010000);
245 	OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
246 			A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
247 			A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
248 			A2XX_SQ_TEX_0_PITCH(fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level)));
249 	OUT_RELOC(ring, rsc->bo, offset,
250 			A2XX_SQ_TEX_1_FORMAT(fd2_pipe2surface(format).format) |
251 			A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL), 0);
252 	OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
253 			A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
254 	OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
255 			A2XX_SQ_TEX_3_SWIZ_X(0) |
256 			A2XX_SQ_TEX_3_SWIZ_Y(1) |
257 			A2XX_SQ_TEX_3_SWIZ_Z(2) |
258 			A2XX_SQ_TEX_3_SWIZ_W(3) |
259 			A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
260 			A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
261 	OUT_RING(ring, 0x00000000);
262 	OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));
263 
264 	if (!is_a20x(batch->ctx->screen)) {
265 		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
266 		OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
267 		OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
268 		OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
269 	}
270 
271 	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
272 			DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
273 }
274 
275 static void
fd2_emit_tile_mem2gmem(struct fd_batch * batch,const struct fd_tile * tile)276 fd2_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
277 {
278 	struct fd_context *ctx = batch->ctx;
279 	struct fd2_context *fd2_ctx = fd2_context(ctx);
280 	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
281 	struct fd_ringbuffer *ring = batch->gmem;
282 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
283 	unsigned bin_w = tile->bin_w;
284 	unsigned bin_h = tile->bin_h;
285 	float x0, y0, x1, y1;
286 
287 	fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
288 			{ .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
289 			{ .prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36 },
290 		}, 2);
291 
292 	/* write texture coordinates to vertexbuf: */
293 	x0 = ((float)tile->xoff) / ((float)pfb->width);
294 	x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
295 	y0 = ((float)tile->yoff) / ((float)pfb->height);
296 	y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
297 	OUT_PKT3(ring, CP_MEM_WRITE, 7);
298 	OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
299 	OUT_RING(ring, fui(x0));
300 	OUT_RING(ring, fui(y0));
301 	OUT_RING(ring, fui(x1));
302 	OUT_RING(ring, fui(y0));
303 	OUT_RING(ring, fui(x0));
304 	OUT_RING(ring, fui(y1));
305 
306 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
307 	OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
308 	OUT_RING(ring, 0);
309 
310 	fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);
311 
312 	OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
313 	OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
314 
315 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
316 	OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
317 	OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
318 
319 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
320 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
321 	OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
322 			A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
323 			A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
324 
325 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
326 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
327 	OUT_RING(ring, 0x0000ffff);
328 
329 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
330 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
331 	OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
332 			A2XX_RB_COLORCONTROL_BLEND_DISABLE |
333 			A2XX_RB_COLORCONTROL_ROP_CODE(12) |
334 			A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
335 			A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
336 
337 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
338 	OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
339 	OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
340 			A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
341 			A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
342 			A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
343 			A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
344 			A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));
345 
346 	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
347 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
348 	OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
349 			xy2d(0,0));                     /* PA_SC_WINDOW_SCISSOR_TL */
350 	OUT_RING(ring, xy2d(bin_w, bin_h));     /* PA_SC_WINDOW_SCISSOR_BR */
351 
352 	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
353 	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
354 	OUT_RING(ring, fui((float)bin_w/2.0));  /* PA_CL_VPORT_XSCALE */
355 	OUT_RING(ring, fui((float)bin_w/2.0));  /* PA_CL_VPORT_XOFFSET */
356 	OUT_RING(ring, fui(-(float)bin_h/2.0)); /* PA_CL_VPORT_YSCALE */
357 	OUT_RING(ring, fui((float)bin_h/2.0));  /* PA_CL_VPORT_YOFFSET */
358 
359 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
360 	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
361 	OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
362 			A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT |       // XXX check this???
363 			A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
364 			A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
365 			A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
366 			A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
367 
368 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
369 	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
370 	OUT_RING(ring, 0x00000000);
371 
372 	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
373 		emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
374 
375 	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
376 		emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
377 
378 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
379 	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
380 	OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
381 			A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
382 			A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
383 			A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
384 			A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
385 			A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
386 			A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
387 
388 	/* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
389 }
390 
391 static void
patch_draws(struct fd_batch * batch,enum pc_di_vis_cull_mode vismode)392 patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
393 {
394 	unsigned i;
395 
396 	if (!is_a20x(batch->ctx->screen)) {
397 		/* identical to a3xx */
398 		for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
399 			struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
400 			*patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
401 		}
402 		util_dynarray_clear(&batch->draw_patches);
403 		return;
404 	}
405 
406 	if (vismode == USE_VISIBILITY)
407 		return;
408 
409 	for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t*); i++) {
410 		uint32_t *ptr = *util_dynarray_element(&batch->draw_patches, uint32_t*, i);
411 		unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */
412 
413 		/* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
414 		 * replace first two DWORDS with NOP and move the rest down
415 		 * (we don't want to have to move the idx buffer reloc)
416 		 */
417 		ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
418 		ptr[1] = 0x00000000;
419 
420 		ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
421 		ptr[2] = CP_TYPE3_PKT | ((cnt-2) << 16) | (CP_DRAW_INDX << 8);
422 		ptr[3] = 0x00000000;
423 	}
424 }
425 
426 static void
fd2_emit_sysmem_prep(struct fd_batch * batch)427 fd2_emit_sysmem_prep(struct fd_batch *batch)
428 {
429 	struct fd_context *ctx = batch->ctx;
430 	struct fd_ringbuffer *ring = batch->gmem;
431 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
432 	struct pipe_surface *psurf = pfb->cbufs[0];
433 
434 	if (!psurf)
435 		return;
436 
437 	struct fd_resource *rsc = fd_resource(psurf->texture);
438 	uint32_t offset =
439 		fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
440 	uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
441 
442 	assert((pitch & 31) == 0);
443 	assert((offset & 0xfff) == 0);
444 
445 	fd2_emit_restore(ctx, ring);
446 
447 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
448 	OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
449 	OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(pitch));
450 
451 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
452 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
453 	OUT_RELOC(ring, rsc->bo, offset,
454 		COND(!rsc->layout.tile_mode, A2XX_RB_COLOR_INFO_LINEAR) |
455 		A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
456 		A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)), 0);
457 
458 	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
459 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
460 	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
461 	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
462 		A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));
463 
464 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
465 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
466 	OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(0) |
467 			A2XX_PA_SC_WINDOW_OFFSET_Y(0));
468 
469 	patch_draws(batch, IGNORE_VISIBILITY);
470 	util_dynarray_clear(&batch->draw_patches);
471 	util_dynarray_clear(&batch->shader_patches);
472 }
473 
474 /* before first tile */
475 static void
fd2_emit_tile_init(struct fd_batch * batch)476 fd2_emit_tile_init(struct fd_batch *batch)
477 {
478 	struct fd_context *ctx = batch->ctx;
479 	struct fd_ringbuffer *ring = batch->gmem;
480 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
481 	const struct fd_gmem_stateobj *gmem = batch->gmem_state;
482 	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
483 	uint32_t reg;
484 
485 	fd2_emit_restore(ctx, ring);
486 
487 	prepare_tile_fini_ib(batch);
488 
489 	OUT_PKT3(ring, CP_SET_CONSTANT, 4);
490 	OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
491 	OUT_RING(ring, gmem->bin_w);                 /* RB_SURFACE_INFO */
492 	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
493 			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
494 	reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
495 	if (pfb->zsbuf)
496 		reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
497 	OUT_RING(ring, reg);                         /* RB_DEPTH_INFO */
498 
499 	/* fast clear patches */
500 	int depth_size = -1;
501 	int color_size = -1;
502 
503 	if (pfb->cbufs[0])
504 		color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;
505 
506 	if (pfb->zsbuf)
507 		depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;
508 
509 	for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
510 		struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
511 		uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
512 		uint32_t size, lines;
513 
514 		/* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
515 		switch (patch->val) {
516 		case GMEM_PATCH_FASTCLEAR_COLOR:
517 			size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000);
518 			lines = size / 1024;
519 			depth_base = size / 2;
520 			break;
521 		case GMEM_PATCH_FASTCLEAR_DEPTH:
522 			size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000);
523 			lines = size / 1024;
524 			color_base = depth_base;
525 			depth_base = depth_base + size / 2;
526 			break;
527 		case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
528 			lines = align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024;
529 			break;
530 		case GMEM_PATCH_RESTORE_INFO:
531 			patch->cs[0] = gmem->bin_w;
532 			patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
533 					A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
534 			patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
535 			if (pfb->zsbuf)
536 				patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
537 			continue;
538 		default:
539 			continue;
540 		}
541 
542 		patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
543 			A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
544 		patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
545 			A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
546 		patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
547 			A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
548 	}
549 	util_dynarray_clear(&batch->gmem_patches);
550 
551 	/* set to zero, for some reason hardware doesn't like certain values */
552 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
553 	OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
554 	OUT_RING(ring, 0);
555 
556 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
557 	OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
558 	OUT_RING(ring, 0);
559 
560 	if (use_hw_binning(batch)) {
561 		/* patch out unneeded memory exports by changing EXEC CF to EXEC_END
562 		 *
563 		 * in the shader compiler, we guarantee that the shader ends with
564 		 * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
565 		 *
566 		 * the since patches point only to dwords and CFs are 1.5 dwords
567 		 * the patch is aligned and might point to a ALLOC CF
568 		 */
569 		for (int i = 0; i < batch->shader_patches.size / sizeof(void*); i++) {
570 			instr_cf_t *cf =
571 				*util_dynarray_element(&batch->shader_patches, instr_cf_t*, i);
572 			if (cf->opc == ALLOC)
573 				cf++;
574 			assert(cf->opc == EXEC);
575 			assert(cf[ctx->screen->info.num_vsc_pipes*2-2].opc == EXEC_END);
576 			cf[2*(gmem->num_vsc_pipes-1)].opc = EXEC_END;
577 		}
578 
579 		patch_draws(batch, USE_VISIBILITY);
580 
581 		/* initialize shader constants for the binning memexport */
582 		OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
583 		OUT_RING(ring, 0x0000000C);
584 
585 		for (int i = 0; i < gmem->num_vsc_pipes; i++) {
586 			/* allocate in 64k increments to avoid reallocs */
587 			uint32_t bo_size = align(batch->num_vertices, 0x10000);
588 			if (!ctx->vsc_pipe_bo[i] || fd_bo_size(ctx->vsc_pipe_bo[i]) < bo_size) {
589 				if (ctx->vsc_pipe_bo[i])
590 					fd_bo_del(ctx->vsc_pipe_bo[i]);
591 				ctx->vsc_pipe_bo[i] = fd_bo_new(ctx->dev, bo_size,
592 						DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
593 				assert(ctx->vsc_pipe_bo[i]);
594 			}
595 
596 			/* memory export address (export32):
597 			 * .x: (base_address >> 2) | 0x40000000 (?)
598 			 * .y: index (float) - set by shader
599 			 * .z: 0x4B00D000 (?)
600 			 * .w: 0x4B000000 (?) | max_index (?)
601 			*/
602 			OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0x40000000, -2);
603 			OUT_RING(ring, 0x00000000);
604 			OUT_RING(ring, 0x4B00D000);
605 			OUT_RING(ring, 0x4B000000 | bo_size);
606 		}
607 
608 		OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
609 		OUT_RING(ring, 0x0000018C);
610 
611 		for (int i = 0; i < gmem->num_vsc_pipes; i++) {
612 			const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
613 			float off_x, off_y, mul_x, mul_y;
614 
615 			/* const to tranform from [-1,1] to bin coordinates for this pipe
616 			 * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
617 			 * 8 possible values on x/y axis,
618 			 * to clip at binning stage: only use center 6x6
619 			 * TODO: set the z parameters too so that hw binning
620 			 * can clip primitives in Z too
621 			 */
622 
623 			mul_x = 1.0f / (float) (gmem->bin_w * 8);
624 			mul_y = 1.0f / (float) (gmem->bin_h * 8);
625 			off_x = -pipe->x * (1.0/8.0f) + 0.125f - mul_x * gmem->minx;
626 			off_y = -pipe->y * (1.0/8.0f) + 0.125f - mul_y * gmem->miny;
627 
628 			OUT_RING(ring, fui(off_x * (256.0f/255.0f)));
629 			OUT_RING(ring, fui(off_y * (256.0f/255.0f)));
630 			OUT_RING(ring, 0x3f000000);
631 			OUT_RING(ring, fui(0.0f));
632 
633 			OUT_RING(ring, fui(mul_x * (256.0f/255.0f)));
634 			OUT_RING(ring, fui(mul_y * (256.0f/255.0f)));
635 			OUT_RING(ring, fui(0.0f));
636 			OUT_RING(ring, fui(0.0f));
637 		}
638 
639 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
640 		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
641 		OUT_RING(ring, 0);
642 
643 		fd2_emit_ib(ring, batch->binning);
644 
645 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
646 		OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
647 		OUT_RING(ring, 0x00000002);
648 	} else {
649 		patch_draws(batch, IGNORE_VISIBILITY);
650 	}
651 
652 	util_dynarray_clear(&batch->draw_patches);
653 	util_dynarray_clear(&batch->shader_patches);
654 }
655 
656 /* before mem2gmem */
657 static void
fd2_emit_tile_prep(struct fd_batch * batch,const struct fd_tile * tile)658 fd2_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
659 {
660 	struct fd_ringbuffer *ring = batch->gmem;
661 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
662 	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
663 
664 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
665 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
666 	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
667 			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
668 
669 	/* setup screen scissor for current tile (same for mem2gmem): */
670 	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
671 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
672 	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
673 			A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
674 	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
675 			A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
676 }
677 
678 /* before IB to rendering cmds: */
679 static void
fd2_emit_tile_renderprep(struct fd_batch * batch,const struct fd_tile * tile)680 fd2_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
681 {
682 	struct fd_context *ctx = batch->ctx;
683 	struct fd2_context *fd2_ctx = fd2_context(ctx);
684 	struct fd_ringbuffer *ring = batch->gmem;
685 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
686 	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
687 
688 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
689 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
690 	OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
691 			A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
692 
693 	/* setup window scissor and offset for current tile (different
694 	 * from mem2gmem):
695 	 */
696 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
697 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
698 	OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
699 			A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
700 
701 	/* write SCISSOR_BR to memory so fast clear path can restore from it */
702 	OUT_PKT3(ring, CP_MEM_WRITE, 2);
703 	OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
704 	OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
705 			A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
706 
707 	/* set the copy offset for gmem2mem */
708 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
709 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
710 	OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
711 			A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));
712 
713 	/* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
714 	if (is_a20x(ctx->screen)) {
715 		OUT_PKT3(ring, CP_SET_CONSTANT, 5);
716 		OUT_RING(ring, 0x00000580);
717 		OUT_RING(ring, fui(tile->xoff));
718 		OUT_RING(ring, fui(tile->yoff));
719 		OUT_RING(ring, fui(0.0f));
720 		OUT_RING(ring, fui(0.0f));
721 	}
722 
723 	if (use_hw_binning(batch)) {
724 		struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
725 
726 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
727 		OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
728 		OUT_RING(ring, tile->n);
729 
730 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
731 		OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
732 		OUT_RING(ring, tile->n);
733 
734 		/* TODO only emit this when tile->p changes */
735 		OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
736 		OUT_RELOC(ring, pipe_bo, 0, 0, 0);
737 	}
738 }
739 
740 void
fd2_gmem_init(struct pipe_context * pctx)741 fd2_gmem_init(struct pipe_context *pctx)
742 {
743 	struct fd_context *ctx = fd_context(pctx);
744 
745 	ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
746 	ctx->emit_tile_init = fd2_emit_tile_init;
747 	ctx->emit_tile_prep = fd2_emit_tile_prep;
748 	ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
749 	ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
750 	ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
751 }
752