1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2 
3 /*
4  * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Rob Clark <robclark@freedesktop.org>
27  */
28 
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "util/u_format.h"
34 
35 #include "freedreno_draw.h"
36 #include "freedreno_state.h"
37 #include "freedreno_resource.h"
38 
39 #include "fd3_gmem.h"
40 #include "fd3_context.h"
41 #include "fd3_emit.h"
42 #include "fd3_program.h"
43 #include "fd3_format.h"
44 #include "fd3_zsa.h"
45 
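/* emit_mrt() programs the per-render-target color buffer state.  With a
 * non-zero bin_w we are rendering into the tile buffer (GMEM): buffers are
 * tiled (TILE_32X32) with a pitch of bin_w * cpp and addressed via the
 * per-buffer GMEM base offsets in 'bases'.  With bin_w == 0 the target is
 * system memory: the layout is linear and the buffer base is a reloc to
 * the resource BO.  For Z32F_S8 surfaces the "color" write is redirected
 * to the separate stencil resource.
 */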
46 static void
47 emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
48 		 struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w,
49 		 bool decode_srgb)
50 {
51 	enum a3xx_tile_mode tile_mode;
52 	unsigned i;
53 
54 	if (bin_w) {
55 		tile_mode = TILE_32X32;
56 	} else {
57 		tile_mode = LINEAR;
58 	}
59 
60 	for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {
61 		enum pipe_format pformat = 0;
62 		enum a3xx_color_fmt format = 0;
63 		enum a3xx_color_swap swap = WZYX;
64 		bool srgb = false;
65 		struct fd_resource *rsc = NULL;
66 		struct fd_resource_slice *slice = NULL;
67 		uint32_t stride = 0;
68 		uint32_t base = 0;
69 		uint32_t offset = 0;
70 
71 		if ((i < nr_bufs) && bufs[i]) {
72 			struct pipe_surface *psurf = bufs[i];
73 
74 			rsc = fd_resource(psurf->texture);
75 			pformat = psurf->format;
76 			/* In case we're drawing to Z32F_S8, the "color" actually goes to
77 			 * the stencil
78 			 */
79 			if (rsc->stencil) {
80 				rsc = rsc->stencil;
81 				pformat = rsc->base.format;
82 				if (bases)
83 					bases++;
84 			}
85 			slice = fd_resource_slice(rsc, psurf->u.tex.level);
86 			format = fd3_pipe2color(pformat);
87 			swap = fd3_pipe2swap(pformat);
88 			if (decode_srgb)
89 				srgb = util_format_is_srgb(pformat);
90 			else
91 				pformat = util_format_linear(pformat);
92 
93 			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
94 
95 			offset = fd_resource_offset(rsc, psurf->u.tex.level,
96 					psurf->u.tex.first_layer);
97 
98 			if (bin_w) {
99 				stride = bin_w * rsc->cpp;
100 
101 				if (bases) {
102 					base = bases[i];
103 				}
104 			} else {
105 				stride = slice->pitch * rsc->cpp;
106 			}
107 		} else if (i < nr_bufs && bases) {
108 			base = bases[i];
109 		}
110 
111 		OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
112 		OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
113 				A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
114 				A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
115 				A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
116 				COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB));
117 		if (bin_w || (i >= nr_bufs) || !bufs[i]) {
118 			OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
119 		} else {
120 			OUT_RELOCW(ring, rsc->bo, offset, 0, -1);
121 		}
122 
123 		OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
124 		OUT_RING(ring, COND((i < nr_bufs) && bufs[i],
125 							A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(
126 									fd3_fs_output_format(pformat))));
127 	}
128 }
129 
130 static bool
131 use_hw_binning(struct fd_batch *batch)
132 {
133 	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
134 
135 	/* workaround: combining scissor optimization and hw binning
136 	 * seems problematic.  Seems like we end up with a mismatch
137 	 * between binning pass and rendering pass, wrt. where the hw
138 	 * thinks the vertices belong.  And the blob driver doesn't
139 	 * seem to implement anything like scissor optimization, so
140 	 * not entirely sure what I might be missing.
141 	 *
142 	 * But scissor optimization is mainly for window managers,
143  * which don't have many vertices (and therefore don't
144  * benefit much from the binning pass).
145 	 *
146 	 * So for now just disable binning if scissor optimization is
147 	 * used.
148 	 */
149 	if (gmem->minx || gmem->miny)
150 		return false;
151 
152 	if ((gmem->maxpw * gmem->maxph) > 32)
153 		return false;
154 
155 	if ((gmem->maxpw > 15) || (gmem->maxph > 15))
156 		return false;
157 
158 	return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
159 }
160 
161 /* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */
162 static void update_vsc_pipe(struct fd_batch *batch);
163 static void
164 emit_binning_workaround(struct fd_batch *batch)
165 {
166 	struct fd_context *ctx = batch->ctx;
167 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
168 	struct fd_ringbuffer *ring = batch->gmem;
169 	struct fd3_emit emit = {
170 			.debug = &ctx->debug,
171 			.vtx = &ctx->solid_vbuf_state,
172 			.prog = &ctx->solid_prog,
173 			.key = {
174 				.half_precision = true,
175 			},
176 	};
177 
178 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
179 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
180 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
181 			A3XX_RB_MODE_CONTROL_MRT(0));
182 	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
183 			A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
184 			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
185 
186 	OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
187 	OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
188 			A3XX_RB_COPY_CONTROL_MODE(0) |
189 			A3XX_RB_COPY_CONTROL_GMEM_BASE(0));
190 	OUT_RELOCW(ring, fd_resource(ctx->solid_vbuf)->bo, 0x20, 0, -1);  /* RB_COPY_DEST_BASE */
191 	OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));
192 	OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
193 			A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |
194 			A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |
195 			A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
196 			A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));
197 
198 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
199 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
200 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
201 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
202 
203 	fd3_program_emit(ring, &emit, 0, NULL);
204 	fd3_emit_vertex_bufs(ring, &emit);
205 
206 	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
207 	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
208 			A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
209 			A3XX_HLSQ_CONTROL_0_REG_RESERVED2 |
210 			A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
211 	OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
212 			A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
213 	OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
214 	OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */
215 
216 	OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
217 	OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) |
218 			A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20));
219 
220 	OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
221 	OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
222 			A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
223 			A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
224 
225 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
226 	OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
227 
228 	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
229 	OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
230 			A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
231 			A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
232 			A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
233 			A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
234 			A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
235 			A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
236 			A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
237 
238 	OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
239 	OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0));
240 
241 	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
242 	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
243 	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
244 	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
245 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
246 
247 	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
248 	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
249 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
250 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
251 			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
252 
253 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
254 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
255 			A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1));
256 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) |
257 			A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1));
258 
259 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
260 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
261 			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
262 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) |
263 			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0));
264 
265 	fd_wfi(batch, ring);
266 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
267 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0));
268 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0));
269 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0));
270 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0));
271 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
272 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
273 
274 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
275 	OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE |
276 			A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE |
277 			A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE |
278 			A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE |
279 			A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE);
280 
281 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
282 	OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
283 			A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
284 
285 	OUT_PKT3(ring, CP_DRAW_INDX_2, 5);
286 	OUT_RING(ring, 0x00000000);   /* viz query info. */
287 	OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE,
288 						INDEX_SIZE_32_BIT, IGNORE_VISIBILITY, 0));
289 	OUT_RING(ring, 2);            /* NumIndices */
290 	OUT_RING(ring, 2);
291 	OUT_RING(ring, 1);
292 	fd_reset_wfi(batch);
293 
294 	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);
295 	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));
296 
297 	OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
298 	OUT_RING(ring, 0x00000000);
299 
300 	fd_wfi(batch, ring);
301 	OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
302 	OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
303 			A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
304 
305 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
306 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
307 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
308 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
309 
310 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
311 	OUT_RING(ring, 0x00000000);
312 }
313 
314 /* transfer from gmem to system memory (ie. normal RAM) */
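/* Once a tile has been rendered into GMEM, its contents are resolved out
 * to the backing resources: the RB is switched to RB_RESOLVE_PASS mode and
 * one RECTLIST draw per surface copies from the GMEM base of that surface
 * (RB_COPY_CONTROL_GMEM_BASE) to its BO in system memory (RB_COPY_DEST_*).
 */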
315 
316 static void
317 emit_gmem2mem_surf(struct fd_batch *batch,
318 				   enum adreno_rb_copy_control_mode mode,
319 				   bool stencil,
320 				   uint32_t base, struct pipe_surface *psurf)
321 {
322 	struct fd_ringbuffer *ring = batch->gmem;
323 	struct fd_resource *rsc = fd_resource(psurf->texture);
324 	enum pipe_format format = psurf->format;
325 	if (stencil) {
326 		rsc = rsc->stencil;
327 		format = rsc->base.format;
328 	}
329 	struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level);
330 	uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level,
331 			psurf->u.tex.first_layer);
332 
333 	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
334 
335 	OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
336 	OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
337 			A3XX_RB_COPY_CONTROL_MODE(mode) |
338 			A3XX_RB_COPY_CONTROL_GMEM_BASE(base) |
339 			COND(format == PIPE_FORMAT_Z32_FLOAT ||
340 				 format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
341 				 A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE));
342 
343 	OUT_RELOCW(ring, rsc->bo, offset, 0, -1);    /* RB_COPY_DEST_BASE */
344 	OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
345 	OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
346 			A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) |
347 			A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
348 			A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
349 			A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format)));
350 
351 	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
352 			DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
353 }
354 
355 static void
356 fd3_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
357 {
358 	struct fd_context *ctx = batch->ctx;
359 	struct fd_ringbuffer *ring = batch->gmem;
360 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
361 	struct fd3_emit emit = {
362 			.debug = &ctx->debug,
363 			.vtx = &ctx->solid_vbuf_state,
364 			.prog = &ctx->solid_prog,
365 			.key = {
366 				.half_precision = true,
367 			},
368 	};
369 	int i;
370 
371 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
372 	OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
373 
374 	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
375 	OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
376 			A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
377 			A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
378 			A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
379 			A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
380 			A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
381 			A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
382 			A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
383 
384 	OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
385 	OUT_RING(ring, 0xff000000 |
386 			A3XX_RB_STENCILREFMASK_STENCILREF(0) |
387 			A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
388 			A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
389 	OUT_RING(ring, 0xff000000 |
390 			A3XX_RB_STENCILREFMASK_STENCILREF(0) |
391 			A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
392 			A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
393 
394 	OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
395 	OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
396 
397 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
398 	OUT_RING(ring, 0x00000000);   /* GRAS_CL_CLIP_CNTL */
399 
400 	fd_wfi(batch, ring);
401 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
402 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5));
403 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0));
404 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5));
405 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0));
406 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
407 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
408 
409 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
410 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
411 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
412 			A3XX_RB_MODE_CONTROL_MRT(0));
413 
414 	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
415 	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
416 			A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
417 			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
418 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w));
419 
420 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
421 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
422 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
423 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
424 
425 	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
426 	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
427 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
428 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
429 			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
430 
431 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
432 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
433 			A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
434 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
435 			A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
436 
437 	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
438 	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
439 	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
440 	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
441 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
442 
443 	fd3_program_emit(ring, &emit, 0, NULL);
444 	fd3_emit_vertex_bufs(ring, &emit);
445 
446 	if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
447 		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
448 		if (!rsc->stencil || batch->resolve & FD_BUFFER_DEPTH)
449 			emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, false,
450 							   ctx->gmem.zsbuf_base[0], pfb->zsbuf);
451 		if (rsc->stencil && batch->resolve & FD_BUFFER_STENCIL)
452 			emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, true,
453 							   ctx->gmem.zsbuf_base[1], pfb->zsbuf);
454 	}
455 
456 	if (batch->resolve & FD_BUFFER_COLOR) {
457 		for (i = 0; i < pfb->nr_cbufs; i++) {
458 			if (!pfb->cbufs[i])
459 				continue;
460 			if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
461 				continue;
462 			emit_gmem2mem_surf(batch, RB_COPY_RESOLVE, false,
463 							   ctx->gmem.cbuf_base[i], pfb->cbufs[i]);
464 		}
465 	}
466 
467 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
468 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
469 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
470 			A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
471 
472 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
473 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
474 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
475 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
476 }
477 
478 /* transfer from system memory to gmem */
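/* If a tile's previous contents are needed (i.e. not fully cleared), they
 * are restored by texturing from the system memory copy: a full-bin
 * RECTLIST is drawn with one of the blit programs, writing the sampled
 * values back into GMEM.  Float depth formats need the special blit_z /
 * blit_zs programs, which write depth from the fragment shader.
 */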
479 
480 static void
481 emit_mem2gmem_surf(struct fd_batch *batch, uint32_t bases[],
482 		struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w)
483 {
484 	struct fd_ringbuffer *ring = batch->gmem;
485 	struct pipe_surface *zsbufs[2];
486 
487 	assert(bufs > 0);
488 
489 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
490 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
491 				   A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
492 				   A3XX_RB_MODE_CONTROL_MRT(bufs - 1));
493 
494 	emit_mrt(ring, bufs, psurf, bases, bin_w, false);
495 
496 	if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT ||
497 					 psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
498 		/* Depth is stored as unorm in gmem, so we have to write it in using a
499 		 * special blit shader which writes depth.
500 		 */
501 		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
502 		OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z |
503 						A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
504 						A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
505 						A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE |
506 						A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)));
507 
508 		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
509 		OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) |
510 				 A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32));
511 		OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * batch->ctx->gmem.bin_w));
512 
513 		if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) {
514 			OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1);
515 			OUT_RING(ring, 0);
516 		} else {
517 			/* The gmem_restore_tex logic will put the first buffer's stencil
518 			 * as color. Supply it with the proper information to make that
519 			 * happen.
520 			 */
521 			zsbufs[0] = zsbufs[1] = psurf[0];
522 			psurf = zsbufs;
523 			bufs = 2;
524 		}
525 	} else {
526 		OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
527 		OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
528 	}
529 
530 	fd3_emit_gmem_restore_tex(ring, psurf, bufs);
531 
532 	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
533 			DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
534 }
535 
536 static void
537 fd3_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
538 {
539 	struct fd_context *ctx = batch->ctx;
540 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
541 	struct fd_ringbuffer *ring = batch->gmem;
542 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
543 	struct fd3_emit emit = {
544 			.debug = &ctx->debug,
545 			.vtx = &ctx->blit_vbuf_state,
546 			.sprite_coord_enable = 1,
547 			/* NOTE: They all use the same VP, this is for vtx bufs. */
548 			.prog = &ctx->blit_prog[0],
549 			.key = {
550 				.half_precision = fd_half_precision(pfb),
551 			},
552 	};
553 	float x0, y0, x1, y1;
554 	unsigned bin_w = tile->bin_w;
555 	unsigned bin_h = tile->bin_h;
556 	unsigned i;
557 
558 	/* write texture coordinates to vertexbuf: */
559 	x0 = ((float)tile->xoff) / ((float)pfb->width);
560 	x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
561 	y0 = ((float)tile->yoff) / ((float)pfb->height);
562 	y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
563 
564 	OUT_PKT3(ring, CP_MEM_WRITE, 5);
565 	OUT_RELOCW(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
566 	OUT_RING(ring, fui(x0));
567 	OUT_RING(ring, fui(y0));
568 	OUT_RING(ring, fui(x1));
569 	OUT_RING(ring, fui(y1));
570 
571 	fd3_emit_cache_flush(batch, ring);
572 
573 	for (i = 0; i < 4; i++) {
574 		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
575 		OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
576 				A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
577 				A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
578 
579 		OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
580 		OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
581 				A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
582 				A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
583 				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
584 				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
585 				A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
586 	}
587 
588 	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
589 	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) |
590 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
591 
592 	fd_wfi(batch, ring);
593 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
594 	OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
595 
596 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
597 	OUT_RING(ring, 0);
598 	OUT_RING(ring, 0);
599 
600 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
601 	OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);   /* GRAS_CL_CLIP_CNTL */
602 
603 	fd_wfi(batch, ring);
604 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
605 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5));
606 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0));
607 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h/2.0 - 0.5));
608 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h/2.0));
609 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
610 	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
611 
612 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
613 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
614 			A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
615 	OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
616 			A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
617 
618 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
619 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
620 			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
621 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
622 			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
623 
624 	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
625 	OUT_RING(ring, 0x2 |
626 			A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
627 			A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
628 			A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
629 			A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
630 			A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
631 			A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
632 			A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
633 			A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
634 
635 	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
636 	OUT_RING(ring, 0); /* RB_STENCIL_INFO */
637 	OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
638 
639 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
640 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
641 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
642 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
643 
644 	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
645 	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) |
646 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
647 			A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
648 			A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
649 
650 	OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
651 	OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
652 	OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
653 	OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
654 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
655 
656 	fd3_emit_vertex_bufs(ring, &emit);
657 
658 	/* for gmem pitch/base calculations, we need to use the non-
659 	 * truncated tile sizes:
660 	 */
661 	bin_w = gmem->bin_w;
662 	bin_h = gmem->bin_h;
663 
664 	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
665 		emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
666 		emit.fp = NULL;      /* frag shader changed so clear cache */
667 		fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
668 		emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
669 	}
670 
671 	if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
672 		if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
673 			pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) {
674 			/* Non-float can use a regular color write. It's split over 8-bit
675 			 * components, so half precision is always sufficient.
676 			 */
677 			emit.prog = &ctx->blit_prog[0];
678 			emit.key.half_precision = true;
679 		} else {
680 			/* Float depth needs special blit shader that writes depth */
681 			if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT)
682 				emit.prog = &ctx->blit_z;
683 			else
684 				emit.prog = &ctx->blit_zs;
685 			emit.key.half_precision = false;
686 		}
687 		emit.fp = NULL;      /* frag shader changed so clear cache */
688 		fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
689 		emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
690 	}
691 
692 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
693 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
694 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
695 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
696 
697 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
698 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
699 				   A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
700 				   A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
701 }
702 
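/* Draw packets and the RB_RENDER_CONTROL value are emitted into the draw
 * command stream before we know whether the batch will use hw binning or
 * bypass GMEM entirely, so placeholders are recorded in
 * batch->draw_patches / batch->rbrc_patches and patched here once the
 * final mode is known.
 */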
703 static void
704 patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
705 {
706 	unsigned i;
707 	for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
708 		struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
709 		*patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
710 	}
711 	util_dynarray_resize(&batch->draw_patches, 0);
712 }
713 
714 static void
715 patch_rbrc(struct fd_batch *batch, uint32_t val)
716 {
717 	unsigned i;
718 	for (i = 0; i < fd_patch_num_elements(&batch->rbrc_patches); i++) {
719 		struct fd_cs_patch *patch = fd_patch_element(&batch->rbrc_patches, i);
720 		*patch->cs = patch->val | val;
721 	}
722 	util_dynarray_resize(&batch->rbrc_patches, 0);
723 }
724 
725 /* for rendering directly to system memory: */
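/* GMEM is bypassed (RB_MODE_CONTROL_GMEM_BYPASS): MRTs point directly at
 * the resource BOs with linear layout, draws are patched to ignore
 * visibility, and the RB_RENDER_CONTROL BIN_WIDTH field is patched with
 * the color buffer pitch rather than a bin width.
 */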
726 static void
727 fd3_emit_sysmem_prep(struct fd_batch *batch)
728 {
729 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
730 	struct fd_ringbuffer *ring = batch->gmem;
731 	uint32_t i, pitch = 0;
732 
733 	for (i = 0; i < pfb->nr_cbufs; i++) {
734 		struct pipe_surface *psurf = pfb->cbufs[i];
735 		if (!psurf)
736 			continue;
737 		pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch;
738 	}
739 
740 	fd3_emit_restore(batch, ring);
741 
742 	OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
743 	OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
744 			A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
745 
746 	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
747 
748 	/* setup scissor/offset for current tile: */
749 	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
750 	OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
751 			A3XX_RB_WINDOW_OFFSET_Y(0));
752 
753 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
754 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
755 			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
756 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
757 			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
758 
759 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
760 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
761 			A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
762 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
763 			A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
764 
765 	patch_draws(batch, IGNORE_VISIBILITY);
766 	patch_rbrc(batch, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
767 }
768 
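/* Set up the Visibility Stream Compressor (VSC) pipes used by hw binning:
 * each of the eight pipes covers a rectangle of bins (x/y/w/h in bin
 * units) and gets a buffer which the binning pass fills with per-bin
 * visibility data; VSC_SIZE_ADDRESS receives the resulting stream sizes.
 */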
769 static void
770 update_vsc_pipe(struct fd_batch *batch)
771 {
772 	struct fd_context *ctx = batch->ctx;
773 	struct fd3_context *fd3_ctx = fd3_context(ctx);
774 	struct fd_ringbuffer *ring = batch->gmem;
775 	int i;
776 
777 	OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
778 	OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
779 
780 	for (i = 0; i < 8; i++) {
781 		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
782 
783 		if (!pipe->bo) {
784 			pipe->bo = fd_bo_new(ctx->dev, 0x40000,
785 					DRM_FREEDRENO_GEM_TYPE_KMEM);
786 		}
787 
788 		OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3);
789 		OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) |
790 				A3XX_VSC_PIPE_CONFIG_Y(pipe->y) |
791 				A3XX_VSC_PIPE_CONFIG_W(pipe->w) |
792 				A3XX_VSC_PIPE_CONFIG_H(pipe->h));
793 		OUT_RELOCW(ring, pipe->bo, 0, 0, 0);       /* VSC_PIPE[i].DATA_ADDRESS */
794 		OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE[i].DATA_LENGTH */
795 	}
796 }
797 
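/* The binning pass executes the batch's recorded binning draw commands once
 * in RB_TILING_PASS mode with color writes disabled, so that the hw fills
 * the VSC visibility streams.  The per-tile rendering passes then consume
 * that data (via CP_SET_BIN_DATA) to skip geometry which does not touch
 * the tile.  On a320 the emit_binning_workaround() dance is needed before
 * and after.
 */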
798 static void
799 emit_binning_pass(struct fd_batch *batch)
800 {
801 	struct fd_context *ctx = batch->ctx;
802 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
803 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
804 	struct fd_ringbuffer *ring = batch->gmem;
805 	int i;
806 
807 	uint32_t x1 = gmem->minx;
808 	uint32_t y1 = gmem->miny;
809 	uint32_t x2 = gmem->minx + gmem->width - 1;
810 	uint32_t y2 = gmem->miny + gmem->height - 1;
811 
812 	if (ctx->screen->gpu_id == 320) {
813 		emit_binning_workaround(batch);
814 		fd_wfi(batch, ring);
815 		OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
816 		OUT_RING(ring, 0x00007fff);
817 	}
818 
819 	OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
820 	OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE);
821 
822 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
823 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
824 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
825 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
826 
827 	OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
828 	OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
829 			A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
830 
831 	OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
832 	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
833 			A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
834 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
835 
836 	/* setup scissor/offset for whole screen: */
837 	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
838 	OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(x1) |
839 			A3XX_RB_WINDOW_OFFSET_Y(y1));
840 
841 	OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
842 	OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE);
843 
844 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
845 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
846 			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
847 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
848 			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
849 
850 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
851 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
852 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
853 			A3XX_RB_MODE_CONTROL_MRT(0));
854 
855 	for (i = 0; i < 4; i++) {
856 		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
857 		OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) |
858 				A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
859 				A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0));
860 	}
861 
862 	OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
863 	OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) |
864 			A3XX_PC_VSTREAM_CONTROL_N(0));
865 
866 	/* emit IB to binning drawcmds: */
867 	ctx->emit_ib(ring, batch->binning);
868 	fd_reset_wfi(batch);
869 
870 	fd_wfi(batch, ring);
871 
872 	/* and then put stuff back the way it was: */
873 
874 	OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
875 	OUT_RING(ring, 0x00000000);
876 
877 	OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
878 	OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE |
879 			A3XX_SP_SP_CTRL_REG_CONSTMODE(1) |
880 			A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
881 			A3XX_SP_SP_CTRL_REG_L0MODE(0));
882 
883 	OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
884 	OUT_RING(ring, 0x00000000);
885 
886 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
887 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
888 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
889 			A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
890 
891 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
892 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
893 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
894 			A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1));
895 	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
896 			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
897 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
898 
899 	fd_event_write(batch, ring, CACHE_FLUSH);
900 	fd_wfi(batch, ring);
901 
902 	if (ctx->screen->gpu_id == 320) {
903 		/* dummy-draw workaround: */
904 		OUT_PKT3(ring, CP_DRAW_INDX, 3);
905 		OUT_RING(ring, 0x00000000);
906 		OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
907 							INDEX_SIZE_IGN, IGNORE_VISIBILITY, 0));
908 		OUT_RING(ring, 0);             /* NumIndices */
909 		fd_reset_wfi(batch);
910 	}
911 
912 	OUT_PKT3(ring, CP_NOP, 4);
913 	OUT_RING(ring, 0x00000000);
914 	OUT_RING(ring, 0x00000000);
915 	OUT_RING(ring, 0x00000000);
916 	OUT_RING(ring, 0x00000000);
917 
918 	fd_wfi(batch, ring);
919 
920 	if (ctx->screen->gpu_id == 320) {
921 		emit_binning_workaround(batch);
922 	}
923 }
924 
925 /* before first tile */
926 static void
927 fd3_emit_tile_init(struct fd_batch *batch)
928 {
929 	struct fd_ringbuffer *ring = batch->gmem;
930 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
931 	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
932 	uint32_t rb_render_control;
933 
934 	fd3_emit_restore(batch, ring);
935 
936 	/* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated
937 	 * at the right and bottom edge tiles
938 	 */
939 	OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
940 	OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
941 			A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
942 
943 	update_vsc_pipe(batch);
944 
945 	fd_wfi(batch, ring);
946 	OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
947 	OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
948 			A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
949 
950 	if (use_hw_binning(batch)) {
951 		/* emit hw binning pass: */
952 		emit_binning_pass(batch);
953 
954 		patch_draws(batch, USE_VISIBILITY);
955 	} else {
956 		patch_draws(batch, IGNORE_VISIBILITY);
957 	}
958 
959 	rb_render_control = A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
960 			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w);
961 
962 	patch_rbrc(batch, rb_render_control);
963 }
964 
965 /* before mem2gmem */
966 static void
967 fd3_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
968 {
969 	struct fd_ringbuffer *ring = batch->gmem;
970 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
971 
972 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
973 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
974 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
975 			A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
976 }
977 
978 /* before IB to rendering cmds: */
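/* Per-tile setup: program the depth/stencil GMEM base and pitch, point the
 * hw at this tile's visibility stream (CP_SET_BIN_DATA) when hw binning is
 * used, emit the color MRTs with their GMEM base offsets, and set the
 * window offset and screen scissor to the tile's rectangle.
 */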
979 static void
980 fd3_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
981 {
982 	struct fd_context *ctx = batch->ctx;
983 	struct fd3_context *fd3_ctx = fd3_context(ctx);
984 	struct fd_ringbuffer *ring = batch->gmem;
985 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
986 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
987 
988 	uint32_t x1 = tile->xoff;
989 	uint32_t y1 = tile->yoff;
990 	uint32_t x2 = tile->xoff + tile->bin_w - 1;
991 	uint32_t y2 = tile->yoff + tile->bin_h - 1;
992 
993 	uint32_t reg;
994 
995 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
996 	reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
997 	if (pfb->zsbuf) {
998 		reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
999 	}
1000 	OUT_RING(ring, reg);
1001 	if (pfb->zsbuf) {
1002 		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
1003 		OUT_RING(ring, A3XX_RB_DEPTH_PITCH(rsc->cpp * gmem->bin_w));
1004 		if (rsc->stencil) {
1005 			OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
1006 			OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
1007 			OUT_RING(ring, A3XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w));
1008 		}
1009 	} else {
1010 		OUT_RING(ring, 0x00000000);
1011 	}
1012 
1013 	if (use_hw_binning(batch)) {
1014 		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p];
1015 
1016 		assert(pipe->w * pipe->h);
1017 
1018 		fd_event_write(batch, ring, HLSQ_FLUSH);
1019 		fd_wfi(batch, ring);
1020 
1021 		OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
1022 		OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
1023 				A3XX_PC_VSTREAM_CONTROL_N(tile->n));
1024 
1025 
1026 		OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
1027 		OUT_RELOCW(ring, pipe->bo, 0, 0, 0);    /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
1028 		OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */
1029 				(tile->p * 4), 0, 0);
1030 	} else {
1031 		OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
1032 		OUT_RING(ring, 0x00000000);
1033 	}
1034 
1035 	OUT_PKT3(ring, CP_SET_BIN, 3);
1036 	OUT_RING(ring, 0x00000000);
1037 	OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
1038 	OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
1039 
1040 	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true);
1041 
1042 	/* setup scissor/offset for current tile: */
1043 	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
1044 	OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) |
1045 			A3XX_RB_WINDOW_OFFSET_Y(tile->yoff));
1046 
1047 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
1048 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
1049 			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
1050 	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
1051 			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
1052 }
1053 
1054 void
1055 fd3_gmem_init(struct pipe_context *pctx)
1056 {
1057 	struct fd_context *ctx = fd_context(pctx);
1058 
1059 	ctx->emit_sysmem_prep = fd3_emit_sysmem_prep;
1060 	ctx->emit_tile_init = fd3_emit_tile_init;
1061 	ctx->emit_tile_prep = fd3_emit_tile_prep;
1062 	ctx->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem;
1063 	ctx->emit_tile_renderprep = fd3_emit_tile_renderprep;
1064 	ctx->emit_tile_gmem2mem = fd3_emit_tile_gmem2mem;
1065 }
1066