• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #include "pipe/p_state.h"
28 #include "util/u_string.h"
29 #include "util/u_memory.h"
30 #include "util/u_prim.h"
31 
32 #include "freedreno_state.h"
33 #include "freedreno_resource.h"
34 
35 #include "fd2_draw.h"
36 #include "fd2_context.h"
37 #include "fd2_emit.h"
38 #include "fd2_program.h"
39 #include "fd2_util.h"
40 #include "fd2_zsa.h"
41 
42 
43 static void
emit_cacheflush(struct fd_ringbuffer * ring)44 emit_cacheflush(struct fd_ringbuffer *ring)
45 {
46 	unsigned i;
47 
48 	for (i = 0; i < 12; i++) {
49 		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
50 		OUT_RING(ring, CACHE_FLUSH);
51 	}
52 }
53 
54 static void
emit_vertexbufs(struct fd_context * ctx)55 emit_vertexbufs(struct fd_context *ctx)
56 {
57 	struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
58 	struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
59 	struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
60 	unsigned i;
61 
62 	if (!vtx->num_elements)
63 		return;
64 
65 	for (i = 0; i < vtx->num_elements; i++) {
66 		struct pipe_vertex_element *elem = &vtx->pipe[i];
67 		struct pipe_vertex_buffer *vb =
68 				&vertexbuf->vb[elem->vertex_buffer_index];
69 		bufs[i].offset = vb->buffer_offset;
70 		bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo);
71 		bufs[i].prsc = vb->buffer.resource;
72 	}
73 
74 	// NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
75 	// CONST(20,0) (or CONST(26,0) in soliv_vp)
76 
77 	fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
78 	fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements);
79 }
80 
81 static void
draw_impl(struct fd_context * ctx,const struct pipe_draw_info * info,struct fd_ringbuffer * ring,unsigned index_offset,bool binning)82 draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
83 		   struct fd_ringbuffer *ring, unsigned index_offset, bool binning)
84 {
85 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
86 	OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
87 	OUT_RING(ring, info->index_size ? 0 : info->start);
88 
89 	OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
90 	OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
91 
92 	if (is_a20x(ctx->screen)) {
93 		/* wait for DMA to finish and
94 		 * dummy draw one triangle with indexes 0,0,0.
95 		 * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.
96 		 *
97 		 * this workaround is for a HW bug related to DMA alignment:
98 		 * it is necessary for indexed draws and possibly also
99 		 * draws that read binning data
100 		 */
101 		OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
102 		OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */
103 		OUT_RING(ring, 0x00000000);
104 		OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */
105 		OUT_RING(ring, 0x00000001);
106 
107 		OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);
108 		OUT_RING(ring, 0x00000000);
109 		OUT_RING(ring, 0x0003c004);
110 		OUT_RING(ring, 0x00000000);
111 		OUT_RING(ring, 0x00000003);
112 		OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0, 0);
113 		OUT_RING(ring, 0x00000006);
114 	} else {
115 		OUT_WFI (ring);
116 
117 		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
118 		OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
119 		OUT_RING(ring, info->max_index);        /* VGT_MAX_VTX_INDX */
120 		OUT_RING(ring, info->min_index);        /* VGT_MIN_VTX_INDX */
121 	}
122 
123 	/* binning shader will take offset from C64 */
124 	if (binning && is_a20x(ctx->screen)) {
125 		OUT_PKT3(ring, CP_SET_CONSTANT, 5);
126 		OUT_RING(ring, 0x00000180);
127 		OUT_RING(ring, fui(ctx->batch->num_vertices));
128 		OUT_RING(ring, fui(0.0f));
129 		OUT_RING(ring, fui(0.0f));
130 		OUT_RING(ring, fui(0.0f));
131 	}
132 
133 	enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;
134 	if (binning || info->mode == PIPE_PRIM_POINTS)
135 		vismode = IGNORE_VISIBILITY;
136 
137 	fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode],
138 				 vismode, info, index_offset);
139 
140 	if (is_a20x(ctx->screen)) {
141 		/* not sure why this is required, but it fixes some hangs */
142 		OUT_WFI(ring);
143 	} else {
144 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
145 		OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
146 		OUT_RING(ring, 0x00000000);
147 	}
148 
149 	emit_cacheflush(ring);
150 }
151 
152 
153 static bool
fd2_draw_vbo(struct fd_context * ctx,const struct pipe_draw_info * pinfo,unsigned index_offset)154 fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
155 			 unsigned index_offset)
156 {
157 	if (!ctx->prog.fs || !ctx->prog.vs)
158 		return false;
159 
160 	if (ctx->dirty & FD_DIRTY_VTXBUF)
161 		emit_vertexbufs(ctx);
162 
163 	if (fd_binning_enabled)
164 		fd2_emit_state_binning(ctx, ctx->dirty);
165 
166 	fd2_emit_state(ctx, ctx->dirty);
167 
168 	/* a2xx can draw only 65535 vertices at once
169 	 * on a22x the field in the draw command is 32bits but seems limited too
170 	 * using a limit of 32k because it fixes an unexplained hang
171 	 * 32766 works for all primitives (multiple of 2 and 3)
172 	 */
173 	if (pinfo->count > 32766) {
174 		static const uint16_t step_tbl[PIPE_PRIM_MAX] = {
175 			[0 ... PIPE_PRIM_MAX - 1]  = 32766,
176 			[PIPE_PRIM_LINE_STRIP]     = 32765,
177 			[PIPE_PRIM_TRIANGLE_STRIP] = 32764,
178 
179 			/* needs more work */
180 			[PIPE_PRIM_TRIANGLE_FAN]   = 0,
181 			[PIPE_PRIM_LINE_LOOP]      = 0,
182 		};
183 
184 		struct pipe_draw_info info = *pinfo;
185 		unsigned count = info.count;
186 		unsigned step = step_tbl[info.mode];
187 		unsigned num_vertices = ctx->batch->num_vertices;
188 
189 		if (!step)
190 			return false;
191 
192 		for (; count + step > 32766; count -= step) {
193 			info.count = MIN2(count, 32766);
194 			draw_impl(ctx, &info, ctx->batch->draw, index_offset, false);
195 			draw_impl(ctx, &info, ctx->batch->binning, index_offset, true);
196 			info.start += step;
197 			ctx->batch->num_vertices += step;
198 		}
199 		/* changing this value is a hack, restore it */
200 		ctx->batch->num_vertices = num_vertices;
201 	} else {
202 		draw_impl(ctx, pinfo, ctx->batch->draw, index_offset, false);
203 		draw_impl(ctx, pinfo, ctx->batch->binning, index_offset, true);
204 	}
205 
206 	fd_context_all_clean(ctx);
207 
208 	return true;
209 }
210 
211 static void
clear_state(struct fd_batch * batch,struct fd_ringbuffer * ring,unsigned buffers,bool fast_clear)212 clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring,
213 	unsigned buffers, bool fast_clear)
214 {
215 	struct fd_context *ctx = batch->ctx;
216 	struct fd2_context *fd2_ctx = fd2_context(ctx);
217 	uint32_t reg;
218 
219 	fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
220 			{ .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
221 		}, 1);
222 
223 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
224 	OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
225 	OUT_RING(ring, 0);
226 
227 	fd2_program_emit(ctx, ring, &ctx->solid_prog);
228 
229 	OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
230 	OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
231 
232 	if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
233 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
234 		OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
235 		reg = 0;
236 		if (buffers & PIPE_CLEAR_DEPTH) {
237 			reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
238 				A2XX_RB_DEPTHCONTROL_Z_ENABLE |
239 				A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
240 				A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
241 		}
242 		if (buffers & PIPE_CLEAR_STENCIL) {
243 			reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
244 					A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
245 					A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
246 		}
247 		OUT_RING(ring, reg);
248 	}
249 
250 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
251 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
252 	OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
253 			A2XX_RB_COLORCONTROL_BLEND_DISABLE |
254 			A2XX_RB_COLORCONTROL_ROP_CODE(12) |
255 			A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
256 			A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
257 
258 	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
259 	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
260 	OUT_RING(ring, 0x00000000);        /* PA_CL_CLIP_CNTL */
261 	OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |  /* PA_SU_SC_MODE_CNTL */
262 			A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
263 			A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) |
264 			(fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0));
265 
266 	if (fast_clear) {
267 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
268 		OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
269 		OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3));
270 	}
271 
272 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
273 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
274 	OUT_RING(ring, 0x0000ffff);
275 
276 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
277 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
278 	if (buffers & PIPE_CLEAR_COLOR) {
279 		OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
280 				A2XX_RB_COLOR_MASK_WRITE_GREEN |
281 				A2XX_RB_COLOR_MASK_WRITE_BLUE |
282 				A2XX_RB_COLOR_MASK_WRITE_ALPHA);
283 	} else {
284 		OUT_RING(ring, 0x0);
285 	}
286 
287 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
288 	OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
289 	OUT_RING(ring, 0);
290 
291 	if (is_a20x(batch->ctx->screen))
292 		return;
293 
294 	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
295 	OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
296 	OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
297 	OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
298 
299 	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
300 	OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
301 	OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
302 	OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
303 
304 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
305 	OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
306 	OUT_RING(ring, 0x00000084);
307 
308 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
309 	OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
310 	OUT_RING(ring, 0x0000028f);
311 }
312 
313 static void
clear_state_restore(struct fd_context * ctx,struct fd_ringbuffer * ring)314 clear_state_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
315 {
316 	if (is_a20x(ctx->screen))
317 		return;
318 
319 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
320 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
321 	OUT_RING(ring, 0x00000000);
322 
323 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
324 	OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
325 	OUT_RING(ring, 0x00000000);
326 
327 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
328 	OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
329 	OUT_RING(ring, 0x0000003b);
330 }
331 
332 static void
clear_fast(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t color_clear,uint32_t depth_clear,unsigned patch_type)333 clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring,
334 	uint32_t color_clear, uint32_t depth_clear, unsigned patch_type)
335 {
336 	BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */
337 
338 	/* zero values are patched in */
339 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
340 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
341 	OUT_RINGP(ring, patch_type, &batch->gmem_patches);
342 
343 	OUT_PKT3(ring, CP_SET_CONSTANT, 4);
344 	OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
345 	OUT_RING(ring, 0x8000 | 32);
346 	OUT_RING(ring, 0);
347 	OUT_RING(ring, 0);
348 
349 	/* set fill values */
350 	if (!is_a20x(batch->ctx->screen)) {
351 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
352 		OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
353 		OUT_RING(ring, color_clear);
354 
355 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
356 		OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
357 		OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
358 			A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf));
359 
360 		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
361 		OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
362 		OUT_RING(ring, depth_clear);
363 	} else {
364 		const float sc = 1.0f / 255.0f;
365 
366 		OUT_PKT3(ring, CP_SET_CONSTANT, 5);
367 		OUT_RING(ring, 0x00000480);
368 		OUT_RING(ring, fui((float) (color_clear >>  0 & 0xff) * sc));
369 		OUT_RING(ring, fui((float) (color_clear >>  8 & 0xff) * sc));
370 		OUT_RING(ring, fui((float) (color_clear >> 16 & 0xff) * sc));
371 		OUT_RING(ring, fui((float) (color_clear >> 24 & 0xff) * sc));
372 
373 		// XXX if using float the rounding error breaks it..
374 		float depth = ((double) (depth_clear >> 8)) * (1.0/(double) 0xffffff);
375 		assert((unsigned) (((double) depth * (double) 0xffffff)) ==
376 			(depth_clear >> 8));
377 
378 		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
379 		OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
380 		OUT_RING(ring, fui(0.0f));
381 		OUT_RING(ring, fui(depth));
382 
383 		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
384 		OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
385 		OUT_RING(ring, 0xff000000 |
386 			A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) |
387 			A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
388 		OUT_RING(ring, 0xff000000 |
389 			A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) |
390 			A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
391 	}
392 
393 	fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
394 			DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
395 }
396 
397 static bool
fd2_clear_fast(struct fd_context * ctx,unsigned buffers,const union pipe_color_union * color,double depth,unsigned stencil)398 fd2_clear_fast(struct fd_context *ctx, unsigned buffers,
399 		const union pipe_color_union *color, double depth, unsigned stencil)
400 {
401 	/* using 4x MSAA allows clearing ~2x faster
402 	 * then we can use higher bpp clearing to clear lower bpp
403 	 * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8)
404 	 * note: its possible to clear with 32_32_32_32 format but its not faster
405 	 * note: fast clear doesn't work with sysmem rendering
406 	 * (sysmem rendering is disabled when clear is used)
407 	 *
408 	 * we only have 16-bit / 32-bit color formats
409 	 * and 16-bit / 32-bit depth formats
410 	 * so there are only a few possible combinations
411 	 *
412 	 * if the bpp of the color/depth doesn't match
413 	 * we clear with depth/color individually
414 	 */
415 	struct fd2_context *fd2_ctx = fd2_context(ctx);
416 	struct fd_batch *batch = ctx->batch;
417 	struct fd_ringbuffer *ring = batch->draw;
418 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
419 	uint32_t color_clear = 0, depth_clear = 0;
420 	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
421 	int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */
422 	int color_size = -1;
423 
424 	/* TODO: need to test performance on a22x */
425 	if (!is_a20x(ctx->screen))
426 		return false;
427 
428 	if (buffers & PIPE_CLEAR_COLOR)
429 		color_size = util_format_get_blocksizebits(format) == 32;
430 
431 	if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
432 		/* no fast clear when clearing only one component of depth+stencil buffer */
433 		if (!(buffers & PIPE_CLEAR_DEPTH))
434 			return false;
435 
436 		if ((pfb->zsbuf->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
437 			 pfb->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
438 			 !(buffers & PIPE_CLEAR_STENCIL))
439 			return false;
440 
441 		depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8;
442 	}
443 
444 	assert(color_size >= 0 || depth_size >= 0);
445 
446 	if (color_size == 0) {
447 		color_clear = pack_rgba(format, color->f);
448 		color_clear = (color_clear << 16) | (color_clear & 0xffff);
449 	} else if (color_size == 1) {
450 		color_clear = pack_rgba(format, color->f);
451 	}
452 
453 	if (depth_size == 0) {
454 		depth_clear = (uint32_t)(0xffff * depth);
455 		depth_clear |= depth_clear << 16;
456 	} else if (depth_size == 1) {
457 		depth_clear = (((uint32_t)(0xffffff * depth)) << 8);
458 		depth_clear |= (stencil & 0xff);
459 	}
460 
461 	/* disable "window" scissor.. */
462 	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
463 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
464 	OUT_RING(ring, xy2d(0, 0));
465 	OUT_RING(ring, xy2d(0x7fff, 0x7fff));
466 
467 	/* make sure we fill all "pixels" (in SCREEN_SCISSOR) */
468 	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
469 	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
470 	OUT_RING(ring, fui(4096.0));
471 	OUT_RING(ring, fui(4096.0));
472 	OUT_RING(ring, fui(4096.0));
473 	OUT_RING(ring, fui(4096.0));
474 
475 	clear_state(batch, ring, ~0u, true);
476 
477 	if (color_size >= 0 && depth_size != color_size)
478 		clear_fast(batch, ring, color_clear, color_clear, GMEM_PATCH_FASTCLEAR_COLOR);
479 
480 	if (depth_size >= 0 && depth_size != color_size)
481 		clear_fast(batch, ring, depth_clear, depth_clear, GMEM_PATCH_FASTCLEAR_DEPTH);
482 
483 	if (depth_size == color_size)
484 		clear_fast(batch, ring, color_clear, depth_clear, GMEM_PATCH_FASTCLEAR_COLOR_DEPTH);
485 
486 	clear_state_restore(ctx, ring);
487 
488 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
489 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
490 	OUT_RING(ring, 0);
491 
492 	/* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile.
493 	 * MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT
494 	 * the value is read from byte offset 60 in the given bo
495 	 */
496 	OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3);
497 	OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0);
498 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
499 	OUT_RING(ring, 1);
500 
501 	OUT_PKT3(ring, CP_SET_CONSTANT, 4);
502 	OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
503 	OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches);
504 	OUT_RING(ring, 0);
505 	OUT_RING(ring, 0);
506 	return true;
507 }
508 
509 static bool
fd2_clear(struct fd_context * ctx,unsigned buffers,const union pipe_color_union * color,double depth,unsigned stencil)510 fd2_clear(struct fd_context *ctx, unsigned buffers,
511 		const union pipe_color_union *color, double depth, unsigned stencil)
512 {
513 	struct fd_ringbuffer *ring = ctx->batch->draw;
514 	struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;
515 
516 	if (fd2_clear_fast(ctx, buffers, color, depth, stencil))
517 		goto dirty;
518 
519 	/* set clear value */
520 	if (is_a20x(ctx->screen)) {
521 		if (buffers & PIPE_CLEAR_COLOR) {
522 			/* C0 used by fragment shader */
523 			OUT_PKT3(ring, CP_SET_CONSTANT, 5);
524 			OUT_RING(ring, 0x00000480);
525 			OUT_RING(ring, color->ui[0]);
526 			OUT_RING(ring, color->ui[1]);
527 			OUT_RING(ring, color->ui[2]);
528 			OUT_RING(ring, color->ui[3]);
529 		}
530 
531 		if (buffers & PIPE_CLEAR_DEPTH) {
532 			/* use viewport to set depth value */
533 			OUT_PKT3(ring, CP_SET_CONSTANT, 3);
534 			OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
535 			OUT_RING(ring, fui(0.0f));
536 			OUT_RING(ring, fui(depth));
537 		}
538 
539 		if (buffers & PIPE_CLEAR_STENCIL) {
540 			OUT_PKT3(ring, CP_SET_CONSTANT, 3);
541 			OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
542 			OUT_RING(ring, 0xff000000 |
543 				A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) |
544 				A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
545 			OUT_RING(ring, 0xff000000 |
546 				A2XX_RB_STENCILREFMASK_STENCILREF(stencil) |
547 				A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
548 		}
549 	} else {
550 		if (buffers & PIPE_CLEAR_COLOR) {
551 			OUT_PKT3(ring, CP_SET_CONSTANT, 2);
552 			OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
553 			OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f));
554 		}
555 
556 		if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
557 			uint32_t clear_mask, depth_clear;
558 			switch (fd_pipe2depth(fb->zsbuf->format)) {
559 			case DEPTHX_24_8:
560 				clear_mask = ((buffers & PIPE_CLEAR_DEPTH) ? 0xe : 0) |
561 					((buffers & PIPE_CLEAR_STENCIL) ? 0x1 : 0);
562 				depth_clear = (((uint32_t)(0xffffff * depth)) << 8) |
563 					(stencil & 0xff);
564 				break;
565 			case DEPTHX_16:
566 				clear_mask = 0xf;
567 				depth_clear = (uint32_t)(0xffffffff * depth);
568 				break;
569 			default:
570 				unreachable("invalid depth");
571 				break;
572 			}
573 
574 			OUT_PKT3(ring, CP_SET_CONSTANT, 2);
575 			OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
576 			OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
577 				A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask));
578 
579 			OUT_PKT3(ring, CP_SET_CONSTANT, 2);
580 			OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
581 			OUT_RING(ring, depth_clear);
582 		}
583 	}
584 
585 	/* scissor state */
586 	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
587 	OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
588 	OUT_RING(ring, xy2d(0, 0));
589 	OUT_RING(ring, xy2d(fb->width, fb->height));
590 
591 	/* viewport state */
592 	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
593 	OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
594 	OUT_RING(ring, fui((float) fb->width / 2.0));
595 	OUT_RING(ring, fui((float) fb->width / 2.0));
596 	OUT_RING(ring, fui((float) fb->height / 2.0));
597 	OUT_RING(ring, fui((float) fb->height / 2.0));
598 
599 	/* common state */
600 	clear_state(ctx->batch, ring, buffers, false);
601 
602 	fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
603 			DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
604 
605 	clear_state_restore(ctx, ring);
606 
607 dirty:
608 	ctx->dirty |= FD_DIRTY_ZSA |
609 			FD_DIRTY_VIEWPORT |
610 			FD_DIRTY_RASTERIZER |
611 			FD_DIRTY_SAMPLE_MASK |
612 			FD_DIRTY_PROG |
613 			FD_DIRTY_CONST |
614 			FD_DIRTY_BLEND |
615 			FD_DIRTY_FRAMEBUFFER |
616 			FD_DIRTY_SCISSOR;
617 
618 	ctx->dirty_shader[PIPE_SHADER_VERTEX]   |= FD_DIRTY_SHADER_PROG;
619 	ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;
620 
621 	return true;
622 }
623 
624 void
fd2_draw_init(struct pipe_context * pctx)625 fd2_draw_init(struct pipe_context *pctx)
626 {
627 	struct fd_context *ctx = fd_context(pctx);
628 	ctx->draw_vbo = fd2_draw_vbo;
629 	ctx->clear = fd2_clear;
630 }
631