• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2 
3 /*
4  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Rob Clark <robclark@freedesktop.org>
27  */
28 
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_helpers.h"
33 #include "util/u_format.h"
34 #include "util/u_viewport.h"
35 
36 #include "freedreno_resource.h"
37 #include "freedreno_query_hw.h"
38 
39 #include "fd4_emit.h"
40 #include "fd4_blend.h"
41 #include "fd4_context.h"
42 #include "fd4_program.h"
43 #include "fd4_rasterizer.h"
44 #include "fd4_texture.h"
45 #include "fd4_format.h"
46 #include "fd4_zsa.h"
47 
48 static const enum adreno_state_block sb[] = {
49 	[SHADER_VERTEX]   = SB_VERT_SHADER,
50 	[SHADER_FRAGMENT] = SB_FRAG_SHADER,
51 };
52 
53 /* regid:          base const register
54  * prsc or dwords: buffer containing constant values
55  * sizedwords:     size of const value buffer
56  */
57 static void
fd4_emit_const(struct fd_ringbuffer * ring,enum shader_t type,uint32_t regid,uint32_t offset,uint32_t sizedwords,const uint32_t * dwords,struct pipe_resource * prsc)58 fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
59 		uint32_t regid, uint32_t offset, uint32_t sizedwords,
60 		const uint32_t *dwords, struct pipe_resource *prsc)
61 {
62 	uint32_t i, sz;
63 	enum adreno_state_src src;
64 
65 	debug_assert((regid % 4) == 0);
66 	debug_assert((sizedwords % 4) == 0);
67 
68 	if (prsc) {
69 		sz = 0;
70 		src = 0x2;  // TODO ??
71 	} else {
72 		sz = sizedwords;
73 		src = SS_DIRECT;
74 	}
75 
76 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
77 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
78 			CP_LOAD_STATE_0_STATE_SRC(src) |
79 			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
80 			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
81 	if (prsc) {
82 		struct fd_bo *bo = fd_resource(prsc)->bo;
83 		OUT_RELOC(ring, bo, offset,
84 				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
85 	} else {
86 		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
87 				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
88 		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
89 	}
90 	for (i = 0; i < sz; i++) {
91 		OUT_RING(ring, dwords[i]);
92 	}
93 }
94 
95 static void
fd4_emit_const_bo(struct fd_ringbuffer * ring,enum shader_t type,boolean write,uint32_t regid,uint32_t num,struct pipe_resource ** prscs,uint32_t * offsets)96 fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
97 		uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
98 {
99 	uint32_t anum = align(num, 4);
100 	uint32_t i;
101 
102 	debug_assert((regid % 4) == 0);
103 
104 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum);
105 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
106 			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
107 			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
108 			CP_LOAD_STATE_0_NUM_UNIT(anum/4));
109 	OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
110 			CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
111 
112 	for (i = 0; i < num; i++) {
113 		if (prscs[i]) {
114 			if (write) {
115 				OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
116 			} else {
117 				OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
118 			}
119 		} else {
120 			OUT_RING(ring, 0xbad00000 | (i << 16));
121 		}
122 	}
123 
124 	for (; i < anum; i++)
125 		OUT_RING(ring, 0xffffffff);
126 }
127 
128 static void
emit_textures(struct fd_context * ctx,struct fd_ringbuffer * ring,enum adreno_state_block sb,struct fd_texture_stateobj * tex,const struct ir3_shader_variant * v)129 emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
130 		enum adreno_state_block sb, struct fd_texture_stateobj *tex,
131 		const struct ir3_shader_variant *v)
132 {
133 	static const uint32_t bcolor_reg[] = {
134 			[SB_VERT_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
135 			[SB_FRAG_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
136 	};
137 	struct fd4_context *fd4_ctx = fd4_context(ctx);
138 	bool needs_border = false;
139 	unsigned i;
140 
141 	if (tex->num_samplers > 0) {
142 		int num_samplers;
143 
144 		/* not sure if this is an a420.0 workaround, but we seem
145 		 * to need to emit these in pairs.. emit a final dummy
146 		 * entry if odd # of samplers:
147 		 */
148 		num_samplers = align(tex->num_samplers, 2);
149 
150 		/* output sampler state: */
151 		OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * num_samplers));
152 		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
153 				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
154 				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
155 				CP_LOAD_STATE_0_NUM_UNIT(num_samplers));
156 		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
157 				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
158 		for (i = 0; i < tex->num_samplers; i++) {
159 			static const struct fd4_sampler_stateobj dummy_sampler = {};
160 			const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ?
161 					fd4_sampler_stateobj(tex->samplers[i]) :
162 					&dummy_sampler;
163 			OUT_RING(ring, sampler->texsamp0);
164 			OUT_RING(ring, sampler->texsamp1);
165 
166 			needs_border |= sampler->needs_border;
167 		}
168 
169 		for (; i < num_samplers; i++) {
170 			OUT_RING(ring, 0x00000000);
171 			OUT_RING(ring, 0x00000000);
172 		}
173 	}
174 
175 	if (tex->num_textures > 0) {
176 		unsigned num_textures = tex->num_textures + v->astc_srgb.count;
177 
178 		/* emit texture state: */
179 		OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * num_textures));
180 		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
181 				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
182 				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
183 				CP_LOAD_STATE_0_NUM_UNIT(num_textures));
184 		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
185 				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
186 		for (i = 0; i < tex->num_textures; i++) {
187 			static const struct fd4_pipe_sampler_view dummy_view = {};
188 			const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
189 					fd4_pipe_sampler_view(tex->textures[i]) :
190 					&dummy_view;
191 
192 			OUT_RING(ring, view->texconst0);
193 			OUT_RING(ring, view->texconst1);
194 			OUT_RING(ring, view->texconst2);
195 			OUT_RING(ring, view->texconst3);
196 			if (view->base.texture) {
197 				struct fd_resource *rsc = fd_resource(view->base.texture);
198 				OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
199 			} else {
200 				OUT_RING(ring, 0x00000000);
201 			}
202 			OUT_RING(ring, 0x00000000);
203 			OUT_RING(ring, 0x00000000);
204 			OUT_RING(ring, 0x00000000);
205 		}
206 
207 		for (i = 0; i < v->astc_srgb.count; i++) {
208 			static const struct fd4_pipe_sampler_view dummy_view = {};
209 			const struct fd4_pipe_sampler_view *view;
210 			unsigned idx = v->astc_srgb.orig_idx[i];
211 
212 			view = tex->textures[idx] ?
213 					fd4_pipe_sampler_view(tex->textures[idx]) :
214 					&dummy_view;
215 
216 			debug_assert(view->texconst0 & A4XX_TEX_CONST_0_SRGB);
217 
218 			OUT_RING(ring, view->texconst0 & ~A4XX_TEX_CONST_0_SRGB);
219 			OUT_RING(ring, view->texconst1);
220 			OUT_RING(ring, view->texconst2);
221 			OUT_RING(ring, view->texconst3);
222 			if (view->base.texture) {
223 				struct fd_resource *rsc = fd_resource(view->base.texture);
224 				OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
225 			} else {
226 				OUT_RING(ring, 0x00000000);
227 			}
228 			OUT_RING(ring, 0x00000000);
229 			OUT_RING(ring, 0x00000000);
230 			OUT_RING(ring, 0x00000000);
231 		}
232 	} else {
233 		debug_assert(v->astc_srgb.count == 0);
234 	}
235 
236 	if (needs_border) {
237 		unsigned off;
238 		void *ptr;
239 
240 		u_upload_alloc(fd4_ctx->border_color_uploader,
241 				0, BORDER_COLOR_UPLOAD_SIZE,
242 				BORDER_COLOR_UPLOAD_SIZE, &off,
243 				&fd4_ctx->border_color_buf,
244 				&ptr);
245 
246 		fd_setup_border_colors(tex, ptr, 0);
247 		OUT_PKT0(ring, bcolor_reg[sb], 1);
248 		OUT_RELOC(ring, fd_resource(fd4_ctx->border_color_buf)->bo, off, 0, 0);
249 
250 		u_upload_unmap(fd4_ctx->border_color_uploader);
251 	}
252 }
253 
254 /* emit texture state for mem->gmem restore operation.. eventually it would
255  * be good to get rid of this and use normal CSO/etc state for more of these
256  * special cases..
257  */
258 void
fd4_emit_gmem_restore_tex(struct fd_ringbuffer * ring,unsigned nr_bufs,struct pipe_surface ** bufs)259 fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
260 		struct pipe_surface **bufs)
261 {
262 	unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS];
263 	int i;
264 
265 	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
266 		mrt_comp[i] = (i < nr_bufs) ? 0xf : 0;
267 	}
268 
269 	/* output sampler state: */
270 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * nr_bufs));
271 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
272 			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
273 			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
274 			CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
275 	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
276 			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
277 	for (i = 0; i < nr_bufs; i++) {
278 		OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
279 				A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
280 				A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
281 				A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
282 				A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
283 		OUT_RING(ring, 0x00000000);
284 	}
285 
286 	/* emit texture state: */
287 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * nr_bufs));
288 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
289 			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
290 			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
291 			CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
292 	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
293 			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
294 	for (i = 0; i < nr_bufs; i++) {
295 		if (bufs[i]) {
296 			struct fd_resource *rsc = fd_resource(bufs[i]->texture);
297 			enum pipe_format format = fd_gmem_restore_format(bufs[i]->format);
298 
299 			/* The restore blit_zs shader expects stencil in sampler 0,
300 			 * and depth in sampler 1
301 			 */
302 			if (rsc->stencil && (i == 0)) {
303 				rsc = rsc->stencil;
304 				format = fd_gmem_restore_format(rsc->base.b.format);
305 			}
306 
307 			/* note: PIPE_BUFFER disallowed for surfaces */
308 			unsigned lvl = bufs[i]->u.tex.level;
309 			struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
310 			unsigned offset = fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer);
311 
312 			/* z32 restore is accomplished using depth write.  If there is
313 			 * no stencil component (ie. PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
314 			 * then no render target:
315 			 *
316 			 * (The same applies for z32_s8x24, since for stencil sampler
317 			 * state the above 'if' will replace 'format' with s8)
318 			 */
319 			if ((format == PIPE_FORMAT_Z32_FLOAT) ||
320 					(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT))
321 				mrt_comp[i] = 0;
322 
323 			debug_assert(bufs[i]->u.tex.first_layer == bufs[i]->u.tex.last_layer);
324 
325 			OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
326 					A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
327 					fd4_tex_swiz(format,  PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
328 							PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
329 			OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
330 					A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
331 			OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
332 					A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format)));
333 			OUT_RING(ring, 0x00000000);
334 			OUT_RELOC(ring, rsc->bo, offset, 0, 0);
335 			OUT_RING(ring, 0x00000000);
336 			OUT_RING(ring, 0x00000000);
337 			OUT_RING(ring, 0x00000000);
338 		} else {
339 			OUT_RING(ring, A4XX_TEX_CONST_0_FMT(0) |
340 					A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
341 					A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_ONE) |
342 					A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_ONE) |
343 					A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_ONE) |
344 					A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_ONE));
345 			OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(0) |
346 					A4XX_TEX_CONST_1_HEIGHT(0));
347 			OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(0));
348 			OUT_RING(ring, 0x00000000);
349 			OUT_RING(ring, 0x00000000);
350 			OUT_RING(ring, 0x00000000);
351 			OUT_RING(ring, 0x00000000);
352 			OUT_RING(ring, 0x00000000);
353 		}
354 	}
355 
356 	OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
357 	OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
358 			A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
359 			A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
360 			A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
361 			A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
362 			A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
363 			A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
364 			A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
365 }
366 
367 void
fd4_emit_vertex_bufs(struct fd_ringbuffer * ring,struct fd4_emit * emit)368 fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
369 {
370 	int32_t i, j, last = -1;
371 	uint32_t total_in = 0;
372 	const struct fd_vertex_state *vtx = emit->vtx;
373 	const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
374 	unsigned vertex_regid = regid(63, 0);
375 	unsigned instance_regid = regid(63, 0);
376 	unsigned vtxcnt_regid = regid(63, 0);
377 
378 	/* Note that sysvals come *after* normal inputs: */
379 	for (i = 0; i < vp->inputs_count; i++) {
380 		if (!vp->inputs[i].compmask)
381 			continue;
382 		if (vp->inputs[i].sysval) {
383 			switch(vp->inputs[i].slot) {
384 			case SYSTEM_VALUE_BASE_VERTEX:
385 				/* handled elsewhere */
386 				break;
387 			case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
388 				vertex_regid = vp->inputs[i].regid;
389 				break;
390 			case SYSTEM_VALUE_INSTANCE_ID:
391 				instance_regid = vp->inputs[i].regid;
392 				break;
393 			case SYSTEM_VALUE_VERTEX_CNT:
394 				vtxcnt_regid = vp->inputs[i].regid;
395 				break;
396 			default:
397 				unreachable("invalid system value");
398 				break;
399 			}
400 		} else if (i < vtx->vtx->num_elements) {
401 			last = i;
402 		}
403 	}
404 
405 	for (i = 0, j = 0; i <= last; i++) {
406 		assert(!vp->inputs[i].sysval);
407 		if (vp->inputs[i].compmask) {
408 			struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
409 			const struct pipe_vertex_buffer *vb =
410 					&vtx->vertexbuf.vb[elem->vertex_buffer_index];
411 			struct fd_resource *rsc = fd_resource(vb->buffer);
412 			enum pipe_format pfmt = elem->src_format;
413 			enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
414 			bool switchnext = (i != last) ||
415 					(vertex_regid != regid(63, 0)) ||
416 					(instance_regid != regid(63, 0)) ||
417 					(vtxcnt_regid != regid(63, 0));
418 			bool isint = util_format_is_pure_integer(pfmt);
419 			uint32_t fs = util_format_get_blocksize(pfmt);
420 			uint32_t off = vb->buffer_offset + elem->src_offset;
421 			uint32_t size = fd_bo_size(rsc->bo) - off;
422 			debug_assert(fmt != ~0);
423 
424 			OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
425 			OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
426 					A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
427 					COND(elem->instance_divisor, A4XX_VFD_FETCH_INSTR_0_INSTANCED) |
428 					COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
429 			OUT_RELOC(ring, rsc->bo, off, 0, 0);
430 			OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
431 			OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(MAX2(1, elem->instance_divisor)));
432 
433 			OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(j), 1);
434 			OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
435 					A4XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
436 					A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
437 					A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
438 					A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
439 					A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
440 					A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
441 					COND(isint, A4XX_VFD_DECODE_INSTR_INT) |
442 					COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
443 
444 			total_in += vp->inputs[i].ncomp;
445 			j++;
446 		}
447 	}
448 
449 	/* hw doesn't like to be configured for zero vbo's, it seems: */
450 	if (last < 0) {
451 		/* just recycle the shader bo, we just need to point to *something*
452 		 * valid:
453 		 */
454 		struct fd_bo *dummy_vbo = vp->bo;
455 		bool switchnext = (vertex_regid != regid(63, 0)) ||
456 				(instance_regid != regid(63, 0)) ||
457 				(vtxcnt_regid != regid(63, 0));
458 
459 		OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4);
460 		OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
461 				A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
462 				COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
463 		OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
464 		OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1));
465 		OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1));
466 
467 		OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1);
468 		OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
469 				A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
470 				A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) |
471 				A4XX_VFD_DECODE_INSTR_SWAP(XYZW) |
472 				A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
473 				A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
474 				A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
475 				COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
476 
477 		total_in = 1;
478 		j = 1;
479 	}
480 
481 	OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
482 	OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
483 			0xa0000 | /* XXX */
484 			A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
485 			A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
486 	OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
487 			A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
488 			A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
489 	OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_2 */
490 	OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(vtxcnt_regid));
491 	OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_4 */
492 
493 	/* cache invalidate, otherwise vertex fetch could see
494 	 * stale vbo contents:
495 	 */
496 	OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
497 	OUT_RING(ring, 0x00000000);
498 	OUT_RING(ring, 0x00000012);
499 }
500 
501 void
fd4_emit_state(struct fd_context * ctx,struct fd_ringbuffer * ring,struct fd4_emit * emit)502 fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
503 		struct fd4_emit *emit)
504 {
505 	const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
506 	const struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
507 	uint32_t dirty = emit->dirty;
508 
509 	emit_marker(ring, 5);
510 
511 	if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) {
512 		struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
513 		unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
514 
515 		for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
516 			mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
517 		}
518 
519 		OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
520 		OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
521 				A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
522 				A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
523 				A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
524 				A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
525 				A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
526 				A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
527 				A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
528 	}
529 
530 	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
531 		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
532 		struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
533 		uint32_t rb_alpha_control = zsa->rb_alpha_control;
534 
535 		if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
536 			rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST;
537 
538 		OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
539 		OUT_RING(ring, rb_alpha_control);
540 
541 		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
542 		OUT_RING(ring, zsa->rb_stencil_control);
543 		OUT_RING(ring, zsa->rb_stencil_control2);
544 	}
545 
546 	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
547 		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
548 		struct pipe_stencil_ref *sr = &ctx->stencil_ref;
549 
550 		OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
551 		OUT_RING(ring, zsa->rb_stencilrefmask |
552 				A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
553 		OUT_RING(ring, zsa->rb_stencilrefmask_bf |
554 				A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
555 	}
556 
557 	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
558 		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
559 		bool fragz = fp->has_kill | fp->writes_pos;
560 		bool clamp = !ctx->rasterizer->depth_clip;
561 
562 		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
563 		OUT_RING(ring, zsa->rb_depth_control |
564 				COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) |
565 				COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
566 				COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));
567 
568 		/* maybe this register/bitfield needs a better name.. this
569 		 * appears to be just disabling early-z
570 		 */
571 		OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
572 		OUT_RING(ring, zsa->gras_alpha_control |
573 				COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE) |
574 				COND(fragz && fp->frag_coord, A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS));
575 	}
576 
577 	if (dirty & FD_DIRTY_RASTERIZER) {
578 		struct fd4_rasterizer_stateobj *rasterizer =
579 				fd4_rasterizer_stateobj(ctx->rasterizer);
580 
581 		OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
582 		OUT_RING(ring, rasterizer->gras_su_mode_control |
583 				A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
584 
585 		OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
586 		OUT_RING(ring, rasterizer->gras_su_point_minmax);
587 		OUT_RING(ring, rasterizer->gras_su_point_size);
588 
589 		OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
590 		OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
591 		OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
592 
593 		OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
594 		OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
595 	}
596 
597 	/* NOTE: since primitive_restart is not actually part of any
598 	 * state object, we need to make sure that we always emit
599 	 * PRIM_VTX_CNTL.. either that or be more clever and detect
600 	 * when it changes.
601 	 */
602 	if (emit->info) {
603 		const struct pipe_draw_info *info = emit->info;
604 		struct fd4_rasterizer_stateobj *rast =
605 			fd4_rasterizer_stateobj(ctx->rasterizer);
606 		uint32_t val = rast->pc_prim_vtx_cntl;
607 
608 		if (info->indexed && info->primitive_restart)
609 			val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
610 
611 		val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);
612 
613 		if (fp->total_in > 0) {
614 			uint32_t varout = align(fp->total_in, 16) / 16;
615 			if (varout > 1)
616 				varout = align(varout, 2);
617 			val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout);
618 		}
619 
620 		OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
621 		OUT_RING(ring, val);
622 		OUT_RING(ring, rast->pc_prim_vtx_cntl2);
623 	}
624 
625 	if (dirty & FD_DIRTY_SCISSOR) {
626 		struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
627 
628 		OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
629 		OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
630 				A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
631 		OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
632 				A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
633 
634 		ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx);
635 		ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny);
636 		ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
637 		ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
638 	}
639 
640 	if (dirty & FD_DIRTY_VIEWPORT) {
641 		fd_wfi(ctx->batch, ring);
642 		OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
643 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
644 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
645 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
646 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
647 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
648 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
649 	}
650 
651 	if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
652 		float zmin, zmax;
653 		int depth = 24;
654 		if (ctx->batch->framebuffer.zsbuf) {
655 			depth = util_format_get_component_bits(
656 					pipe_surface_format(ctx->batch->framebuffer.zsbuf),
657 					UTIL_FORMAT_COLORSPACE_ZS, 0);
658 		}
659 		util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
660 								&zmin, &zmax);
661 
662 		OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2);
663 		if (depth == 32) {
664 			OUT_RING(ring, fui(zmin));
665 			OUT_RING(ring, fui(zmax));
666 		} else if (depth == 16) {
667 			OUT_RING(ring, (uint32_t)(zmin * 0xffff));
668 			OUT_RING(ring, (uint32_t)(zmax * 0xffff));
669 		} else {
670 			OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
671 			OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
672 		}
673 	}
674 
675 	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
676 		struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
677 		unsigned n = pfb->nr_cbufs;
678 		/* if we have depth/stencil, we need at least on MRT: */
679 		if (pfb->zsbuf)
680 			n = MAX2(1, n);
681 		fd4_program_emit(ring, emit, n, pfb->cbufs);
682 	}
683 
684 	if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
685 		ir3_emit_consts(vp, ring, ctx, emit->info, dirty);
686 		if (!emit->key.binning_pass)
687 			ir3_emit_consts(fp, ring, ctx, emit->info, dirty);
688 	}
689 
690 	if ((dirty & FD_DIRTY_BLEND)) {
691 		struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
692 		uint32_t i;
693 
694 		for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
695 			enum pipe_format format = pipe_surface_format(
696 					ctx->batch->framebuffer.cbufs[i]);
697 			bool is_int = util_format_is_pure_integer(format);
698 			bool has_alpha = util_format_has_alpha(format);
699 			uint32_t control = blend->rb_mrt[i].control;
700 			uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
701 
702 			if (is_int) {
703 				control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
704 				control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
705 			}
706 
707 			if (has_alpha) {
708 				blend_control |= blend->rb_mrt[i].blend_control_rgb;
709 			} else {
710 				blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb;
711 				control &= ~A4XX_RB_MRT_CONTROL_BLEND2;
712 			}
713 
714 			OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
715 			OUT_RING(ring, control);
716 
717 			OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
718 			OUT_RING(ring, blend_control);
719 		}
720 
721 		OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
722 		OUT_RING(ring, blend->rb_fs_output |
723 				A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
724 	}
725 
726 	if (dirty & FD_DIRTY_BLEND_COLOR) {
727 		struct pipe_blend_color *bcolor = &ctx->blend_color;
728 
729 		OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
730 		OUT_RING(ring, A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) |
731 				A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 0xff) |
732 				A4XX_RB_BLEND_RED_SINT(bcolor->color[0] * 0x7f));
733 		OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
734 		OUT_RING(ring, A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) |
735 				A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 0xff) |
736 				A4XX_RB_BLEND_GREEN_SINT(bcolor->color[1] * 0x7f));
737 		OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[1]));
738 		OUT_RING(ring, A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) |
739 				A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 0xff) |
740 				A4XX_RB_BLEND_BLUE_SINT(bcolor->color[2] * 0x7f));
741 		OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
742 		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) |
743 				A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 0xff) |
744 				A4XX_RB_BLEND_ALPHA_SINT(bcolor->color[3] * 0x7f));
745 		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
746 	}
747 
748 	if (dirty & FD_DIRTY_VERTTEX) {
749 		if (vp->has_samp)
750 			emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex, vp);
751 		else
752 			dirty &= ~FD_DIRTY_VERTTEX;
753 	}
754 
755 	if (dirty & FD_DIRTY_FRAGTEX) {
756 		if (fp->has_samp)
757 			emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex, fp);
758 		else
759 			dirty &= ~FD_DIRTY_FRAGTEX;
760 	}
761 
762 	ctx->dirty &= ~dirty;
763 }
764 
765 /* emit setup at begin of new cmdstream buffer (don't rely on previous
766  * state, there could have been a context switch between ioctls):
767  */
768 void
fd4_emit_restore(struct fd_batch * batch,struct fd_ringbuffer * ring)769 fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
770 {
771 	struct fd_context *ctx = batch->ctx;
772 	struct fd4_context *fd4_ctx = fd4_context(ctx);
773 
774 	OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
775 	OUT_RING(ring, 0x00000001);
776 
777 	OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
778 	OUT_RING(ring, 0x00000000);
779 
780 	OUT_PKT0(ring, REG_A4XX_SP_MODE_CONTROL, 1);
781 	OUT_RING(ring, 0x00000006);
782 
783 	OUT_PKT0(ring, REG_A4XX_TPL1_TP_MODE_CONTROL, 1);
784 	OUT_RING(ring, 0x0000003a);
785 
786 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
787 	OUT_RING(ring, 0x00000001);
788 
789 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
790 	OUT_RING(ring, 0x00000000);
791 
792 	OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
793 	OUT_RING(ring, 0x00000007);
794 
795 	OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
796 	OUT_RING(ring, 0x00000000);
797 
798 	OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
799 	OUT_RING(ring, 0x00000000);
800 	OUT_RING(ring, 0x00000012);
801 
802 	OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1);
803 	OUT_RING(ring, 0x00000000);
804 
805 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
806 	OUT_RING(ring, 0x00000006);
807 
808 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
809 	OUT_RING(ring, 0x00000000);
810 
811 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
812 	OUT_RING(ring, 0x00040000);
813 
814 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
815 	OUT_RING(ring, 0x00000000);
816 
817 	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
818 	OUT_RING(ring, 0x00001000);
819 
820 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
821 	OUT_RING(ring, 0x00000000);
822 
823 	OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
824 	OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) |
825 			A4XX_RB_BLEND_RED_FLOAT(0.0));
826 	OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(0) |
827 			A4XX_RB_BLEND_GREEN_FLOAT(0.0));
828 	OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(0) |
829 			A4XX_RB_BLEND_BLUE_FLOAT(0.0));
830 	OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(0x7fff) |
831 			A4XX_RB_BLEND_ALPHA_FLOAT(1.0));
832 
833 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
834 	OUT_RING(ring, 0x00000000);
835 
836 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
837 	OUT_RING(ring, 0x00000000);
838 
839 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
840 	OUT_RING(ring, 0x00000000);
841 
842 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
843 	OUT_RING(ring, 0x00000000);
844 
845 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
846 	OUT_RING(ring, 0x00000000);
847 
848 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
849 	OUT_RING(ring, 0x00000000);
850 
851 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
852 	OUT_RING(ring, 0x0000001d);
853 
854 	OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
855 	OUT_RING(ring, 0x00000000);
856 
857 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
858 	OUT_RING(ring, 0x00000001);
859 
860 	OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
861 	OUT_RING(ring, 0x00000000);
862 
863 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
864 	OUT_RING(ring, 0x00000000);
865 
866 	OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
867 	OUT_RING(ring, 0x00000000);
868 
869 	OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1);
870 	OUT_RING(ring, A4XX_TPL1_TP_TEX_COUNT_VS(16) |
871 			A4XX_TPL1_TP_TEX_COUNT_HS(0) |
872 			A4XX_TPL1_TP_TEX_COUNT_DS(0) |
873 			A4XX_TPL1_TP_TEX_COUNT_GS(0));
874 
875 	OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1);
876 	OUT_RING(ring, 16);
877 
878 	/* we don't use this yet.. probably best to disable.. */
879 	OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
880 	OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
881 			CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
882 			CP_SET_DRAW_STATE__0_GROUP_ID(0));
883 	OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
884 
885 	OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
886 	OUT_RING(ring, 0x08000001);                  /* SP_VS_PVT_MEM_PARAM */
887 	OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR */
888 
889 	OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
890 	OUT_RING(ring, 0x08000001);                  /* SP_FS_PVT_MEM_PARAM */
891 	OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR */
892 
893 	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
894 	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
895 			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
896 			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
897 			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
898 
899 	OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
900 	OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
901 			A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));
902 
903 	OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
904 	OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
905 			A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
906 
907 	OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
908 	OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
909 
910 	OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
911 	OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
912 
913 	OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
914 	OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
915 
916 	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
917 	OUT_RING(ring, 0x0);
918 
919 	fd_hw_query_enable(batch, ring);
920 }
921 
/* Thin wrapper that forwards to __OUT_IB(ring, true, target).
 *
 * ring:   ringbuffer the packet is written into.
 * target: ringbuffer passed as the final __OUT_IB() argument
 *         (presumably the buffer being branched to -- confirm against
 *         __OUT_IB()).
 *
 * The meaning of the hard-coded 'true' is defined by __OUT_IB(), not
 * here.  fd4_emit_init() installs this function as ctx->emit_ib.
 */
static void
fd4_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
	__OUT_IB(ring, true, target);
}
927 
928 void
fd4_emit_init(struct pipe_context * pctx)929 fd4_emit_init(struct pipe_context *pctx)
930 {
931 	struct fd_context *ctx = fd_context(pctx);
932 	ctx->emit_const = fd4_emit_const;
933 	ctx->emit_const_bo = fd4_emit_const_bo;
934 	ctx->emit_ib = fd4_emit_ib;
935 }
936