• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Christian König <christian.koenig@amd.com>
25  */
26 
27 #include "si_pipe.h"
28 #include "sid.h"
29 #include "radeon/r600_cs.h"
30 #include "radeon/r600_query.h"
31 
32 #include "util/u_dual_blend.h"
33 #include "util/u_format.h"
34 #include "util/u_format_s3tc.h"
35 #include "util/u_memory.h"
36 #include "util/u_resource.h"
37 
38 /* Initialize an external atom (owned by ../radeon). */
39 static void
si_init_external_atom(struct si_context * sctx,struct r600_atom * atom,struct r600_atom ** list_elem)40 si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
41 		      struct r600_atom **list_elem)
42 {
43 	atom->id = list_elem - sctx->atoms.array + 1;
44 	*list_elem = atom;
45 }
46 
47 /* Initialize an atom owned by radeonsi.  */
si_init_atom(struct si_context * sctx,struct r600_atom * atom,struct r600_atom ** list_elem,void (* emit_func)(struct si_context * ctx,struct r600_atom * state))48 void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
49 		  struct r600_atom **list_elem,
50 		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
51 {
52 	atom->emit = (void*)emit_func;
53 	atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
54 	*list_elem = atom;
55 }
56 
si_map_swizzle(unsigned swizzle)57 static unsigned si_map_swizzle(unsigned swizzle)
58 {
59 	switch (swizzle) {
60 	case PIPE_SWIZZLE_Y:
61 		return V_008F0C_SQ_SEL_Y;
62 	case PIPE_SWIZZLE_Z:
63 		return V_008F0C_SQ_SEL_Z;
64 	case PIPE_SWIZZLE_W:
65 		return V_008F0C_SQ_SEL_W;
66 	case PIPE_SWIZZLE_0:
67 		return V_008F0C_SQ_SEL_0;
68 	case PIPE_SWIZZLE_1:
69 		return V_008F0C_SQ_SEL_1;
70 	default: /* PIPE_SWIZZLE_X */
71 		return V_008F0C_SQ_SEL_X;
72 	}
73 }
74 
/**
 * Convert a float to fixed point with frac_bits fractional bits.
 *
 * The value is scaled by 2^frac_bits and truncated toward zero. A negative
 * result is returned as its 32-bit two's-complement bit pattern.
 *
 * The scale factor is built from an unsigned 1 so that frac_bits == 31 does
 * not overflow a signed int. The conversion goes through int64_t because
 * converting a negative float directly to an unsigned type is undefined
 * behavior in C, whereas int64_t -> uint32_t is a well-defined modular
 * reduction.
 *
 * \param value      number to convert
 * \param frac_bits  number of fractional bits; must be less than 32
 * \return the low 32 bits of the scaled, truncated value
 */
static uint32_t S_FIXED(float value, uint32_t frac_bits)
{
	const float scale = (float)(1u << frac_bits);

	return (uint32_t)(int64_t)(value * scale);
}
79 
/**
 * Pack a float as unsigned 12.4 fixed point.
 *
 * Values at or below zero pack to 0, values at or above 4096 saturate to
 * 0xffff, and everything in between is multiplied by 16 and truncated.
 */
static unsigned si_pack_float_12p4(float x)
{
	if (x <= 0)
		return 0;

	if (x >= 4096)
		return 0xffff;

	return x * 16;
}
86 
87 /*
88  * Inferred framebuffer and blender state.
89  *
90  * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
91  * if there is not enough PS outputs.
92  */
si_emit_cb_render_state(struct si_context * sctx,struct r600_atom * atom)93 static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
94 {
95 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
96 	struct si_state_blend *blend = sctx->queued.named.blend;
97 	/* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,
98 	 * but you never know. */
99 	uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit;
100 	unsigned i;
101 
102 	if (blend)
103 		cb_target_mask &= blend->cb_target_mask;
104 
105 	/* Avoid a hang that happens when dual source blending is enabled
106 	 * but there is not enough color outputs. This is undefined behavior,
107 	 * so disable color writes completely.
108 	 *
109 	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
110 	 */
111 	if (blend && blend->dual_src_blend &&
112 	    sctx->ps_shader.cso &&
113 	    (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
114 		cb_target_mask = 0;
115 
116 	radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);
117 
118 	/* STONEY-specific register settings. */
119 	if (sctx->b.family == CHIP_STONEY) {
120 		unsigned spi_shader_col_format =
121 			sctx->ps_shader.cso ?
122 			sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
123 		unsigned sx_ps_downconvert = 0;
124 		unsigned sx_blend_opt_epsilon = 0;
125 		unsigned sx_blend_opt_control = 0;
126 
127 		for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
128 			struct r600_surface *surf =
129 				(struct r600_surface*)sctx->framebuffer.state.cbufs[i];
130 			unsigned format, swap, spi_format, colormask;
131 			bool has_alpha, has_rgb;
132 
133 			if (!surf)
134 				continue;
135 
136 			format = G_028C70_FORMAT(surf->cb_color_info);
137 			swap = G_028C70_COMP_SWAP(surf->cb_color_info);
138 			spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
139 			colormask = (cb_target_mask >> (i * 4)) & 0xf;
140 
141 			/* Set if RGB and A are present. */
142 			has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);
143 
144 			if (format == V_028C70_COLOR_8 ||
145 			    format == V_028C70_COLOR_16 ||
146 			    format == V_028C70_COLOR_32)
147 				has_rgb = !has_alpha;
148 			else
149 				has_rgb = true;
150 
151 			/* Check the colormask and export format. */
152 			if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
153 				has_rgb = false;
154 			if (!(colormask & PIPE_MASK_A))
155 				has_alpha = false;
156 
157 			if (spi_format == V_028714_SPI_SHADER_ZERO) {
158 				has_rgb = false;
159 				has_alpha = false;
160 			}
161 
162 			/* Disable value checking for disabled channels. */
163 			if (!has_rgb)
164 				sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
165 			if (!has_alpha)
166 				sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
167 
168 			/* Enable down-conversion for 32bpp and smaller formats. */
169 			switch (format) {
170 			case V_028C70_COLOR_8:
171 			case V_028C70_COLOR_8_8:
172 			case V_028C70_COLOR_8_8_8_8:
173 				/* For 1 and 2-channel formats, use the superset thereof. */
174 				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
175 				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
176 				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
177 					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
178 					sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
179 				}
180 				break;
181 
182 			case V_028C70_COLOR_5_6_5:
183 				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
184 					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
185 					sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
186 				}
187 				break;
188 
189 			case V_028C70_COLOR_1_5_5_5:
190 				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
191 					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
192 					sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
193 				}
194 				break;
195 
196 			case V_028C70_COLOR_4_4_4_4:
197 				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
198 					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
199 					sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
200 				}
201 				break;
202 
203 			case V_028C70_COLOR_32:
204 				if (swap == V_0280A0_SWAP_STD &&
205 				    spi_format == V_028714_SPI_SHADER_32_R)
206 					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
207 				else if (swap == V_0280A0_SWAP_ALT_REV &&
208 					 spi_format == V_028714_SPI_SHADER_32_AR)
209 					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
210 				break;
211 
212 			case V_028C70_COLOR_16:
213 			case V_028C70_COLOR_16_16:
214 				/* For 1-channel formats, use the superset thereof. */
215 				if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
216 				    spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
217 				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
218 				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
219 					if (swap == V_0280A0_SWAP_STD ||
220 					    swap == V_0280A0_SWAP_STD_REV)
221 						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
222 					else
223 						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
224 				}
225 				break;
226 
227 			case V_028C70_COLOR_10_11_11:
228 				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
229 					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
230 					sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
231 				}
232 				break;
233 
234 			case V_028C70_COLOR_2_10_10_10:
235 				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
236 					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
237 					sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
238 				}
239 				break;
240 			}
241 		}
242 
243 		if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) {
244 			sx_ps_downconvert = 0;
245 			sx_blend_opt_epsilon = 0;
246 			sx_blend_opt_control = 0;
247 		}
248 
249 		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
250 		radeon_emit(cs, sx_ps_downconvert);	/* R_028754_SX_PS_DOWNCONVERT */
251 		radeon_emit(cs, sx_blend_opt_epsilon);	/* R_028758_SX_BLEND_OPT_EPSILON */
252 		radeon_emit(cs, sx_blend_opt_control);	/* R_02875C_SX_BLEND_OPT_CONTROL */
253 	}
254 }
255 
256 /*
257  * Blender functions
258  */
259 
si_translate_blend_function(int blend_func)260 static uint32_t si_translate_blend_function(int blend_func)
261 {
262 	switch (blend_func) {
263 	case PIPE_BLEND_ADD:
264 		return V_028780_COMB_DST_PLUS_SRC;
265 	case PIPE_BLEND_SUBTRACT:
266 		return V_028780_COMB_SRC_MINUS_DST;
267 	case PIPE_BLEND_REVERSE_SUBTRACT:
268 		return V_028780_COMB_DST_MINUS_SRC;
269 	case PIPE_BLEND_MIN:
270 		return V_028780_COMB_MIN_DST_SRC;
271 	case PIPE_BLEND_MAX:
272 		return V_028780_COMB_MAX_DST_SRC;
273 	default:
274 		R600_ERR("Unknown blend function %d\n", blend_func);
275 		assert(0);
276 		break;
277 	}
278 	return 0;
279 }
280 
si_translate_blend_factor(int blend_fact)281 static uint32_t si_translate_blend_factor(int blend_fact)
282 {
283 	switch (blend_fact) {
284 	case PIPE_BLENDFACTOR_ONE:
285 		return V_028780_BLEND_ONE;
286 	case PIPE_BLENDFACTOR_SRC_COLOR:
287 		return V_028780_BLEND_SRC_COLOR;
288 	case PIPE_BLENDFACTOR_SRC_ALPHA:
289 		return V_028780_BLEND_SRC_ALPHA;
290 	case PIPE_BLENDFACTOR_DST_ALPHA:
291 		return V_028780_BLEND_DST_ALPHA;
292 	case PIPE_BLENDFACTOR_DST_COLOR:
293 		return V_028780_BLEND_DST_COLOR;
294 	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
295 		return V_028780_BLEND_SRC_ALPHA_SATURATE;
296 	case PIPE_BLENDFACTOR_CONST_COLOR:
297 		return V_028780_BLEND_CONSTANT_COLOR;
298 	case PIPE_BLENDFACTOR_CONST_ALPHA:
299 		return V_028780_BLEND_CONSTANT_ALPHA;
300 	case PIPE_BLENDFACTOR_ZERO:
301 		return V_028780_BLEND_ZERO;
302 	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
303 		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
304 	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
305 		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
306 	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
307 		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
308 	case PIPE_BLENDFACTOR_INV_DST_COLOR:
309 		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
310 	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
311 		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
312 	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
313 		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
314 	case PIPE_BLENDFACTOR_SRC1_COLOR:
315 		return V_028780_BLEND_SRC1_COLOR;
316 	case PIPE_BLENDFACTOR_SRC1_ALPHA:
317 		return V_028780_BLEND_SRC1_ALPHA;
318 	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
319 		return V_028780_BLEND_INV_SRC1_COLOR;
320 	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
321 		return V_028780_BLEND_INV_SRC1_ALPHA;
322 	default:
323 		R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
324 		assert(0);
325 		break;
326 	}
327 	return 0;
328 }
329 
si_translate_blend_opt_function(int blend_func)330 static uint32_t si_translate_blend_opt_function(int blend_func)
331 {
332 	switch (blend_func) {
333 	case PIPE_BLEND_ADD:
334 		return V_028760_OPT_COMB_ADD;
335 	case PIPE_BLEND_SUBTRACT:
336 		return V_028760_OPT_COMB_SUBTRACT;
337 	case PIPE_BLEND_REVERSE_SUBTRACT:
338 		return V_028760_OPT_COMB_REVSUBTRACT;
339 	case PIPE_BLEND_MIN:
340 		return V_028760_OPT_COMB_MIN;
341 	case PIPE_BLEND_MAX:
342 		return V_028760_OPT_COMB_MAX;
343 	default:
344 		return V_028760_OPT_COMB_BLEND_DISABLED;
345 	}
346 }
347 
si_translate_blend_opt_factor(int blend_fact,bool is_alpha)348 static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
349 {
350 	switch (blend_fact) {
351 	case PIPE_BLENDFACTOR_ZERO:
352 		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
353 	case PIPE_BLENDFACTOR_ONE:
354 		return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
355 	case PIPE_BLENDFACTOR_SRC_COLOR:
356 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
357 				: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
358 	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
359 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
360 				: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
361 	case PIPE_BLENDFACTOR_SRC_ALPHA:
362 		return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
363 	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
364 		return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
365 	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
366 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
367 				: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
368 	default:
369 		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
370 	}
371 }
372 
373 /**
374  * Get rid of DST in the blend factors by commuting the operands:
375  *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
376  */
si_blend_remove_dst(unsigned * func,unsigned * src_factor,unsigned * dst_factor,unsigned expected_dst,unsigned replacement_src)377 static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
378 				unsigned *dst_factor, unsigned expected_dst,
379 				unsigned replacement_src)
380 {
381 	if (*src_factor == expected_dst &&
382 	    *dst_factor == PIPE_BLENDFACTOR_ZERO) {
383 		*src_factor = PIPE_BLENDFACTOR_ZERO;
384 		*dst_factor = replacement_src;
385 
386 		/* Commuting the operands requires reversing subtractions. */
387 		if (*func == PIPE_BLEND_SUBTRACT)
388 			*func = PIPE_BLEND_REVERSE_SUBTRACT;
389 		else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
390 			*func = PIPE_BLEND_SUBTRACT;
391 	}
392 }
393 
si_blend_factor_uses_dst(unsigned factor)394 static bool si_blend_factor_uses_dst(unsigned factor)
395 {
396 	return factor == PIPE_BLENDFACTOR_DST_COLOR ||
397 		factor == PIPE_BLENDFACTOR_DST_ALPHA ||
398 		factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
399 		factor == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
400 		factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
401 }
402 
si_create_blend_state_mode(struct pipe_context * ctx,const struct pipe_blend_state * state,unsigned mode)403 static void *si_create_blend_state_mode(struct pipe_context *ctx,
404 					const struct pipe_blend_state *state,
405 					unsigned mode)
406 {
407 	struct si_context *sctx = (struct si_context*)ctx;
408 	struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
409 	struct si_pm4_state *pm4 = &blend->pm4;
410 	uint32_t sx_mrt_blend_opt[8] = {0};
411 	uint32_t color_control = 0;
412 
413 	if (!blend)
414 		return NULL;
415 
416 	blend->alpha_to_coverage = state->alpha_to_coverage;
417 	blend->alpha_to_one = state->alpha_to_one;
418 	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
419 
420 	if (state->logicop_enable) {
421 		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
422 	} else {
423 		color_control |= S_028808_ROP3(0xcc);
424 	}
425 
426 	si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
427 		       S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
428 		       S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
429 		       S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
430 		       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
431 		       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
432 
433 	if (state->alpha_to_coverage)
434 		blend->need_src_alpha_4bit |= 0xf;
435 
436 	blend->cb_target_mask = 0;
437 	for (int i = 0; i < 8; i++) {
438 		/* state->rt entries > 0 only written if independent blending */
439 		const int j = state->independent_blend_enable ? i : 0;
440 
441 		unsigned eqRGB = state->rt[j].rgb_func;
442 		unsigned srcRGB = state->rt[j].rgb_src_factor;
443 		unsigned dstRGB = state->rt[j].rgb_dst_factor;
444 		unsigned eqA = state->rt[j].alpha_func;
445 		unsigned srcA = state->rt[j].alpha_src_factor;
446 		unsigned dstA = state->rt[j].alpha_dst_factor;
447 
448 		unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
449 		unsigned blend_cntl = 0;
450 
451 		sx_mrt_blend_opt[i] =
452 			S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
453 			S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
454 
455 		/* Only set dual source blending for MRT0 to avoid a hang. */
456 		if (i >= 1 && blend->dual_src_blend) {
457 			/* Vulkan does this for dual source blending. */
458 			if (i == 1)
459 				blend_cntl |= S_028780_ENABLE(1);
460 
461 			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
462 			continue;
463 		}
464 
465 		/* Only addition and subtraction equations are supported with
466 		 * dual source blending.
467 		 */
468 		if (blend->dual_src_blend &&
469 		    (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
470 		     eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
471 			assert(!"Unsupported equation for dual source blending");
472 			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
473 			continue;
474 		}
475 
476 		/* cb_render_state will disable unused ones */
477 		blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
478 
479 		if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
480 			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
481 			continue;
482 		}
483 
484 		/* Blending optimizations for Stoney.
485 		 * These transformations don't change the behavior.
486 		 *
487 		 * First, get rid of DST in the blend factors:
488 		 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
489 		 */
490 		si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
491 				    PIPE_BLENDFACTOR_DST_COLOR,
492 				    PIPE_BLENDFACTOR_SRC_COLOR);
493 		si_blend_remove_dst(&eqA, &srcA, &dstA,
494 				    PIPE_BLENDFACTOR_DST_COLOR,
495 				    PIPE_BLENDFACTOR_SRC_COLOR);
496 		si_blend_remove_dst(&eqA, &srcA, &dstA,
497 				    PIPE_BLENDFACTOR_DST_ALPHA,
498 				    PIPE_BLENDFACTOR_SRC_ALPHA);
499 
500 		/* Look up the ideal settings from tables. */
501 		srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
502 		dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
503 		srcA_opt = si_translate_blend_opt_factor(srcA, true);
504 		dstA_opt = si_translate_blend_opt_factor(dstA, true);
505 
506 		/* Handle interdependencies. */
507 		if (si_blend_factor_uses_dst(srcRGB))
508 			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
509 		if (si_blend_factor_uses_dst(srcA))
510 			dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
511 
512 		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
513 		    (dstRGB == PIPE_BLENDFACTOR_ZERO ||
514 		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
515 		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
516 			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
517 
518 		/* Set the final value. */
519 		sx_mrt_blend_opt[i] =
520 			S_028760_COLOR_SRC_OPT(srcRGB_opt) |
521 			S_028760_COLOR_DST_OPT(dstRGB_opt) |
522 			S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
523 			S_028760_ALPHA_SRC_OPT(srcA_opt) |
524 			S_028760_ALPHA_DST_OPT(dstA_opt) |
525 			S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
526 
527 		/* Set blend state. */
528 		blend_cntl |= S_028780_ENABLE(1);
529 		blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
530 		blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
531 		blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
532 
533 		if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
534 			blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
535 			blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
536 			blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
537 			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
538 		}
539 		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
540 
541 		blend->blend_enable_4bit |= 0xfu << (i * 4);
542 
543 		/* This is only important for formats without alpha. */
544 		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
545 		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
546 		    srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
547 		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
548 		    srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
549 		    dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
550 			blend->need_src_alpha_4bit |= 0xfu << (i * 4);
551 	}
552 
553 	if (blend->cb_target_mask) {
554 		color_control |= S_028808_MODE(mode);
555 	} else {
556 		color_control |= S_028808_MODE(V_028808_CB_DISABLE);
557 	}
558 
559 	if (sctx->b.family == CHIP_STONEY) {
560 		/* Disable RB+ blend optimizations for dual source blending.
561 		 * Vulkan does this.
562 		 */
563 		if (blend->dual_src_blend) {
564 			for (int i = 0; i < 8; i++) {
565 				sx_mrt_blend_opt[i] =
566 					S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
567 					S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
568 			}
569 		}
570 
571 		for (int i = 0; i < 8; i++)
572 			si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
573 				       sx_mrt_blend_opt[i]);
574 
575 		/* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
576 		if (blend->dual_src_blend || state->logicop_enable ||
577 		    mode == V_028808_CB_RESOLVE)
578 			color_control |= S_028808_DISABLE_DUAL_QUAD(1);
579 	}
580 
581 	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
582 	return blend;
583 }
584 
si_create_blend_state(struct pipe_context * ctx,const struct pipe_blend_state * state)585 static void *si_create_blend_state(struct pipe_context *ctx,
586 				   const struct pipe_blend_state *state)
587 {
588 	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
589 }
590 
si_bind_blend_state(struct pipe_context * ctx,void * state)591 static void si_bind_blend_state(struct pipe_context *ctx, void *state)
592 {
593 	struct si_context *sctx = (struct si_context *)ctx;
594 	si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
595 	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
596 	sctx->do_update_shaders = true;
597 }
598 
si_delete_blend_state(struct pipe_context * ctx,void * state)599 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
600 {
601 	struct si_context *sctx = (struct si_context *)ctx;
602 	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
603 }
604 
si_set_blend_color(struct pipe_context * ctx,const struct pipe_blend_color * state)605 static void si_set_blend_color(struct pipe_context *ctx,
606 			       const struct pipe_blend_color *state)
607 {
608 	struct si_context *sctx = (struct si_context *)ctx;
609 
610 	if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0)
611 		return;
612 
613 	sctx->blend_color.state = *state;
614 	si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
615 }
616 
si_emit_blend_color(struct si_context * sctx,struct r600_atom * atom)617 static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
618 {
619 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
620 
621 	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
622 	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
623 }
624 
625 /*
626  * Clipping
627  */
628 
si_set_clip_state(struct pipe_context * ctx,const struct pipe_clip_state * state)629 static void si_set_clip_state(struct pipe_context *ctx,
630 			      const struct pipe_clip_state *state)
631 {
632 	struct si_context *sctx = (struct si_context *)ctx;
633 	struct pipe_constant_buffer cb;
634 
635 	if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
636 		return;
637 
638 	sctx->clip_state.state = *state;
639 	si_mark_atom_dirty(sctx, &sctx->clip_state.atom);
640 
641 	cb.buffer = NULL;
642 	cb.user_buffer = state->ucp;
643 	cb.buffer_offset = 0;
644 	cb.buffer_size = 4*4*8;
645 	si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
646 	pipe_resource_reference(&cb.buffer, NULL);
647 }
648 
si_emit_clip_state(struct si_context * sctx,struct r600_atom * atom)649 static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
650 {
651 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
652 
653 	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
654 	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
655 }
656 
657 #define SIX_BITS 0x3F
658 
si_emit_clip_regs(struct si_context * sctx,struct r600_atom * atom)659 static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
660 {
661 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
662 	struct si_shader *vs = si_get_vs_state(sctx);
663 	struct tgsi_shader_info *info = si_get_vs_info(sctx);
664 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
665 	unsigned window_space =
666 	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
667 	unsigned clipdist_mask =
668 		info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
669 	unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
670 	unsigned culldist_mask = info->culldist_writemask << info->num_written_clipdistance;
671 	unsigned total_mask;
672 	bool misc_vec_ena;
673 
674 	if (vs->key.opt.hw_vs.clip_disable) {
675 		assert(!info->culldist_writemask);
676 		clipdist_mask = 0;
677 		culldist_mask = 0;
678 	}
679 	total_mask = clipdist_mask | culldist_mask;
680 
681 	/* Clip distances on points have no effect, so need to be implemented
682 	 * as cull distances. This applies for the clipvertex case as well.
683 	 *
684 	 * Setting this for primitives other than points should have no adverse
685 	 * effects.
686 	 */
687 	clipdist_mask &= rs->clip_plane_enable;
688 	culldist_mask |= clipdist_mask;
689 
690 	misc_vec_ena = info->writes_psize || info->writes_edgeflag ||
691 		       info->writes_layer || info->writes_viewport_index;
692 
693 	radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
694 		S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
695 		S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
696 		S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
697 	        S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
698 		S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
699 		S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
700 		S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
701 		S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
702 		clipdist_mask | (culldist_mask << 8));
703 	radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
704 		rs->pa_cl_clip_cntl |
705 		ucp_mask |
706 		S_028810_CLIP_DISABLE(window_space));
707 
708 	/* reuse needs to be set off if we write oViewport */
709 	radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
710 			       S_028AB4_REUSE_OFF(info->writes_viewport_index));
711 }
712 
713 /*
714  * inferred state between framebuffer and rasterizer
715  */
si_update_poly_offset_state(struct si_context * sctx)716 static void si_update_poly_offset_state(struct si_context *sctx)
717 {
718 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
719 
720 	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
721 		si_pm4_bind_state(sctx, poly_offset, NULL);
722 		return;
723 	}
724 
725 	/* Use the user format, not db_render_format, so that the polygon
726 	 * offset behaves as expected by applications.
727 	 */
728 	switch (sctx->framebuffer.state.zsbuf->texture->format) {
729 	case PIPE_FORMAT_Z16_UNORM:
730 		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
731 		break;
732 	default: /* 24-bit */
733 		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
734 		break;
735 	case PIPE_FORMAT_Z32_FLOAT:
736 	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
737 		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
738 		break;
739 	}
740 }
741 
742 /*
743  * Rasterizer
744  */
745 
si_translate_fill(uint32_t func)746 static uint32_t si_translate_fill(uint32_t func)
747 {
748 	switch(func) {
749 	case PIPE_POLYGON_MODE_FILL:
750 		return V_028814_X_DRAW_TRIANGLES;
751 	case PIPE_POLYGON_MODE_LINE:
752 		return V_028814_X_DRAW_LINES;
753 	case PIPE_POLYGON_MODE_POINT:
754 		return V_028814_X_DRAW_POINTS;
755 	default:
756 		assert(0);
757 		return V_028814_X_DRAW_POINTS;
758 	}
759 }
760 
si_create_rs_state(struct pipe_context * ctx,const struct pipe_rasterizer_state * state)761 static void *si_create_rs_state(struct pipe_context *ctx,
762 				const struct pipe_rasterizer_state *state)
763 {
764 	struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
765 	struct si_pm4_state *pm4 = &rs->pm4;
766 	unsigned tmp, i;
767 	float psize_min, psize_max;
768 
769 	if (!rs) {
770 		return NULL;
771 	}
772 
773 	rs->scissor_enable = state->scissor;
774 	rs->clip_halfz = state->clip_halfz;
775 	rs->two_side = state->light_twoside;
776 	rs->multisample_enable = state->multisample;
777 	rs->force_persample_interp = state->force_persample_interp;
778 	rs->clip_plane_enable = state->clip_plane_enable;
779 	rs->line_stipple_enable = state->line_stipple_enable;
780 	rs->poly_stipple_enable = state->poly_stipple_enable;
781 	rs->line_smooth = state->line_smooth;
782 	rs->poly_smooth = state->poly_smooth;
783 	rs->uses_poly_offset = state->offset_point || state->offset_line ||
784 			       state->offset_tri;
785 	rs->clamp_fragment_color = state->clamp_fragment_color;
786 	rs->flatshade = state->flatshade;
787 	rs->sprite_coord_enable = state->sprite_coord_enable;
788 	rs->rasterizer_discard = state->rasterizer_discard;
789 	rs->pa_sc_line_stipple = state->line_stipple_enable ?
790 				S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
791 				S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
792 	rs->pa_cl_clip_cntl =
793 		S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
794 		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
795 		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
796 		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
797 		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
798 
799 	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
800 		S_0286D4_FLAT_SHADE_ENA(1) |
801 		S_0286D4_PNT_SPRITE_ENA(1) |
802 		S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
803 		S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
804 		S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
805 		S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
806 		S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
807 
808 	/* point size 12.4 fixed point */
809 	tmp = (unsigned)(state->point_size * 8.0);
810 	si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
811 
812 	if (state->point_size_per_vertex) {
813 		psize_min = util_get_min_point_size(state);
814 		psize_max = 8192;
815 	} else {
816 		/* Force the point size to be as if the vertex output was disabled. */
817 		psize_min = state->point_size;
818 		psize_max = state->point_size;
819 	}
820 	/* Divide by two, because 0.5 = 1 pixel. */
821 	si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
822 			S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
823 			S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
824 
825 	tmp = (unsigned)state->line_width * 8;
826 	si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
827 	si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
828 		       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
829 		       S_028A48_MSAA_ENABLE(state->multisample ||
830 					    state->poly_smooth ||
831 					    state->line_smooth) |
832 		       S_028A48_VPORT_SCISSOR_ENABLE(1));
833 
834 	si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
835 		       S_028BE4_PIX_CENTER(state->half_pixel_center) |
836 		       S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
837 
838 	si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
839 	si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
840 		S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
841 		S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
842 		S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
843 		S_028814_FACE(!state->front_ccw) |
844 		S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
845 		S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
846 		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
847 		S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
848 				   state->fill_back != PIPE_POLYGON_MODE_FILL) |
849 		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
850 		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
851 	si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 +
852 		       SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color);
853 
854 	/* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
855 	for (i = 0; i < 3; i++) {
856 		struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
857 		float offset_units = state->offset_units;
858 		float offset_scale = state->offset_scale * 16.0f;
859 		uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
860 
861 		if (!state->offset_units_unscaled) {
862 			switch (i) {
863 			case 0: /* 16-bit zbuffer */
864 				offset_units *= 4.0f;
865 				pa_su_poly_offset_db_fmt_cntl =
866 					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
867 				break;
868 			case 1: /* 24-bit zbuffer */
869 				offset_units *= 2.0f;
870 				pa_su_poly_offset_db_fmt_cntl =
871 					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
872 				break;
873 			case 2: /* 32-bit zbuffer */
874 				offset_units *= 1.0f;
875 				pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
876 								S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
877 				break;
878 			}
879 		}
880 
881 		si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
882 			       fui(offset_scale));
883 		si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
884 			       fui(offset_units));
885 		si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
886 			       fui(offset_scale));
887 		si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
888 			       fui(offset_units));
889 		si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
890 			       pa_su_poly_offset_db_fmt_cntl);
891 	}
892 
893 	return rs;
894 }
895 
si_bind_rs_state(struct pipe_context * ctx,void * state)896 static void si_bind_rs_state(struct pipe_context *ctx, void *state)
897 {
898 	struct si_context *sctx = (struct si_context *)ctx;
899 	struct si_state_rasterizer *old_rs =
900 		(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
901 	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
902 
903 	if (!state)
904 		return;
905 
906 	if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) {
907 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
908 
909 		/* Update the small primitive filter workaround if necessary. */
910 		if (sctx->b.family >= CHIP_POLARIS10 &&
911 		    sctx->framebuffer.nr_samples > 1)
912 			si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
913 	}
914 
915 	r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
916 
917 	si_pm4_bind_state(sctx, rasterizer, rs);
918 	si_update_poly_offset_state(sctx);
919 
920 	si_mark_atom_dirty(sctx, &sctx->clip_regs);
921 	sctx->do_update_shaders = true;
922 }
923 
si_delete_rs_state(struct pipe_context * ctx,void * state)924 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
925 {
926 	struct si_context *sctx = (struct si_context *)ctx;
927 
928 	if (sctx->queued.named.rasterizer == state)
929 		si_pm4_bind_state(sctx, poly_offset, NULL);
930 	si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
931 }
932 
/*
 * Inferred state between DSA and stencil ref: the stencil reference/mask
 * registers combine values from both.
 */
si_emit_stencil_ref(struct si_context * sctx,struct r600_atom * atom)936 static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
937 {
938 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
939 	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
940 	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
941 
942 	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
943 	radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
944 			S_028430_STENCILMASK(dsa->valuemask[0]) |
945 			S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
946 			S_028430_STENCILOPVAL(1));
947 	radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
948 			S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
949 			S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
950 			S_028434_STENCILOPVAL_BF(1));
951 }
952 
si_set_stencil_ref(struct pipe_context * ctx,const struct pipe_stencil_ref * state)953 static void si_set_stencil_ref(struct pipe_context *ctx,
954 			       const struct pipe_stencil_ref *state)
955 {
956         struct si_context *sctx = (struct si_context *)ctx;
957 
958 	if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
959 		return;
960 
961 	sctx->stencil_ref.state = *state;
962 	si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
963 }
964 
965 
966 /*
967  * DSA
968  */
969 
si_translate_stencil_op(int s_op)970 static uint32_t si_translate_stencil_op(int s_op)
971 {
972 	switch (s_op) {
973 	case PIPE_STENCIL_OP_KEEP:
974 		return V_02842C_STENCIL_KEEP;
975 	case PIPE_STENCIL_OP_ZERO:
976 		return V_02842C_STENCIL_ZERO;
977 	case PIPE_STENCIL_OP_REPLACE:
978 		return V_02842C_STENCIL_REPLACE_TEST;
979 	case PIPE_STENCIL_OP_INCR:
980 		return V_02842C_STENCIL_ADD_CLAMP;
981 	case PIPE_STENCIL_OP_DECR:
982 		return V_02842C_STENCIL_SUB_CLAMP;
983 	case PIPE_STENCIL_OP_INCR_WRAP:
984 		return V_02842C_STENCIL_ADD_WRAP;
985 	case PIPE_STENCIL_OP_DECR_WRAP:
986 		return V_02842C_STENCIL_SUB_WRAP;
987 	case PIPE_STENCIL_OP_INVERT:
988 		return V_02842C_STENCIL_INVERT;
989 	default:
990 		R600_ERR("Unknown stencil op %d", s_op);
991 		assert(0);
992 		break;
993 	}
994 	return 0;
995 }
996 
si_create_dsa_state(struct pipe_context * ctx,const struct pipe_depth_stencil_alpha_state * state)997 static void *si_create_dsa_state(struct pipe_context *ctx,
998 				 const struct pipe_depth_stencil_alpha_state *state)
999 {
1000 	struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
1001 	struct si_pm4_state *pm4 = &dsa->pm4;
1002 	unsigned db_depth_control;
1003 	uint32_t db_stencil_control = 0;
1004 
1005 	if (!dsa) {
1006 		return NULL;
1007 	}
1008 
1009 	dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
1010 	dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
1011 	dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
1012 	dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
1013 
1014 	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
1015 		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
1016 		S_028800_ZFUNC(state->depth.func) |
1017 		S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
1018 
1019 	/* stencil */
1020 	if (state->stencil[0].enabled) {
1021 		db_depth_control |= S_028800_STENCIL_ENABLE(1);
1022 		db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
1023 		db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
1024 		db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
1025 		db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
1026 
1027 		if (state->stencil[1].enabled) {
1028 			db_depth_control |= S_028800_BACKFACE_ENABLE(1);
1029 			db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
1030 			db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
1031 			db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
1032 			db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
1033 		}
1034 	}
1035 
1036 	/* alpha */
1037 	if (state->alpha.enabled) {
1038 		dsa->alpha_func = state->alpha.func;
1039 
1040 		si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
1041 		               SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value));
1042 	} else {
1043 		dsa->alpha_func = PIPE_FUNC_ALWAYS;
1044 	}
1045 
1046 	si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
1047 	si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
1048 	if (state->depth.bounds_test) {
1049 		si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
1050 		si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
1051 	}
1052 
1053 	return dsa;
1054 }
1055 
si_bind_dsa_state(struct pipe_context * ctx,void * state)1056 static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
1057 {
1058         struct si_context *sctx = (struct si_context *)ctx;
1059         struct si_state_dsa *dsa = state;
1060 
1061         if (!state)
1062                 return;
1063 
1064 	si_pm4_bind_state(sctx, dsa, dsa);
1065 
1066 	if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
1067 		   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
1068 		sctx->stencil_ref.dsa_part = dsa->stencil_ref;
1069 		si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
1070 	}
1071 	sctx->do_update_shaders = true;
1072 }
1073 
si_delete_dsa_state(struct pipe_context * ctx,void * state)1074 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
1075 {
1076 	struct si_context *sctx = (struct si_context *)ctx;
1077 	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
1078 }
1079 
si_create_db_flush_dsa(struct si_context * sctx)1080 static void *si_create_db_flush_dsa(struct si_context *sctx)
1081 {
1082 	struct pipe_depth_stencil_alpha_state dsa = {};
1083 
1084 	return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
1085 }
1086 
1087 /* DB RENDER STATE */
1088 
/*
 * Enable or disable active queries.
 *
 * Pipeline statistics (and streamout) queries are toggled by setting the
 * START or STOP pipeline-stats context flag and clearing the opposite
 * one. For occlusion queries, the "disabled" flag in the context is
 * updated and the DB render state atom is marked dirty, but only when the
 * flag actually changes.
 */
static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
{
	struct si_context *sctx = (struct si_context *)ctx;
	bool disable_occlusion = !enable;

	/* Pipeline stat & streamout queries. */
	if (!enable) {
		sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS;
		sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS;
	} else {
		sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS;
		sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS;
	}

	/* Occlusion queries. */
	if (sctx->occlusion_queries_disabled == disable_occlusion)
		return;

	sctx->occlusion_queries_disabled = disable_occlusion;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
1108 
si_set_occlusion_query_state(struct pipe_context * ctx,bool enable)1109 static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
1110 {
1111 	struct si_context *sctx = (struct si_context*)ctx;
1112 
1113 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
1114 }
1115 
si_save_qbo_state(struct pipe_context * ctx,struct r600_qbo_state * st)1116 static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
1117 {
1118 	struct si_context *sctx = (struct si_context*)ctx;
1119 
1120 	st->saved_compute = sctx->cs_shader_state.program;
1121 
1122 	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
1123 	si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
1124 }
1125 
si_emit_db_render_state(struct si_context * sctx,struct r600_atom * state)1126 static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
1127 {
1128 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
1129 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
1130 	unsigned db_shader_control;
1131 
1132 	radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
1133 
1134 	/* DB_RENDER_CONTROL */
1135 	if (sctx->dbcb_depth_copy_enabled ||
1136 	    sctx->dbcb_stencil_copy_enabled) {
1137 		radeon_emit(cs,
1138 			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
1139 			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
1140 			    S_028000_COPY_CENTROID(1) |
1141 			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
1142 	} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
1143 		radeon_emit(cs,
1144 			    S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
1145 			    S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
1146 	} else {
1147 		radeon_emit(cs,
1148 			    S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
1149 			    S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
1150 	}
1151 
1152 	/* DB_COUNT_CONTROL (occlusion queries) */
1153 	if (sctx->b.num_occlusion_queries > 0 &&
1154 	    !sctx->occlusion_queries_disabled) {
1155 		bool perfect = sctx->b.num_perfect_occlusion_queries > 0;
1156 
1157 		if (sctx->b.chip_class >= CIK) {
1158 			radeon_emit(cs,
1159 				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
1160 				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
1161 				    S_028004_ZPASS_ENABLE(1) |
1162 				    S_028004_SLICE_EVEN_ENABLE(1) |
1163 				    S_028004_SLICE_ODD_ENABLE(1));
1164 		} else {
1165 			radeon_emit(cs,
1166 				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
1167 				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
1168 		}
1169 	} else {
1170 		/* Disable occlusion queries. */
1171 		if (sctx->b.chip_class >= CIK) {
1172 			radeon_emit(cs, 0);
1173 		} else {
1174 			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
1175 		}
1176 	}
1177 
1178 	/* DB_RENDER_OVERRIDE2 */
1179 	radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
1180 		S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
1181 		S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
1182 		S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
1183 
1184 	db_shader_control = sctx->ps_db_shader_control;
1185 
1186 	/* Bug workaround for smoothing (overrasterization) on SI. */
1187 	if (sctx->b.chip_class == SI && sctx->smoothing_enabled) {
1188 		db_shader_control &= C_02880C_Z_ORDER;
1189 		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
1190 	}
1191 
1192 	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
1193 	if (!rs || !rs->multisample_enable)
1194 		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
1195 
1196 	if (sctx->b.family == CHIP_STONEY &&
1197 	    sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
1198 		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
1199 
1200 	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
1201 			       db_shader_control);
1202 }
1203 
1204 /*
1205  * format translation
1206  */
si_translate_colorformat(enum pipe_format format)1207 static uint32_t si_translate_colorformat(enum pipe_format format)
1208 {
1209 	const struct util_format_description *desc = util_format_description(format);
1210 
1211 #define HAS_SIZE(x,y,z,w) \
1212 	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
1213          desc->channel[2].size == (z) && desc->channel[3].size == (w))
1214 
1215 	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
1216 		return V_028C70_COLOR_10_11_11;
1217 
1218 	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
1219 		return V_028C70_COLOR_INVALID;
1220 
1221 	/* hw cannot support mixed formats (except depth/stencil, since
1222 	 * stencil is not written to). */
1223 	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
1224 		return V_028C70_COLOR_INVALID;
1225 
1226 	switch (desc->nr_channels) {
1227 	case 1:
1228 		switch (desc->channel[0].size) {
1229 		case 8:
1230 			return V_028C70_COLOR_8;
1231 		case 16:
1232 			return V_028C70_COLOR_16;
1233 		case 32:
1234 			return V_028C70_COLOR_32;
1235 		}
1236 		break;
1237 	case 2:
1238 		if (desc->channel[0].size == desc->channel[1].size) {
1239 			switch (desc->channel[0].size) {
1240 			case 8:
1241 				return V_028C70_COLOR_8_8;
1242 			case 16:
1243 				return V_028C70_COLOR_16_16;
1244 			case 32:
1245 				return V_028C70_COLOR_32_32;
1246 			}
1247 		} else if (HAS_SIZE(8,24,0,0)) {
1248 			return V_028C70_COLOR_24_8;
1249 		} else if (HAS_SIZE(24,8,0,0)) {
1250 			return V_028C70_COLOR_8_24;
1251 		}
1252 		break;
1253 	case 3:
1254 		if (HAS_SIZE(5,6,5,0)) {
1255 			return V_028C70_COLOR_5_6_5;
1256 		} else if (HAS_SIZE(32,8,24,0)) {
1257 			return V_028C70_COLOR_X24_8_32_FLOAT;
1258 		}
1259 		break;
1260 	case 4:
1261 		if (desc->channel[0].size == desc->channel[1].size &&
1262 		    desc->channel[0].size == desc->channel[2].size &&
1263 		    desc->channel[0].size == desc->channel[3].size) {
1264 			switch (desc->channel[0].size) {
1265 			case 4:
1266 				return V_028C70_COLOR_4_4_4_4;
1267 			case 8:
1268 				return V_028C70_COLOR_8_8_8_8;
1269 			case 16:
1270 				return V_028C70_COLOR_16_16_16_16;
1271 			case 32:
1272 				return V_028C70_COLOR_32_32_32_32;
1273 			}
1274 		} else if (HAS_SIZE(5,5,5,1)) {
1275 			return V_028C70_COLOR_1_5_5_5;
1276 		} else if (HAS_SIZE(10,10,10,2)) {
1277 			return V_028C70_COLOR_2_10_10_10;
1278 		}
1279 		break;
1280 	}
1281 	return V_028C70_COLOR_INVALID;
1282 }
1283 
si_colorformat_endian_swap(uint32_t colorformat)1284 static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1285 {
1286 	if (SI_BIG_ENDIAN) {
1287 		switch(colorformat) {
1288 		/* 8-bit buffers. */
1289 		case V_028C70_COLOR_8:
1290 			return V_028C70_ENDIAN_NONE;
1291 
1292 		/* 16-bit buffers. */
1293 		case V_028C70_COLOR_5_6_5:
1294 		case V_028C70_COLOR_1_5_5_5:
1295 		case V_028C70_COLOR_4_4_4_4:
1296 		case V_028C70_COLOR_16:
1297 		case V_028C70_COLOR_8_8:
1298 			return V_028C70_ENDIAN_8IN16;
1299 
1300 		/* 32-bit buffers. */
1301 		case V_028C70_COLOR_8_8_8_8:
1302 		case V_028C70_COLOR_2_10_10_10:
1303 		case V_028C70_COLOR_8_24:
1304 		case V_028C70_COLOR_24_8:
1305 		case V_028C70_COLOR_16_16:
1306 			return V_028C70_ENDIAN_8IN32;
1307 
1308 		/* 64-bit buffers. */
1309 		case V_028C70_COLOR_16_16_16_16:
1310 			return V_028C70_ENDIAN_8IN16;
1311 
1312 		case V_028C70_COLOR_32_32:
1313 			return V_028C70_ENDIAN_8IN32;
1314 
1315 		/* 128-bit buffers. */
1316 		case V_028C70_COLOR_32_32_32_32:
1317 			return V_028C70_ENDIAN_8IN32;
1318 		default:
1319 			return V_028C70_ENDIAN_NONE; /* Unsupported. */
1320 		}
1321 	} else {
1322 		return V_028C70_ENDIAN_NONE;
1323 	}
1324 }
1325 
si_translate_dbformat(enum pipe_format format)1326 static uint32_t si_translate_dbformat(enum pipe_format format)
1327 {
1328 	switch (format) {
1329 	case PIPE_FORMAT_Z16_UNORM:
1330 		return V_028040_Z_16;
1331 	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1332 	case PIPE_FORMAT_X8Z24_UNORM:
1333 	case PIPE_FORMAT_Z24X8_UNORM:
1334 	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1335 		return V_028040_Z_24; /* deprecated on SI */
1336 	case PIPE_FORMAT_Z32_FLOAT:
1337 	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1338 		return V_028040_Z_32_FLOAT;
1339 	default:
1340 		return V_028040_Z_INVALID;
1341 	}
1342 }
1343 
1344 /*
1345  * Texture translation
1346  */
1347 
si_translate_texformat(struct pipe_screen * screen,enum pipe_format format,const struct util_format_description * desc,int first_non_void)1348 static uint32_t si_translate_texformat(struct pipe_screen *screen,
1349 				       enum pipe_format format,
1350 				       const struct util_format_description *desc,
1351 				       int first_non_void)
1352 {
1353 	struct si_screen *sscreen = (struct si_screen*)screen;
1354 	bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
1355 					  sscreen->b.info.drm_minor >= 31) ||
1356 					 sscreen->b.info.drm_major == 3;
1357 	bool uniform = true;
1358 	int i;
1359 
1360 	/* Colorspace (return non-RGB formats directly). */
1361 	switch (desc->colorspace) {
1362 	/* Depth stencil formats */
1363 	case UTIL_FORMAT_COLORSPACE_ZS:
1364 		switch (format) {
1365 		case PIPE_FORMAT_Z16_UNORM:
1366 			return V_008F14_IMG_DATA_FORMAT_16;
1367 		case PIPE_FORMAT_X24S8_UINT:
1368 		case PIPE_FORMAT_S8X24_UINT:
1369 			/*
1370 			 * Implemented as an 8_8_8_8 data format to fix texture
1371 			 * gathers in stencil sampling. This affects at least
1372 			 * GL45-CTS.texture_cube_map_array.sampling on VI.
1373 			 */
1374 			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1375 		case PIPE_FORMAT_Z24X8_UNORM:
1376 		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1377 			return V_008F14_IMG_DATA_FORMAT_8_24;
1378 		case PIPE_FORMAT_X8Z24_UNORM:
1379 		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1380 			return V_008F14_IMG_DATA_FORMAT_24_8;
1381 		case PIPE_FORMAT_S8_UINT:
1382 			return V_008F14_IMG_DATA_FORMAT_8;
1383 		case PIPE_FORMAT_Z32_FLOAT:
1384 			return V_008F14_IMG_DATA_FORMAT_32;
1385 		case PIPE_FORMAT_X32_S8X24_UINT:
1386 		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1387 			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
1388 		default:
1389 			goto out_unknown;
1390 		}
1391 
1392 	case UTIL_FORMAT_COLORSPACE_YUV:
1393 		goto out_unknown; /* TODO */
1394 
1395 	case UTIL_FORMAT_COLORSPACE_SRGB:
1396 		if (desc->nr_channels != 4 && desc->nr_channels != 1)
1397 			goto out_unknown;
1398 		break;
1399 
1400 	default:
1401 		break;
1402 	}
1403 
1404 	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
1405 		if (!enable_compressed_formats)
1406 			goto out_unknown;
1407 
1408 		switch (format) {
1409 		case PIPE_FORMAT_RGTC1_SNORM:
1410 		case PIPE_FORMAT_LATC1_SNORM:
1411 		case PIPE_FORMAT_RGTC1_UNORM:
1412 		case PIPE_FORMAT_LATC1_UNORM:
1413 			return V_008F14_IMG_DATA_FORMAT_BC4;
1414 		case PIPE_FORMAT_RGTC2_SNORM:
1415 		case PIPE_FORMAT_LATC2_SNORM:
1416 		case PIPE_FORMAT_RGTC2_UNORM:
1417 		case PIPE_FORMAT_LATC2_UNORM:
1418 			return V_008F14_IMG_DATA_FORMAT_BC5;
1419 		default:
1420 			goto out_unknown;
1421 		}
1422 	}
1423 
1424 	if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
1425 	    sscreen->b.family == CHIP_STONEY) {
1426 		switch (format) {
1427 		case PIPE_FORMAT_ETC1_RGB8:
1428 		case PIPE_FORMAT_ETC2_RGB8:
1429 		case PIPE_FORMAT_ETC2_SRGB8:
1430 			return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
1431 		case PIPE_FORMAT_ETC2_RGB8A1:
1432 		case PIPE_FORMAT_ETC2_SRGB8A1:
1433 			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
1434 		case PIPE_FORMAT_ETC2_RGBA8:
1435 		case PIPE_FORMAT_ETC2_SRGBA8:
1436 			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
1437 		case PIPE_FORMAT_ETC2_R11_UNORM:
1438 		case PIPE_FORMAT_ETC2_R11_SNORM:
1439 			return V_008F14_IMG_DATA_FORMAT_ETC2_R;
1440 		case PIPE_FORMAT_ETC2_RG11_UNORM:
1441 		case PIPE_FORMAT_ETC2_RG11_SNORM:
1442 			return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
1443 		default:
1444 			goto out_unknown;
1445 		}
1446 	}
1447 
1448 	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
1449 		if (!enable_compressed_formats)
1450 			goto out_unknown;
1451 
1452 		switch (format) {
1453 		case PIPE_FORMAT_BPTC_RGBA_UNORM:
1454 		case PIPE_FORMAT_BPTC_SRGBA:
1455 			return V_008F14_IMG_DATA_FORMAT_BC7;
1456 		case PIPE_FORMAT_BPTC_RGB_FLOAT:
1457 		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
1458 			return V_008F14_IMG_DATA_FORMAT_BC6;
1459 		default:
1460 			goto out_unknown;
1461 		}
1462 	}
1463 
1464 	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
1465 		switch (format) {
1466 		case PIPE_FORMAT_R8G8_B8G8_UNORM:
1467 		case PIPE_FORMAT_G8R8_B8R8_UNORM:
1468 			return V_008F14_IMG_DATA_FORMAT_GB_GR;
1469 		case PIPE_FORMAT_G8R8_G8B8_UNORM:
1470 		case PIPE_FORMAT_R8G8_R8B8_UNORM:
1471 			return V_008F14_IMG_DATA_FORMAT_BG_RG;
1472 		default:
1473 			goto out_unknown;
1474 		}
1475 	}
1476 
1477 	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1478 		if (!enable_compressed_formats)
1479 			goto out_unknown;
1480 
1481 		if (!util_format_s3tc_enabled) {
1482 			goto out_unknown;
1483 		}
1484 
1485 		switch (format) {
1486 		case PIPE_FORMAT_DXT1_RGB:
1487 		case PIPE_FORMAT_DXT1_RGBA:
1488 		case PIPE_FORMAT_DXT1_SRGB:
1489 		case PIPE_FORMAT_DXT1_SRGBA:
1490 			return V_008F14_IMG_DATA_FORMAT_BC1;
1491 		case PIPE_FORMAT_DXT3_RGBA:
1492 		case PIPE_FORMAT_DXT3_SRGBA:
1493 			return V_008F14_IMG_DATA_FORMAT_BC2;
1494 		case PIPE_FORMAT_DXT5_RGBA:
1495 		case PIPE_FORMAT_DXT5_SRGBA:
1496 			return V_008F14_IMG_DATA_FORMAT_BC3;
1497 		default:
1498 			goto out_unknown;
1499 		}
1500 	}
1501 
1502 	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
1503 		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
1504 	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
1505 		return V_008F14_IMG_DATA_FORMAT_10_11_11;
1506 	}
1507 
1508 	/* R8G8Bx_SNORM - TODO CxV8U8 */
1509 
1510 	/* hw cannot support mixed formats (except depth/stencil, since only
1511 	 * depth is read).*/
1512 	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
1513 		goto out_unknown;
1514 
1515 	/* See whether the components are of the same size. */
1516 	for (i = 1; i < desc->nr_channels; i++) {
1517 		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
1518 	}
1519 
1520 	/* Non-uniform formats. */
1521 	if (!uniform) {
1522 		switch(desc->nr_channels) {
1523 		case 3:
1524 			if (desc->channel[0].size == 5 &&
1525 			    desc->channel[1].size == 6 &&
1526 			    desc->channel[2].size == 5) {
1527 				return V_008F14_IMG_DATA_FORMAT_5_6_5;
1528 			}
1529 			goto out_unknown;
1530 		case 4:
1531 			if (desc->channel[0].size == 5 &&
1532 			    desc->channel[1].size == 5 &&
1533 			    desc->channel[2].size == 5 &&
1534 			    desc->channel[3].size == 1) {
1535 				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
1536 			}
1537 			if (desc->channel[0].size == 10 &&
1538 			    desc->channel[1].size == 10 &&
1539 			    desc->channel[2].size == 10 &&
1540 			    desc->channel[3].size == 2) {
1541 				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
1542 			}
1543 			goto out_unknown;
1544 		}
1545 		goto out_unknown;
1546 	}
1547 
1548 	if (first_non_void < 0 || first_non_void > 3)
1549 		goto out_unknown;
1550 
1551 	/* uniform formats */
1552 	switch (desc->channel[first_non_void].size) {
1553 	case 4:
1554 		switch (desc->nr_channels) {
1555 #if 0 /* Not supported for render targets */
1556 		case 2:
1557 			return V_008F14_IMG_DATA_FORMAT_4_4;
1558 #endif
1559 		case 4:
1560 			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
1561 		}
1562 		break;
1563 	case 8:
1564 		switch (desc->nr_channels) {
1565 		case 1:
1566 			return V_008F14_IMG_DATA_FORMAT_8;
1567 		case 2:
1568 			return V_008F14_IMG_DATA_FORMAT_8_8;
1569 		case 4:
1570 			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1571 		}
1572 		break;
1573 	case 16:
1574 		switch (desc->nr_channels) {
1575 		case 1:
1576 			return V_008F14_IMG_DATA_FORMAT_16;
1577 		case 2:
1578 			return V_008F14_IMG_DATA_FORMAT_16_16;
1579 		case 4:
1580 			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
1581 		}
1582 		break;
1583 	case 32:
1584 		switch (desc->nr_channels) {
1585 		case 1:
1586 			return V_008F14_IMG_DATA_FORMAT_32;
1587 		case 2:
1588 			return V_008F14_IMG_DATA_FORMAT_32_32;
1589 #if 0 /* Not supported for render targets */
1590 		case 3:
1591 			return V_008F14_IMG_DATA_FORMAT_32_32_32;
1592 #endif
1593 		case 4:
1594 			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
1595 		}
1596 	}
1597 
1598 out_unknown:
1599 	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
1600 	return ~0;
1601 }
1602 
si_tex_wrap(unsigned wrap)1603 static unsigned si_tex_wrap(unsigned wrap)
1604 {
1605 	switch (wrap) {
1606 	default:
1607 	case PIPE_TEX_WRAP_REPEAT:
1608 		return V_008F30_SQ_TEX_WRAP;
1609 	case PIPE_TEX_WRAP_CLAMP:
1610 		return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1611 	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1612 		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1613 	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1614 		return V_008F30_SQ_TEX_CLAMP_BORDER;
1615 	case PIPE_TEX_WRAP_MIRROR_REPEAT:
1616 		return V_008F30_SQ_TEX_MIRROR;
1617 	case PIPE_TEX_WRAP_MIRROR_CLAMP:
1618 		return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1619 	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1620 		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1621 	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1622 		return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1623 	}
1624 }
1625 
si_tex_mipfilter(unsigned filter)1626 static unsigned si_tex_mipfilter(unsigned filter)
1627 {
1628 	switch (filter) {
1629 	case PIPE_TEX_MIPFILTER_NEAREST:
1630 		return V_008F38_SQ_TEX_Z_FILTER_POINT;
1631 	case PIPE_TEX_MIPFILTER_LINEAR:
1632 		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1633 	default:
1634 	case PIPE_TEX_MIPFILTER_NONE:
1635 		return V_008F38_SQ_TEX_Z_FILTER_NONE;
1636 	}
1637 }
1638 
si_tex_compare(unsigned compare)1639 static unsigned si_tex_compare(unsigned compare)
1640 {
1641 	switch (compare) {
1642 	default:
1643 	case PIPE_FUNC_NEVER:
1644 		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1645 	case PIPE_FUNC_LESS:
1646 		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1647 	case PIPE_FUNC_EQUAL:
1648 		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1649 	case PIPE_FUNC_LEQUAL:
1650 		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1651 	case PIPE_FUNC_GREATER:
1652 		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1653 	case PIPE_FUNC_NOTEQUAL:
1654 		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1655 	case PIPE_FUNC_GEQUAL:
1656 		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1657 	case PIPE_FUNC_ALWAYS:
1658 		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1659 	}
1660 }
1661 
si_tex_dim(unsigned res_target,unsigned view_target,unsigned nr_samples)1662 static unsigned si_tex_dim(unsigned res_target, unsigned view_target,
1663 			   unsigned nr_samples)
1664 {
1665 	if (view_target == PIPE_TEXTURE_CUBE ||
1666 	    view_target == PIPE_TEXTURE_CUBE_ARRAY)
1667 		res_target = view_target;
1668 	/* If interpreting cubemaps as something else, set 2D_ARRAY. */
1669 	else if (res_target == PIPE_TEXTURE_CUBE ||
1670 		 res_target == PIPE_TEXTURE_CUBE_ARRAY)
1671 		res_target = PIPE_TEXTURE_2D_ARRAY;
1672 
1673 	switch (res_target) {
1674 	default:
1675 	case PIPE_TEXTURE_1D:
1676 		return V_008F1C_SQ_RSRC_IMG_1D;
1677 	case PIPE_TEXTURE_1D_ARRAY:
1678 		return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1679 	case PIPE_TEXTURE_2D:
1680 	case PIPE_TEXTURE_RECT:
1681 		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1682 					V_008F1C_SQ_RSRC_IMG_2D;
1683 	case PIPE_TEXTURE_2D_ARRAY:
1684 		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1685 					V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1686 	case PIPE_TEXTURE_3D:
1687 		return V_008F1C_SQ_RSRC_IMG_3D;
1688 	case PIPE_TEXTURE_CUBE:
1689 	case PIPE_TEXTURE_CUBE_ARRAY:
1690 		return V_008F1C_SQ_RSRC_IMG_CUBE;
1691 	}
1692 }
1693 
1694 /*
1695  * Format support testing
1696  */
1697 
/*
 * Report whether a format can be sampled: true when
 * si_translate_texformat yields anything other than ~0 for it.
 */
static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
	const struct util_format_description *desc = util_format_description(format);
	int first_non_void = util_format_get_first_non_void_channel(format);
	uint32_t data_format = si_translate_texformat(screen, format, desc,
						      first_non_void);

	return data_format != ~0U;
}
1703 
si_translate_buffer_dataformat(struct pipe_screen * screen,const struct util_format_description * desc,int first_non_void)1704 static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
1705 					       const struct util_format_description *desc,
1706 					       int first_non_void)
1707 {
1708 	int i;
1709 
1710 	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1711 		return V_008F0C_BUF_DATA_FORMAT_10_11_11;
1712 
1713 	assert(first_non_void >= 0);
1714 
1715 	if (desc->nr_channels == 4 &&
1716 	    desc->channel[0].size == 10 &&
1717 	    desc->channel[1].size == 10 &&
1718 	    desc->channel[2].size == 10 &&
1719 	    desc->channel[3].size == 2)
1720 		return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
1721 
1722 	/* See whether the components are of the same size. */
1723 	for (i = 0; i < desc->nr_channels; i++) {
1724 		if (desc->channel[first_non_void].size != desc->channel[i].size)
1725 			return V_008F0C_BUF_DATA_FORMAT_INVALID;
1726 	}
1727 
1728 	switch (desc->channel[first_non_void].size) {
1729 	case 8:
1730 		switch (desc->nr_channels) {
1731 		case 1:
1732 			return V_008F0C_BUF_DATA_FORMAT_8;
1733 		case 2:
1734 			return V_008F0C_BUF_DATA_FORMAT_8_8;
1735 		case 3:
1736 		case 4:
1737 			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
1738 		}
1739 		break;
1740 	case 16:
1741 		switch (desc->nr_channels) {
1742 		case 1:
1743 			return V_008F0C_BUF_DATA_FORMAT_16;
1744 		case 2:
1745 			return V_008F0C_BUF_DATA_FORMAT_16_16;
1746 		case 3:
1747 		case 4:
1748 			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
1749 		}
1750 		break;
1751 	case 32:
1752 		switch (desc->nr_channels) {
1753 		case 1:
1754 			return V_008F0C_BUF_DATA_FORMAT_32;
1755 		case 2:
1756 			return V_008F0C_BUF_DATA_FORMAT_32_32;
1757 		case 3:
1758 			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
1759 		case 4:
1760 			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
1761 		}
1762 		break;
1763 	}
1764 
1765 	return V_008F0C_BUF_DATA_FORMAT_INVALID;
1766 }
1767 
si_translate_buffer_numformat(struct pipe_screen * screen,const struct util_format_description * desc,int first_non_void)1768 static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
1769 					      const struct util_format_description *desc,
1770 					      int first_non_void)
1771 {
1772 	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1773 		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1774 
1775 	assert(first_non_void >= 0);
1776 
1777 	switch (desc->channel[first_non_void].type) {
1778 	case UTIL_FORMAT_TYPE_SIGNED:
1779 	case UTIL_FORMAT_TYPE_FIXED:
1780 		if (desc->channel[first_non_void].size >= 32 ||
1781 		    desc->channel[first_non_void].pure_integer)
1782 			return V_008F0C_BUF_NUM_FORMAT_SINT;
1783 		else if (desc->channel[first_non_void].normalized)
1784 			return V_008F0C_BUF_NUM_FORMAT_SNORM;
1785 		else
1786 			return V_008F0C_BUF_NUM_FORMAT_SSCALED;
1787 		break;
1788 	case UTIL_FORMAT_TYPE_UNSIGNED:
1789 		if (desc->channel[first_non_void].size >= 32 ||
1790 		    desc->channel[first_non_void].pure_integer)
1791 			return V_008F0C_BUF_NUM_FORMAT_UINT;
1792 		else if (desc->channel[first_non_void].normalized)
1793 			return V_008F0C_BUF_NUM_FORMAT_UNORM;
1794 		else
1795 			return V_008F0C_BUF_NUM_FORMAT_USCALED;
1796 		break;
1797 	case UTIL_FORMAT_TYPE_FLOAT:
1798 	default:
1799 		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1800 	}
1801 }
1802 
si_is_vertex_format_supported(struct pipe_screen * screen,enum pipe_format format,unsigned usage)1803 static unsigned si_is_vertex_format_supported(struct pipe_screen *screen,
1804 					      enum pipe_format format,
1805 					      unsigned usage)
1806 {
1807 	const struct util_format_description *desc;
1808 	int first_non_void;
1809 	unsigned data_format;
1810 
1811 	assert((usage & ~(PIPE_BIND_SHADER_IMAGE |
1812 			  PIPE_BIND_SAMPLER_VIEW |
1813 			  PIPE_BIND_VERTEX_BUFFER)) == 0);
1814 
1815 	desc = util_format_description(format);
1816 
1817 	/* There are no native 8_8_8 or 16_16_16 data formats, and we currently
1818 	 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well
1819 	 * for read-only access (with caveats surrounding bounds checks), but
1820 	 * obviously fails for write access which we have to implement for
1821 	 * shader images. Luckily, OpenGL doesn't expect this to be supported
1822 	 * anyway, and so the only impact is on PBO uploads / downloads, which
1823 	 * shouldn't be expected to be fast for GL_RGB anyway.
1824 	 */
1825 	if (desc->block.bits == 3 * 8 ||
1826 	    desc->block.bits == 3 * 16) {
1827 		if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) {
1828 		    usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW);
1829 			if (!usage)
1830 				return 0;
1831 		}
1832 	}
1833 
1834 	first_non_void = util_format_get_first_non_void_channel(format);
1835 	data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
1836 	if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID)
1837 		return 0;
1838 
1839 	return usage;
1840 }
1841 
si_is_colorbuffer_format_supported(enum pipe_format format)1842 static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1843 {
1844 	return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1845 		r600_translate_colorswap(format, false) != ~0U;
1846 }
1847 
si_is_zs_format_supported(enum pipe_format format)1848 static bool si_is_zs_format_supported(enum pipe_format format)
1849 {
1850 	return si_translate_dbformat(format) != V_028040_Z_INVALID;
1851 }
1852 
si_is_format_supported(struct pipe_screen * screen,enum pipe_format format,enum pipe_texture_target target,unsigned sample_count,unsigned usage)1853 static boolean si_is_format_supported(struct pipe_screen *screen,
1854 				      enum pipe_format format,
1855 				      enum pipe_texture_target target,
1856 				      unsigned sample_count,
1857 				      unsigned usage)
1858 {
1859 	unsigned retval = 0;
1860 
1861 	if (target >= PIPE_MAX_TEXTURE_TYPES) {
1862 		R600_ERR("r600: unsupported texture type %d\n", target);
1863 		return false;
1864 	}
1865 
1866 	if (!util_format_is_supported(format, usage))
1867 		return false;
1868 
1869 	if (sample_count > 1) {
1870 		if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
1871 			return false;
1872 
1873 		if (usage & PIPE_BIND_SHADER_IMAGE)
1874 			return false;
1875 
1876 		switch (sample_count) {
1877 		case 2:
1878 		case 4:
1879 		case 8:
1880 			break;
1881 		case 16:
1882 			if (format == PIPE_FORMAT_NONE)
1883 				return true;
1884 			else
1885 				return false;
1886 		default:
1887 			return false;
1888 		}
1889 	}
1890 
1891 	if (usage & (PIPE_BIND_SAMPLER_VIEW |
1892 		     PIPE_BIND_SHADER_IMAGE)) {
1893 		if (target == PIPE_BUFFER) {
1894 			retval |= si_is_vertex_format_supported(
1895 				screen, format, usage & (PIPE_BIND_SAMPLER_VIEW |
1896 						         PIPE_BIND_SHADER_IMAGE));
1897 		} else {
1898 			if (si_is_sampler_format_supported(screen, format))
1899 				retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
1900 						   PIPE_BIND_SHADER_IMAGE);
1901 		}
1902 	}
1903 
1904 	if ((usage & (PIPE_BIND_RENDER_TARGET |
1905 		      PIPE_BIND_DISPLAY_TARGET |
1906 		      PIPE_BIND_SCANOUT |
1907 		      PIPE_BIND_SHARED |
1908 		      PIPE_BIND_BLENDABLE)) &&
1909 	    si_is_colorbuffer_format_supported(format)) {
1910 		retval |= usage &
1911 			  (PIPE_BIND_RENDER_TARGET |
1912 			   PIPE_BIND_DISPLAY_TARGET |
1913 			   PIPE_BIND_SCANOUT |
1914 			   PIPE_BIND_SHARED);
1915 		if (!util_format_is_pure_integer(format) &&
1916 		    !util_format_is_depth_or_stencil(format))
1917 			retval |= usage & PIPE_BIND_BLENDABLE;
1918 	}
1919 
1920 	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
1921 	    si_is_zs_format_supported(format)) {
1922 		retval |= PIPE_BIND_DEPTH_STENCIL;
1923 	}
1924 
1925 	if (usage & PIPE_BIND_VERTEX_BUFFER) {
1926 		retval |= si_is_vertex_format_supported(screen, format,
1927 							PIPE_BIND_VERTEX_BUFFER);
1928 	}
1929 
1930 	if ((usage & PIPE_BIND_LINEAR) &&
1931 	    !util_format_is_compressed(format) &&
1932 	    !(usage & PIPE_BIND_DEPTH_STENCIL))
1933 		retval |= PIPE_BIND_LINEAR;
1934 
1935 	return retval == usage;
1936 }
1937 
/*
 * Framebuffer handling
 */
1941 
si_choose_spi_color_formats(struct r600_surface * surf,unsigned format,unsigned swap,unsigned ntype,bool is_depth)1942 static void si_choose_spi_color_formats(struct r600_surface *surf,
1943 					unsigned format, unsigned swap,
1944 					unsigned ntype, bool is_depth)
1945 {
1946 	/* Alpha is needed for alpha-to-coverage.
1947 	 * Blending may be with or without alpha.
1948 	 */
1949 	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
1950 	unsigned alpha = 0; /* exports alpha, but may not support blending */
1951 	unsigned blend = 0; /* supports blending, but may not export alpha */
1952 	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
1953 
1954 	/* Choose the SPI color formats. These are required values for Stoney/RB+.
1955 	 * Other chips have multiple choices, though they are not necessarily better.
1956 	 */
1957 	switch (format) {
1958 	case V_028C70_COLOR_5_6_5:
1959 	case V_028C70_COLOR_1_5_5_5:
1960 	case V_028C70_COLOR_5_5_5_1:
1961 	case V_028C70_COLOR_4_4_4_4:
1962 	case V_028C70_COLOR_10_11_11:
1963 	case V_028C70_COLOR_11_11_10:
1964 	case V_028C70_COLOR_8:
1965 	case V_028C70_COLOR_8_8:
1966 	case V_028C70_COLOR_8_8_8_8:
1967 	case V_028C70_COLOR_10_10_10_2:
1968 	case V_028C70_COLOR_2_10_10_10:
1969 		if (ntype == V_028C70_NUMBER_UINT)
1970 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
1971 		else if (ntype == V_028C70_NUMBER_SINT)
1972 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
1973 		else
1974 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
1975 		break;
1976 
1977 	case V_028C70_COLOR_16:
1978 	case V_028C70_COLOR_16_16:
1979 	case V_028C70_COLOR_16_16_16_16:
1980 		if (ntype == V_028C70_NUMBER_UNORM ||
1981 		    ntype == V_028C70_NUMBER_SNORM) {
1982 			/* UNORM16 and SNORM16 don't support blending */
1983 			if (ntype == V_028C70_NUMBER_UNORM)
1984 				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
1985 			else
1986 				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
1987 
1988 			/* Use 32 bits per channel for blending. */
1989 			if (format == V_028C70_COLOR_16) {
1990 				if (swap == V_028C70_SWAP_STD) { /* R */
1991 					blend = V_028714_SPI_SHADER_32_R;
1992 					blend_alpha = V_028714_SPI_SHADER_32_AR;
1993 				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
1994 					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
1995 				else
1996 					assert(0);
1997 			} else if (format == V_028C70_COLOR_16_16) {
1998 				if (swap == V_028C70_SWAP_STD) { /* RG */
1999 					blend = V_028714_SPI_SHADER_32_GR;
2000 					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
2001 				} else if (swap == V_028C70_SWAP_ALT) /* RA */
2002 					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
2003 				else
2004 					assert(0);
2005 			} else /* 16_16_16_16 */
2006 				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
2007 		} else if (ntype == V_028C70_NUMBER_UINT)
2008 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
2009 		else if (ntype == V_028C70_NUMBER_SINT)
2010 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
2011 		else if (ntype == V_028C70_NUMBER_FLOAT)
2012 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
2013 		else
2014 			assert(0);
2015 		break;
2016 
2017 	case V_028C70_COLOR_32:
2018 		if (swap == V_028C70_SWAP_STD) { /* R */
2019 			blend = normal = V_028714_SPI_SHADER_32_R;
2020 			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
2021 		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
2022 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
2023 		else
2024 			assert(0);
2025 		break;
2026 
2027 	case V_028C70_COLOR_32_32:
2028 		if (swap == V_028C70_SWAP_STD) { /* RG */
2029 			blend = normal = V_028714_SPI_SHADER_32_GR;
2030 			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
2031 		} else if (swap == V_028C70_SWAP_ALT) /* RA */
2032 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
2033 		else
2034 			assert(0);
2035 		break;
2036 
2037 	case V_028C70_COLOR_32_32_32_32:
2038 	case V_028C70_COLOR_8_24:
2039 	case V_028C70_COLOR_24_8:
2040 	case V_028C70_COLOR_X24_8_32_FLOAT:
2041 		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
2042 		break;
2043 
2044 	default:
2045 		assert(0);
2046 		return;
2047 	}
2048 
2049 	/* The DB->CB copy needs 32_ABGR. */
2050 	if (is_depth)
2051 		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
2052 
2053 	surf->spi_shader_col_format = normal;
2054 	surf->spi_shader_col_format_alpha = alpha;
2055 	surf->spi_shader_col_format_blend = blend;
2056 	surf->spi_shader_col_format_blend_alpha = blend_alpha;
2057 }
2058 
si_initialize_color_surface(struct si_context * sctx,struct r600_surface * surf)2059 static void si_initialize_color_surface(struct si_context *sctx,
2060 					struct r600_surface *surf)
2061 {
2062 	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
2063 	unsigned color_info, color_attrib, color_view;
2064 	unsigned format, swap, ntype, endian;
2065 	const struct util_format_description *desc;
2066 	int i;
2067 	unsigned blend_clamp = 0, blend_bypass = 0;
2068 
2069 	color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
2070 		     S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
2071 
2072 	desc = util_format_description(surf->base.format);
2073 	for (i = 0; i < 4; i++) {
2074 		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
2075 			break;
2076 		}
2077 	}
2078 	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
2079 		ntype = V_028C70_NUMBER_FLOAT;
2080 	} else {
2081 		ntype = V_028C70_NUMBER_UNORM;
2082 		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
2083 			ntype = V_028C70_NUMBER_SRGB;
2084 		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2085 			if (desc->channel[i].pure_integer) {
2086 				ntype = V_028C70_NUMBER_SINT;
2087 			} else {
2088 				assert(desc->channel[i].normalized);
2089 				ntype = V_028C70_NUMBER_SNORM;
2090 			}
2091 		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2092 			if (desc->channel[i].pure_integer) {
2093 				ntype = V_028C70_NUMBER_UINT;
2094 			} else {
2095 				assert(desc->channel[i].normalized);
2096 				ntype = V_028C70_NUMBER_UNORM;
2097 			}
2098 		}
2099 	}
2100 
2101 	format = si_translate_colorformat(surf->base.format);
2102 	if (format == V_028C70_COLOR_INVALID) {
2103 		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
2104 	}
2105 	assert(format != V_028C70_COLOR_INVALID);
2106 	swap = r600_translate_colorswap(surf->base.format, false);
2107 	endian = si_colorformat_endian_swap(format);
2108 
2109 	/* blend clamp should be set for all NORM/SRGB types */
2110 	if (ntype == V_028C70_NUMBER_UNORM ||
2111 	    ntype == V_028C70_NUMBER_SNORM ||
2112 	    ntype == V_028C70_NUMBER_SRGB)
2113 		blend_clamp = 1;
2114 
2115 	/* set blend bypass according to docs if SINT/UINT or
2116 	   8/24 COLOR variants */
2117 	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2118 	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2119 	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
2120 		blend_clamp = 0;
2121 		blend_bypass = 1;
2122 	}
2123 
2124 	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) {
2125 		if (format == V_028C70_COLOR_8 ||
2126 		    format == V_028C70_COLOR_8_8 ||
2127 		    format == V_028C70_COLOR_8_8_8_8)
2128 			surf->color_is_int8 = true;
2129 		else if (format == V_028C70_COLOR_10_10_10_2 ||
2130 			 format == V_028C70_COLOR_2_10_10_10)
2131 			surf->color_is_int10 = true;
2132 	}
2133 
2134 	color_info = S_028C70_FORMAT(format) |
2135 		S_028C70_COMP_SWAP(swap) |
2136 		S_028C70_BLEND_CLAMP(blend_clamp) |
2137 		S_028C70_BLEND_BYPASS(blend_bypass) |
2138 		S_028C70_SIMPLE_FLOAT(1) |
2139 		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2140 				    ntype != V_028C70_NUMBER_SNORM &&
2141 				    ntype != V_028C70_NUMBER_SRGB &&
2142 				    format != V_028C70_COLOR_8_24 &&
2143 				    format != V_028C70_COLOR_24_8) |
2144 		S_028C70_NUMBER_TYPE(ntype) |
2145 		S_028C70_ENDIAN(endian);
2146 
2147 	/* Intensity is implemented as Red, so treat it that way. */
2148 	color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
2149 						  util_format_is_intensity(surf->base.format));
2150 
2151 	if (rtex->resource.b.b.nr_samples > 1) {
2152 		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
2153 
2154 		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2155 				S_028C74_NUM_FRAGMENTS(log_samples);
2156 
2157 		if (rtex->fmask.size) {
2158 			color_info |= S_028C70_COMPRESSION(1);
2159 			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
2160 
2161 			if (sctx->b.chip_class == SI) {
2162 				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
2163 				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
2164 			}
2165 		}
2166 	}
2167 
2168 	surf->cb_color_view = color_view;
2169 	surf->cb_color_info = color_info;
2170 	surf->cb_color_attrib = color_attrib;
2171 
2172 	if (sctx->b.chip_class >= VI) {
2173 		unsigned max_uncompressed_block_size = 2;
2174 
2175 		if (rtex->resource.b.b.nr_samples > 1) {
2176 			if (rtex->surface.bpe == 1)
2177 				max_uncompressed_block_size = 0;
2178 			else if (rtex->surface.bpe == 2)
2179 				max_uncompressed_block_size = 1;
2180 		}
2181 
2182 		surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2183 		                       S_028C78_INDEPENDENT_64B_BLOCKS(1);
2184 	}
2185 
2186 	/* This must be set for fast clear to work without FMASK. */
2187 	if (!rtex->fmask.size && sctx->b.chip_class == SI) {
2188 		unsigned bankh = util_logbase2(rtex->surface.bankh);
2189 		surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2190 	}
2191 
2192 	/* Determine pixel shader export format */
2193 	si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
2194 
2195 	surf->color_initialized = true;
2196 }
2197 
si_init_depth_surface(struct si_context * sctx,struct r600_surface * surf)2198 static void si_init_depth_surface(struct si_context *sctx,
2199 				  struct r600_surface *surf)
2200 {
2201 	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
2202 	unsigned level = surf->base.u.tex.level;
2203 	struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
2204 	unsigned format;
2205 	uint32_t z_info, s_info, db_depth_info;
2206 	uint64_t z_offs, s_offs;
2207 	uint32_t db_htile_data_base, db_htile_surface;
2208 
2209 	format = si_translate_dbformat(rtex->db_render_format);
2210 
2211 	if (format == V_028040_Z_INVALID) {
2212 		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
2213 	}
2214 	assert(format != V_028040_Z_INVALID);
2215 
2216 	s_offs = z_offs = rtex->resource.gpu_address;
2217 	z_offs += rtex->surface.level[level].offset;
2218 	s_offs += rtex->surface.stencil_level[level].offset;
2219 
2220 	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
2221 
2222 	z_info = S_028040_FORMAT(format);
2223 	if (rtex->resource.b.b.nr_samples > 1) {
2224 		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
2225 	}
2226 
2227 	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
2228 		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
2229 	else
2230 		s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2231 
2232 	if (sctx->b.chip_class >= CIK) {
2233 		struct radeon_info *info = &sctx->screen->b.info;
2234 		unsigned index = rtex->surface.tiling_index[level];
2235 		unsigned stencil_index = rtex->surface.stencil_tiling_index[level];
2236 		unsigned macro_index = rtex->surface.macro_tile_index;
2237 		unsigned tile_mode = info->si_tile_mode_array[index];
2238 		unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2239 		unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2240 
2241 		db_depth_info |=
2242 			S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2243 			S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2244 			S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2245 			S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2246 			S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2247 			S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2248 		z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2249 		s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2250 	} else {
2251 		unsigned tile_mode_index = si_tile_mode_index(rtex, level, false);
2252 		z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2253 		tile_mode_index = si_tile_mode_index(rtex, level, true);
2254 		s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2255 	}
2256 
2257 	/* HiZ aka depth buffer htile */
2258 	/* use htile only for first level */
2259 	if (rtex->htile_buffer && !level) {
2260 		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2261 			  S_028040_ALLOW_EXPCLEAR(1);
2262 
2263 		if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
2264 			/* Workaround: For a not yet understood reason, the
2265 			 * combination of MSAA, fast stencil clear and stencil
2266 			 * decompress messes with subsequent stencil buffer
2267 			 * uses. Problem was reproduced on Verde, Bonaire,
2268 			 * Tonga, and Carrizo.
2269 			 *
2270 			 * Disabling EXPCLEAR works around the problem.
2271 			 *
2272 			 * Check piglit's arb_texture_multisample-stencil-clear
2273 			 * test if you want to try changing this.
2274 			 */
2275 			if (rtex->resource.b.b.nr_samples <= 1)
2276 				s_info |= S_028044_ALLOW_EXPCLEAR(1);
2277 		} else if (!rtex->tc_compatible_htile) {
2278 			/* Use all of the htile_buffer for depth if there's no stencil.
2279 			 * This must not be set when TC-compatible HTILE is enabled
2280 			 * due to a hw bug.
2281 			 */
2282 			s_info |= S_028044_TILE_STENCIL_DISABLE(1);
2283 		}
2284 
2285 		uint64_t va = rtex->htile_buffer->gpu_address;
2286 		db_htile_data_base = va >> 8;
2287 		db_htile_surface = S_028ABC_FULL_CACHE(1);
2288 
2289 		if (rtex->tc_compatible_htile) {
2290 			db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
2291 
2292 			switch (rtex->resource.b.b.nr_samples) {
2293 			case 0:
2294 			case 1:
2295 				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
2296 				break;
2297 			case 2:
2298 			case 4:
2299 				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
2300 				break;
2301 			case 8:
2302 				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
2303 				break;
2304 			default:
2305 				assert(0);
2306 			}
2307 		}
2308 	} else {
2309 		db_htile_data_base = 0;
2310 		db_htile_surface = 0;
2311 	}
2312 
2313 	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
2314 
2315 	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
2316 			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
2317 	surf->db_htile_data_base = db_htile_data_base;
2318 	surf->db_depth_info = db_depth_info;
2319 	surf->db_z_info = z_info;
2320 	surf->db_stencil_info = s_info;
2321 	surf->db_depth_base = z_offs >> 8;
2322 	surf->db_stencil_base = s_offs >> 8;
2323 	surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
2324 			      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
2325 	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
2326 							levelinfo->nblk_y) / 64 - 1);
2327 	surf->db_htile_surface = db_htile_surface;
2328 
2329 	surf->depth_initialized = true;
2330 }
2331 
si_dec_framebuffer_counters(const struct pipe_framebuffer_state * state)2332 static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
2333 {
2334 	for (int i = 0; i < state->nr_cbufs; ++i) {
2335 		struct r600_surface *surf = NULL;
2336 		struct r600_texture *rtex;
2337 
2338 		if (!state->cbufs[i])
2339 			continue;
2340 		surf = (struct r600_surface*)state->cbufs[i];
2341 		rtex = (struct r600_texture*)surf->base.texture;
2342 
2343 		p_atomic_dec(&rtex->framebuffers_bound);
2344 	}
2345 }
2346 
si_set_framebuffer_state(struct pipe_context * ctx,const struct pipe_framebuffer_state * state)2347 static void si_set_framebuffer_state(struct pipe_context *ctx,
2348 				     const struct pipe_framebuffer_state *state)
2349 {
2350 	struct si_context *sctx = (struct si_context *)ctx;
2351 	struct pipe_constant_buffer constbuf = {0};
2352 	struct r600_surface *surf = NULL;
2353 	struct r600_texture *rtex;
2354 	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
2355 	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
2356 	int i;
2357 
2358 	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
2359 		if (!sctx->framebuffer.state.cbufs[i])
2360 			continue;
2361 
2362 		rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
2363 		if (rtex->dcc_gather_statistics)
2364 			vi_separate_dcc_stop_query(ctx, rtex);
2365 	}
2366 
2367 	/* Only flush TC when changing the framebuffer state, because
2368 	 * the only client not using TC that can change textures is
2369 	 * the framebuffer.
2370 	 *
2371 	 * Flush all CB and DB caches here because all buffers can be used
2372 	 * for write by both TC (with shader image stores) and CB/DB.
2373 	 */
2374 	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
2375 			 SI_CONTEXT_INV_GLOBAL_L2 |
2376 			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
2377 			 SI_CONTEXT_CS_PARTIAL_FLUSH;
2378 
2379 	/* Take the maximum of the old and new count. If the new count is lower,
2380 	 * dirtying is needed to disable the unbound colorbuffers.
2381 	 */
2382 	sctx->framebuffer.dirty_cbufs |=
2383 		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
2384 	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
2385 
2386 	si_dec_framebuffer_counters(&sctx->framebuffer.state);
2387 	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
2388 
2389 	sctx->framebuffer.colorbuf_enabled_4bit = 0;
2390 	sctx->framebuffer.spi_shader_col_format = 0;
2391 	sctx->framebuffer.spi_shader_col_format_alpha = 0;
2392 	sctx->framebuffer.spi_shader_col_format_blend = 0;
2393 	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
2394 	sctx->framebuffer.color_is_int8 = 0;
2395 	sctx->framebuffer.color_is_int10 = 0;
2396 
2397 	sctx->framebuffer.compressed_cb_mask = 0;
2398 	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
2399 	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
2400 	sctx->framebuffer.any_dst_linear = false;
2401 
2402 	for (i = 0; i < state->nr_cbufs; i++) {
2403 		if (!state->cbufs[i])
2404 			continue;
2405 
2406 		surf = (struct r600_surface*)state->cbufs[i];
2407 		rtex = (struct r600_texture*)surf->base.texture;
2408 
2409 		if (!surf->color_initialized) {
2410 			si_initialize_color_surface(sctx, surf);
2411 		}
2412 
2413 		sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4);
2414 		sctx->framebuffer.spi_shader_col_format |=
2415 			surf->spi_shader_col_format << (i * 4);
2416 		sctx->framebuffer.spi_shader_col_format_alpha |=
2417 			surf->spi_shader_col_format_alpha << (i * 4);
2418 		sctx->framebuffer.spi_shader_col_format_blend |=
2419 			surf->spi_shader_col_format_blend << (i * 4);
2420 		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
2421 			surf->spi_shader_col_format_blend_alpha << (i * 4);
2422 
2423 		if (surf->color_is_int8)
2424 			sctx->framebuffer.color_is_int8 |= 1 << i;
2425 		if (surf->color_is_int10)
2426 			sctx->framebuffer.color_is_int10 |= 1 << i;
2427 
2428 		if (rtex->fmask.size) {
2429 			sctx->framebuffer.compressed_cb_mask |= 1 << i;
2430 		}
2431 
2432 		if (rtex->surface.is_linear)
2433 			sctx->framebuffer.any_dst_linear = true;
2434 
2435 		r600_context_add_resource_size(ctx, surf->base.texture);
2436 
2437 		p_atomic_inc(&rtex->framebuffers_bound);
2438 
2439 		if (rtex->dcc_gather_statistics) {
2440 			/* Dirty tracking must be enabled for DCC usage analysis. */
2441 			sctx->framebuffer.compressed_cb_mask |= 1 << i;
2442 			vi_separate_dcc_start_query(ctx, rtex);
2443 		}
2444 	}
2445 
2446 	if (state->zsbuf) {
2447 		surf = (struct r600_surface*)state->zsbuf;
2448 		rtex = (struct r600_texture*)surf->base.texture;
2449 
2450 		if (!surf->depth_initialized) {
2451 			si_init_depth_surface(sctx, surf);
2452 		}
2453 		r600_context_add_resource_size(ctx, surf->base.texture);
2454 	}
2455 
2456 	si_update_poly_offset_state(sctx);
2457 	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
2458 	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
2459 
2460 	if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
2461 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
2462 
2463 	if (sctx->framebuffer.nr_samples != old_nr_samples) {
2464 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
2465 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
2466 
2467 		/* Set sample locations as fragment shader constants. */
2468 		switch (sctx->framebuffer.nr_samples) {
2469 		case 1:
2470 			constbuf.user_buffer = sctx->b.sample_locations_1x;
2471 			break;
2472 		case 2:
2473 			constbuf.user_buffer = sctx->b.sample_locations_2x;
2474 			break;
2475 		case 4:
2476 			constbuf.user_buffer = sctx->b.sample_locations_4x;
2477 			break;
2478 		case 8:
2479 			constbuf.user_buffer = sctx->b.sample_locations_8x;
2480 			break;
2481 		case 16:
2482 			constbuf.user_buffer = sctx->b.sample_locations_16x;
2483 			break;
2484 		default:
2485 			R600_ERR("Requested an invalid number of samples %i.\n",
2486 				 sctx->framebuffer.nr_samples);
2487 			assert(0);
2488 		}
2489 		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
2490 		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
2491 
2492 		si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
2493 	}
2494 
2495 	sctx->need_check_render_feedback = true;
2496 	sctx->do_update_shaders = true;
2497 }
2498 
si_emit_framebuffer_state(struct si_context * sctx,struct r600_atom * atom)2499 static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
2500 {
2501 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2502 	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
2503 	unsigned i, nr_cbufs = state->nr_cbufs;
2504 	struct r600_texture *tex = NULL;
2505 	struct r600_surface *cb = NULL;
2506 	unsigned cb_color_info = 0;
2507 
2508 	/* Colorbuffers. */
2509 	for (i = 0; i < nr_cbufs; i++) {
2510 		const struct radeon_surf_level *level_info;
2511 		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2512 		unsigned cb_color_base, cb_color_fmask, cb_color_attrib;
2513 		unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;
2514 
2515 		if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
2516 			continue;
2517 
2518 		cb = (struct r600_surface*)state->cbufs[i];
2519 		if (!cb) {
2520 			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
2521 					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
2522 			continue;
2523 		}
2524 
2525 		tex = (struct r600_texture *)cb->base.texture;
2526 		level_info =  &tex->surface.level[cb->base.u.tex.level];
2527 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2528 				      &tex->resource, RADEON_USAGE_READWRITE,
2529 				      tex->resource.b.b.nr_samples > 1 ?
2530 					      RADEON_PRIO_COLOR_BUFFER_MSAA :
2531 					      RADEON_PRIO_COLOR_BUFFER);
2532 
2533 		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
2534 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2535 				tex->cmask_buffer, RADEON_USAGE_READWRITE,
2536 				RADEON_PRIO_CMASK);
2537 		}
2538 
2539 		if (tex->dcc_separate_buffer)
2540 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2541 						  tex->dcc_separate_buffer,
2542 						  RADEON_USAGE_READWRITE,
2543 						  RADEON_PRIO_DCC);
2544 
2545 		/* Compute mutable surface parameters. */
2546 		pitch_tile_max = level_info->nblk_x / 8 - 1;
2547 		slice_tile_max = level_info->nblk_x *
2548 				 level_info->nblk_y / 64 - 1;
2549 		tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);
2550 
2551 		cb_color_base = (tex->resource.gpu_address + level_info->offset) >> 8;
2552 		cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2553 		cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2554 		cb_color_attrib = cb->cb_color_attrib |
2555 				  S_028C74_TILE_MODE_INDEX(tile_mode_index);
2556 
2557 		if (tex->fmask.size) {
2558 			if (sctx->b.chip_class >= CIK)
2559 				cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1);
2560 			cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index);
2561 			cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8;
2562 			cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max);
2563 		} else {
2564 			/* This must be set for fast clear to work without FMASK. */
2565 			if (sctx->b.chip_class >= CIK)
2566 				cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2567 			cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2568 			cb_color_fmask = cb_color_base;
2569 			cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2570 		}
2571 
2572 		cb_color_info = cb->cb_color_info | tex->cb_color_info;
2573 
2574 		if (tex->dcc_offset && cb->base.u.tex.level < tex->surface.num_dcc_levels) {
2575 			bool is_msaa_resolve_dst = state->cbufs[0] &&
2576 						   state->cbufs[0]->texture->nr_samples > 1 &&
2577 						   state->cbufs[1] == &cb->base &&
2578 						   state->cbufs[1]->texture->nr_samples <= 1;
2579 
2580 			if (!is_msaa_resolve_dst)
2581 				cb_color_info |= S_028C70_DCC_ENABLE(1);
2582 		}
2583 
2584 		radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
2585 					   sctx->b.chip_class >= VI ? 14 : 13);
2586 		radeon_emit(cs, cb_color_base);		/* R_028C60_CB_COLOR0_BASE */
2587 		radeon_emit(cs, cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
2588 		radeon_emit(cs, cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
2589 		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
2590 		radeon_emit(cs, cb_color_info);		/* R_028C70_CB_COLOR0_INFO */
2591 		radeon_emit(cs, cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
2592 		radeon_emit(cs, cb->cb_dcc_control);	/* R_028C78_CB_COLOR0_DCC_CONTROL */
2593 		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
2594 		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
2595 		radeon_emit(cs, cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
2596 		radeon_emit(cs, cb_color_fmask_slice);		/* R_028C88_CB_COLOR0_FMASK_SLICE */
2597 		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
2598 		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */
2599 
2600 		if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
2601 			radeon_emit(cs, ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
2602 					 tex->dcc_offset +
2603 				         tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8);
2604 	}
2605 	for (; i < 8 ; i++)
2606 		if (sctx->framebuffer.dirty_cbufs & (1 << i))
2607 			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
2608 
2609 	/* ZS buffer. */
2610 	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
2611 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
2612 		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
2613 
2614 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2615 				      &rtex->resource, RADEON_USAGE_READWRITE,
2616 				      zb->base.texture->nr_samples > 1 ?
2617 					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
2618 					      RADEON_PRIO_DEPTH_BUFFER);
2619 
2620 		if (zb->db_htile_data_base) {
2621 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2622 					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
2623 					      RADEON_PRIO_HTILE);
2624 		}
2625 
2626 		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
2627 		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
2628 
2629 		radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
2630 		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
2631 		radeon_emit(cs, zb->db_z_info |		/* R_028040_DB_Z_INFO */
2632 			    S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
2633 		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
2634 		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
2635 		radeon_emit(cs, zb->db_stencil_base);	/* R_02804C_DB_STENCIL_READ_BASE */
2636 		radeon_emit(cs, zb->db_depth_base);	/* R_028050_DB_Z_WRITE_BASE */
2637 		radeon_emit(cs, zb->db_stencil_base);	/* R_028054_DB_STENCIL_WRITE_BASE */
2638 		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
2639 		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */
2640 
2641 		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
2642 		radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
2643 		radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
2644 
2645 		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
2646 	} else if (sctx->framebuffer.dirty_zsbuf) {
2647 		radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
2648 		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
2649 		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
2650 	}
2651 
2652 	/* Framebuffer dimensions. */
2653         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
2654 	radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
2655 			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
2656 
2657 	sctx->framebuffer.dirty_cbufs = 0;
2658 	sctx->framebuffer.dirty_zsbuf = false;
2659 }
2660 
si_emit_msaa_sample_locs(struct si_context * sctx,struct r600_atom * atom)2661 static void si_emit_msaa_sample_locs(struct si_context *sctx,
2662 				     struct r600_atom *atom)
2663 {
2664 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2665 	unsigned nr_samples = sctx->framebuffer.nr_samples;
2666 
2667 	/* Smoothing (only possible with nr_samples == 1) uses the same
2668 	 * sample locations as the MSAA it simulates.
2669 	 */
2670 	if (nr_samples <= 1 && sctx->smoothing_enabled)
2671 		nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
2672 
2673 	/* On Polaris, the small primitive filter uses the sample locations
2674 	 * even when MSAA is off, so we need to make sure they're set to 0.
2675 	 */
2676 	if (sctx->b.family >= CHIP_POLARIS10)
2677 		nr_samples = MAX2(nr_samples, 1);
2678 
2679 	if (nr_samples >= 1 &&
2680 	    (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
2681 		sctx->msaa_sample_locs.nr_samples = nr_samples;
2682 		cayman_emit_msaa_sample_locs(cs, nr_samples);
2683 	}
2684 
2685 	if (sctx->b.family >= CHIP_POLARIS10) {
2686 		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
2687 		unsigned small_prim_filter_cntl =
2688 			S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
2689 			S_028830_LINE_FILTER_DISABLE(1); /* line bug */
2690 
2691 		/* The alternative of setting sample locations to 0 would
2692 		 * require a DB flush to avoid Z errors, see
2693 		 * https://bugs.freedesktop.org/show_bug.cgi?id=96908
2694 		 */
2695 		if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
2696 			small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
2697 
2698 		radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
2699 				       small_prim_filter_cntl);
2700 	}
2701 }
2702 
si_emit_msaa_config(struct si_context * sctx,struct r600_atom * atom)2703 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
2704 {
2705 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2706 	unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes;
2707 	/* 33% faster rendering to linear color buffers */
2708 	bool dst_is_linear = sctx->framebuffer.any_dst_linear;
2709 	unsigned sc_mode_cntl_1 =
2710 		S_028A4C_WALK_SIZE(dst_is_linear) |
2711 		S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
2712 		S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
2713 		/* always 1: */
2714 		S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
2715 		S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
2716 		S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
2717 		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
2718 		S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
2719 		S_028A4C_FORCE_EOV_REZ_ENABLE(1);
2720 
2721 	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
2722 				sctx->ps_iter_samples,
2723 				sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
2724 				sc_mode_cntl_1);
2725 }
2726 
si_set_min_samples(struct pipe_context * ctx,unsigned min_samples)2727 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
2728 {
2729 	struct si_context *sctx = (struct si_context *)ctx;
2730 
2731 	if (sctx->ps_iter_samples == min_samples)
2732 		return;
2733 
2734 	sctx->ps_iter_samples = min_samples;
2735 	sctx->do_update_shaders = true;
2736 
2737 	if (sctx->framebuffer.nr_samples > 1)
2738 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
2739 }
2740 
2741 /*
2742  * Samplers
2743  */
2744 
2745 /**
2746  * Build the sampler view descriptor for a buffer texture.
2747  * @param state 256-bit descriptor; only the high 128 bits are filled in
2748  */
2749 void
si_make_buffer_descriptor(struct si_screen * screen,struct r600_resource * buf,enum pipe_format format,unsigned offset,unsigned size,uint32_t * state)2750 si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
2751 			  enum pipe_format format,
2752 			  unsigned offset, unsigned size,
2753 			  uint32_t *state)
2754 {
2755 	const struct util_format_description *desc;
2756 	int first_non_void;
2757 	unsigned stride;
2758 	unsigned num_records;
2759 	unsigned num_format, data_format;
2760 
2761 	desc = util_format_description(format);
2762 	first_non_void = util_format_get_first_non_void_channel(format);
2763 	stride = desc->block.bits / 8;
2764 	num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void);
2765 	data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void);
2766 
2767 	num_records = size / stride;
2768 	num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);
2769 
2770 	if (screen->b.chip_class >= VI)
2771 		num_records *= stride;
2772 
2773 	state[4] = 0;
2774 	state[5] = S_008F04_STRIDE(stride);
2775 	state[6] = num_records;
2776 	state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2777 		   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2778 		   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2779 		   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2780 		   S_008F0C_NUM_FORMAT(num_format) |
2781 		   S_008F0C_DATA_FORMAT(data_format);
2782 }
2783 
2784 /**
2785  * Build the sampler view descriptor for a texture.
2786  */
2787 void
si_make_texture_descriptor(struct si_screen * screen,struct r600_texture * tex,bool sampler,enum pipe_texture_target target,enum pipe_format pipe_format,const unsigned char state_swizzle[4],unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned width,unsigned height,unsigned depth,uint32_t * state,uint32_t * fmask_state)2788 si_make_texture_descriptor(struct si_screen *screen,
2789 			   struct r600_texture *tex,
2790 			   bool sampler,
2791 			   enum pipe_texture_target target,
2792 			   enum pipe_format pipe_format,
2793 			   const unsigned char state_swizzle[4],
2794 			   unsigned first_level, unsigned last_level,
2795 			   unsigned first_layer, unsigned last_layer,
2796 			   unsigned width, unsigned height, unsigned depth,
2797 			   uint32_t *state,
2798 			   uint32_t *fmask_state)
2799 {
2800 	struct pipe_resource *res = &tex->resource.b.b;
2801 	const struct util_format_description *desc;
2802 	unsigned char swizzle[4];
2803 	int first_non_void;
2804 	unsigned num_format, data_format, type;
2805 	uint64_t va;
2806 
2807 	desc = util_format_description(pipe_format);
2808 
2809 	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
2810 		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2811 		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2812 		const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};
2813 
2814 		switch (pipe_format) {
2815 		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2816 		case PIPE_FORMAT_X32_S8X24_UINT:
2817 		case PIPE_FORMAT_X8Z24_UNORM:
2818 			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
2819 			break;
2820 		case PIPE_FORMAT_X24S8_UINT:
2821 			/*
2822 			 * X24S8 is implemented as an 8_8_8_8 data format, to
2823 			 * fix texture gathers. This affects at least
2824 			 * GL45-CTS.texture_cube_map_array.sampling on VI.
2825 			 */
2826 			util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
2827 			break;
2828 		default:
2829 			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
2830 		}
2831 	} else {
2832 		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
2833 	}
2834 
2835 	first_non_void = util_format_get_first_non_void_channel(pipe_format);
2836 
2837 	switch (pipe_format) {
2838 	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2839 		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2840 		break;
2841 	default:
2842 		if (first_non_void < 0) {
2843 			if (util_format_is_compressed(pipe_format)) {
2844 				switch (pipe_format) {
2845 				case PIPE_FORMAT_DXT1_SRGB:
2846 				case PIPE_FORMAT_DXT1_SRGBA:
2847 				case PIPE_FORMAT_DXT3_SRGBA:
2848 				case PIPE_FORMAT_DXT5_SRGBA:
2849 				case PIPE_FORMAT_BPTC_SRGBA:
2850 				case PIPE_FORMAT_ETC2_SRGB8:
2851 				case PIPE_FORMAT_ETC2_SRGB8A1:
2852 				case PIPE_FORMAT_ETC2_SRGBA8:
2853 					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2854 					break;
2855 				case PIPE_FORMAT_RGTC1_SNORM:
2856 				case PIPE_FORMAT_LATC1_SNORM:
2857 				case PIPE_FORMAT_RGTC2_SNORM:
2858 				case PIPE_FORMAT_LATC2_SNORM:
2859 				case PIPE_FORMAT_ETC2_R11_SNORM:
2860 				case PIPE_FORMAT_ETC2_RG11_SNORM:
2861 				/* implies float, so use SNORM/UNORM to determine
2862 				   whether data is signed or not */
2863 				case PIPE_FORMAT_BPTC_RGB_FLOAT:
2864 					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2865 					break;
2866 				default:
2867 					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2868 					break;
2869 				}
2870 			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
2871 				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2872 			} else {
2873 				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2874 			}
2875 		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
2876 			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2877 		} else {
2878 			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2879 
2880 			switch (desc->channel[first_non_void].type) {
2881 			case UTIL_FORMAT_TYPE_FLOAT:
2882 				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2883 				break;
2884 			case UTIL_FORMAT_TYPE_SIGNED:
2885 				if (desc->channel[first_non_void].normalized)
2886 					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2887 				else if (desc->channel[first_non_void].pure_integer)
2888 					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
2889 				else
2890 					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
2891 				break;
2892 			case UTIL_FORMAT_TYPE_UNSIGNED:
2893 				if (desc->channel[first_non_void].normalized)
2894 					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2895 				else if (desc->channel[first_non_void].pure_integer)
2896 					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
2897 				else
2898 					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
2899 			}
2900 		}
2901 	}
2902 
2903 	data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void);
2904 	if (data_format == ~0) {
2905 		data_format = 0;
2906 	}
2907 
2908 	if (!sampler &&
2909 	    (res->target == PIPE_TEXTURE_CUBE ||
2910 	     res->target == PIPE_TEXTURE_CUBE_ARRAY ||
2911 	     res->target == PIPE_TEXTURE_3D)) {
2912 		/* For the purpose of shader images, treat cube maps and 3D
2913 		 * textures as 2D arrays. For 3D textures, the address
2914 		 * calculations for mipmaps are different, so we rely on the
2915 		 * caller to effectively disable mipmaps.
2916 		 */
2917 		type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
2918 
2919 		assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
2920 	} else {
2921 		type = si_tex_dim(res->target, target, res->nr_samples);
2922 	}
2923 
2924 	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
2925 	        height = 1;
2926 		depth = res->array_size;
2927 	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
2928 		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
2929 		if (sampler || res->target != PIPE_TEXTURE_3D)
2930 			depth = res->array_size;
2931 	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
2932 		depth = res->array_size / 6;
2933 
2934 	state[0] = 0;
2935 	state[1] = (S_008F14_DATA_FORMAT(data_format) |
2936 		    S_008F14_NUM_FORMAT(num_format));
2937 	state[2] = (S_008F18_WIDTH(width - 1) |
2938 		    S_008F18_HEIGHT(height - 1) |
2939 		    S_008F18_PERF_MOD(4));
2940 	state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
2941 		    S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
2942 		    S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
2943 		    S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
2944 		    S_008F1C_BASE_LEVEL(res->nr_samples > 1 ?
2945 					0 : first_level) |
2946 		    S_008F1C_LAST_LEVEL(res->nr_samples > 1 ?
2947 					util_logbase2(res->nr_samples) :
2948 					last_level) |
2949 		    S_008F1C_POW2_PAD(res->last_level > 0) |
2950 		    S_008F1C_TYPE(type));
2951 	state[4] = S_008F20_DEPTH(depth - 1);
2952 	state[5] = (S_008F24_BASE_ARRAY(first_layer) |
2953 		    S_008F24_LAST_ARRAY(last_layer));
2954 	state[6] = 0;
2955 	state[7] = 0;
2956 
2957 	if (tex->dcc_offset) {
2958 		unsigned swap = r600_translate_colorswap(pipe_format, false);
2959 
2960 		state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
2961 	} else {
2962 		/* The last dword is unused by hw. The shader uses it to clear
2963 		 * bits in the first dword of sampler state.
2964 		 */
2965 		if (screen->b.chip_class <= CIK && res->nr_samples <= 1) {
2966 			if (first_level == last_level)
2967 				state[7] = C_008F30_MAX_ANISO_RATIO;
2968 			else
2969 				state[7] = 0xffffffff;
2970 		}
2971 	}
2972 
2973 	/* Initialize the sampler view for FMASK. */
2974 	if (tex->fmask.size) {
2975 		uint32_t fmask_format;
2976 
2977 		va = tex->resource.gpu_address + tex->fmask.offset;
2978 
2979 		switch (res->nr_samples) {
2980 		case 2:
2981 			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
2982 			break;
2983 		case 4:
2984 			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
2985 			break;
2986 		case 8:
2987 			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
2988 			break;
2989 		default:
2990 			assert(0);
2991 			fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
2992 		}
2993 
2994 		fmask_state[0] = va >> 8;
2995 		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
2996 				 S_008F14_DATA_FORMAT(fmask_format) |
2997 				 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
2998 		fmask_state[2] = S_008F18_WIDTH(width - 1) |
2999 				 S_008F18_HEIGHT(height - 1);
3000 		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
3001 				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
3002 				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
3003 				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
3004 				 S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) |
3005 				 S_008F1C_TYPE(si_tex_dim(res->target, target, 0));
3006 		fmask_state[4] = S_008F20_DEPTH(depth - 1) |
3007 				 S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1);
3008 		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) |
3009 				 S_008F24_LAST_ARRAY(last_layer);
3010 		fmask_state[6] = 0;
3011 		fmask_state[7] = 0;
3012 	}
3013 }
3014 
3015 /**
3016  * Create a sampler view.
3017  *
3018  * @param ctx		context
3019  * @param texture	texture
3020  * @param state		sampler view template
3021  * @param width0	width0 override (for compressed textures as int)
3022  * @param height0	height0 override (for compressed textures as int)
3023  * @param force_level   set the base address to the level (for compressed textures)
3024  */
3025 struct pipe_sampler_view *
si_create_sampler_view_custom(struct pipe_context * ctx,struct pipe_resource * texture,const struct pipe_sampler_view * state,unsigned width0,unsigned height0,unsigned force_level)3026 si_create_sampler_view_custom(struct pipe_context *ctx,
3027 			      struct pipe_resource *texture,
3028 			      const struct pipe_sampler_view *state,
3029 			      unsigned width0, unsigned height0,
3030 			      unsigned force_level)
3031 {
3032 	struct si_context *sctx = (struct si_context*)ctx;
3033 	struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
3034 	struct r600_texture *tmp = (struct r600_texture*)texture;
3035 	unsigned base_level, first_level, last_level;
3036 	unsigned char state_swizzle[4];
3037 	unsigned height, depth, width;
3038 	unsigned last_layer = state->u.tex.last_layer;
3039 	enum pipe_format pipe_format;
3040 	const struct radeon_surf_level *surflevel;
3041 
3042 	if (!view)
3043 		return NULL;
3044 
3045 	/* initialize base object */
3046 	view->base = *state;
3047 	view->base.texture = NULL;
3048 	view->base.reference.count = 1;
3049 	view->base.context = ctx;
3050 
3051 	assert(texture);
3052 	pipe_resource_reference(&view->base.texture, texture);
3053 
3054 	if (state->format == PIPE_FORMAT_X24S8_UINT ||
3055 	    state->format == PIPE_FORMAT_S8X24_UINT ||
3056 	    state->format == PIPE_FORMAT_X32_S8X24_UINT ||
3057 	    state->format == PIPE_FORMAT_S8_UINT)
3058 		view->is_stencil_sampler = true;
3059 
3060 	/* Buffer resource. */
3061 	if (texture->target == PIPE_BUFFER) {
3062 		si_make_buffer_descriptor(sctx->screen,
3063 					  (struct r600_resource *)texture,
3064 					  state->format,
3065 					  state->u.buf.offset,
3066 					  state->u.buf.size,
3067 					  view->state);
3068 		return &view->base;
3069 	}
3070 
3071 	state_swizzle[0] = state->swizzle_r;
3072 	state_swizzle[1] = state->swizzle_g;
3073 	state_swizzle[2] = state->swizzle_b;
3074 	state_swizzle[3] = state->swizzle_a;
3075 
3076 	base_level = 0;
3077 	first_level = state->u.tex.first_level;
3078 	last_level = state->u.tex.last_level;
3079 	width = width0;
3080 	height = height0;
3081 	depth = texture->depth0;
3082 
3083 	if (force_level) {
3084 		assert(force_level == first_level &&
3085 		       force_level == last_level);
3086 		base_level = force_level;
3087 		first_level = 0;
3088 		last_level = 0;
3089 		width = u_minify(width, force_level);
3090 		height = u_minify(height, force_level);
3091 		depth = u_minify(depth, force_level);
3092 	}
3093 
3094 	/* This is not needed if state trackers set last_layer correctly. */
3095 	if (state->target == PIPE_TEXTURE_1D ||
3096 	    state->target == PIPE_TEXTURE_2D ||
3097 	    state->target == PIPE_TEXTURE_RECT ||
3098 	    state->target == PIPE_TEXTURE_CUBE)
3099 		last_layer = state->u.tex.first_layer;
3100 
3101 	/* Texturing with separate depth and stencil. */
3102 	pipe_format = state->format;
3103 
3104 	/* Depth/stencil texturing sometimes needs separate texture. */
3105 	if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) {
3106 		if (!tmp->flushed_depth_texture &&
3107 		    !r600_init_flushed_depth_texture(ctx, texture, NULL)) {
3108 			pipe_resource_reference(&view->base.texture, NULL);
3109 			FREE(view);
3110 			return NULL;
3111 		}
3112 
3113 		assert(tmp->flushed_depth_texture);
3114 
3115 		/* Override format for the case where the flushed texture
3116 		 * contains only Z or only S.
3117 		 */
3118 		if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format)
3119 			pipe_format = tmp->flushed_depth_texture->resource.b.b.format;
3120 
3121 		tmp = tmp->flushed_depth_texture;
3122 	}
3123 
3124 	surflevel = tmp->surface.level;
3125 
3126 	if (tmp->db_compatible) {
3127 		if (!view->is_stencil_sampler)
3128 			pipe_format = tmp->db_render_format;
3129 
3130 		switch (pipe_format) {
3131 		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
3132 			pipe_format = PIPE_FORMAT_Z32_FLOAT;
3133 			break;
3134 		case PIPE_FORMAT_X8Z24_UNORM:
3135 		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
3136 			/* Z24 is always stored like this for DB
3137 			 * compatibility.
3138 			 */
3139 			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
3140 			break;
3141 		case PIPE_FORMAT_X24S8_UINT:
3142 		case PIPE_FORMAT_S8X24_UINT:
3143 		case PIPE_FORMAT_X32_S8X24_UINT:
3144 			pipe_format = PIPE_FORMAT_S8_UINT;
3145 			surflevel = tmp->surface.stencil_level;
3146 			break;
3147 		default:;
3148 		}
3149 	}
3150 
3151 	vi_dcc_disable_if_incompatible_format(&sctx->b, texture,
3152 					      state->u.tex.first_level,
3153 					      state->format);
3154 
3155 	si_make_texture_descriptor(sctx->screen, tmp, true,
3156 				   state->target, pipe_format, state_swizzle,
3157 				   first_level, last_level,
3158 				   state->u.tex.first_layer, last_layer,
3159 				   width, height, depth,
3160 				   view->state, view->fmask_state);
3161 
3162 	view->base_level_info = &surflevel[base_level];
3163 	view->base_level = base_level;
3164 	view->block_width = util_format_get_blockwidth(pipe_format);
3165 	return &view->base;
3166 }
3167 
3168 static struct pipe_sampler_view *
si_create_sampler_view(struct pipe_context * ctx,struct pipe_resource * texture,const struct pipe_sampler_view * state)3169 si_create_sampler_view(struct pipe_context *ctx,
3170 		       struct pipe_resource *texture,
3171 		       const struct pipe_sampler_view *state)
3172 {
3173 	return si_create_sampler_view_custom(ctx, texture, state,
3174 					     texture ? texture->width0 : 0,
3175 					     texture ? texture->height0 : 0, 0);
3176 }
3177 
si_sampler_view_destroy(struct pipe_context * ctx,struct pipe_sampler_view * state)3178 static void si_sampler_view_destroy(struct pipe_context *ctx,
3179 				    struct pipe_sampler_view *state)
3180 {
3181 	struct si_sampler_view *view = (struct si_sampler_view *)state;
3182 
3183 	pipe_resource_reference(&state->texture, NULL);
3184 	FREE(view);
3185 }
3186 
wrap_mode_uses_border_color(unsigned wrap,bool linear_filter)3187 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
3188 {
3189 	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
3190 	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
3191 	       (linear_filter &&
3192 	        (wrap == PIPE_TEX_WRAP_CLAMP ||
3193 		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
3194 }
3195 
sampler_state_needs_border_color(const struct pipe_sampler_state * state)3196 static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
3197 {
3198 	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
3199 			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
3200 
3201 	return (state->border_color.ui[0] || state->border_color.ui[1] ||
3202 		state->border_color.ui[2] || state->border_color.ui[3]) &&
3203 	       (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
3204 		wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
3205 		wrap_mode_uses_border_color(state->wrap_r, linear_filter));
3206 }
3207 
si_create_sampler_state(struct pipe_context * ctx,const struct pipe_sampler_state * state)3208 static void *si_create_sampler_state(struct pipe_context *ctx,
3209 				     const struct pipe_sampler_state *state)
3210 {
3211 	struct si_context *sctx = (struct si_context *)ctx;
3212 	struct r600_common_screen *rscreen = sctx->b.screen;
3213 	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
3214 	unsigned border_color_type, border_color_index = 0;
3215 	unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
3216 						       : state->max_anisotropy;
3217 	unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
3218 
3219 	if (!rstate) {
3220 		return NULL;
3221 	}
3222 
3223 	if (!sampler_state_needs_border_color(state))
3224 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3225 	else if (state->border_color.f[0] == 0 &&
3226 		 state->border_color.f[1] == 0 &&
3227 		 state->border_color.f[2] == 0 &&
3228 		 state->border_color.f[3] == 0)
3229 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3230 	else if (state->border_color.f[0] == 0 &&
3231 		 state->border_color.f[1] == 0 &&
3232 		 state->border_color.f[2] == 0 &&
3233 		 state->border_color.f[3] == 1)
3234 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3235 	else if (state->border_color.f[0] == 1 &&
3236 		 state->border_color.f[1] == 1 &&
3237 		 state->border_color.f[2] == 1 &&
3238 		 state->border_color.f[3] == 1)
3239 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3240 	else {
3241 		int i;
3242 
3243 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
3244 
3245 		/* Check if the border has been uploaded already. */
3246 		for (i = 0; i < sctx->border_color_count; i++)
3247 			if (memcmp(&sctx->border_color_table[i], &state->border_color,
3248 				   sizeof(state->border_color)) == 0)
3249 				break;
3250 
3251 		if (i >= SI_MAX_BORDER_COLORS) {
3252 			/* Getting 4096 unique border colors is very unlikely. */
3253 			fprintf(stderr, "radeonsi: The border color table is full. "
3254 				"Any new border colors will be just black. "
3255 				"Please file a bug.\n");
3256 			border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3257 		} else {
3258 			if (i == sctx->border_color_count) {
3259 				/* Upload a new border color. */
3260 				memcpy(&sctx->border_color_table[i], &state->border_color,
3261 				       sizeof(state->border_color));
3262 				util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
3263 							&state->border_color,
3264 							sizeof(state->border_color));
3265 				sctx->border_color_count++;
3266 			}
3267 
3268 			border_color_index = i;
3269 		}
3270 	}
3271 
3272 #ifdef DEBUG
3273 	rstate->magic = SI_SAMPLER_STATE_MAGIC;
3274 #endif
3275 	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
3276 			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
3277 			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
3278 			  S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3279 			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
3280 			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
3281 			  S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3282 			  S_008F30_ANISO_BIAS(max_aniso_ratio) |
3283 			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
3284 			  S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI));
3285 	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
3286 			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
3287 			  S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3288 	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
3289 			  S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
3290 			  S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
3291 			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
3292 			  S_008F38_MIP_POINT_PRECLAMP(1) |
3293 			  S_008F38_DISABLE_LSB_CEIL(1) |
3294 			  S_008F38_FILTER_PREC_FIX(1) |
3295 			  S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
3296 	rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
3297 			 S_008F3C_BORDER_COLOR_TYPE(border_color_type);
3298 	return rstate;
3299 }
3300 
si_set_sample_mask(struct pipe_context * ctx,unsigned sample_mask)3301 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
3302 {
3303 	struct si_context *sctx = (struct si_context *)ctx;
3304 
3305 	if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
3306 		return;
3307 
3308 	sctx->sample_mask.sample_mask = sample_mask;
3309 	si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
3310 }
3311 
si_emit_sample_mask(struct si_context * sctx,struct r600_atom * atom)3312 static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
3313 {
3314 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
3315 	unsigned mask = sctx->sample_mask.sample_mask;
3316 
3317 	/* Needed for line and polygon smoothing as well as for the Polaris
3318 	 * small primitive filter. We expect the state tracker to take care of
3319 	 * this for us.
3320 	 */
3321 	assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
3322 	       (mask & 1 && sctx->blitter->running));
3323 
3324 	radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
3325 	radeon_emit(cs, mask | (mask << 16));
3326 	radeon_emit(cs, mask | (mask << 16));
3327 }
3328 
/* pipe_context::delete_sampler_state hook: releases a sampler state object.
 *
 * Debug builds first check the magic tag of the object and then clear it,
 * so that a stale or foreign pointer trips the assertion.
 */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
#ifdef DEBUG
	struct si_sampler_state *sampler = (struct si_sampler_state *)state;

	assert(sampler->magic == SI_SAMPLER_STATE_MAGIC);
	sampler->magic = 0;
#endif
	free(state);
}
3339 
3340 /*
3341  * Vertex elements & buffers
3342  */
3343 
si_create_vertex_elements(struct pipe_context * ctx,unsigned count,const struct pipe_vertex_element * elements)3344 static void *si_create_vertex_elements(struct pipe_context *ctx,
3345 				       unsigned count,
3346 				       const struct pipe_vertex_element *elements)
3347 {
3348 	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
3349 	bool used[SI_NUM_VERTEX_BUFFERS] = {};
3350 	int i;
3351 
3352 	assert(count <= SI_MAX_ATTRIBS);
3353 	if (!v)
3354 		return NULL;
3355 
3356 	v->count = count;
3357 	for (i = 0; i < count; ++i) {
3358 		const struct util_format_description *desc;
3359 		const struct util_format_channel_description *channel;
3360 		unsigned data_format, num_format;
3361 		int first_non_void;
3362 		unsigned vbo_index = elements[i].vertex_buffer_index;
3363 
3364 		if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
3365 			FREE(v);
3366 			return NULL;
3367 		}
3368 
3369 		if (!used[vbo_index]) {
3370 			v->first_vb_use_mask |= 1 << i;
3371 			used[vbo_index] = true;
3372 		}
3373 
3374 		desc = util_format_description(elements[i].src_format);
3375 		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
3376 		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
3377 		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
3378 		channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
3379 
3380 		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
3381 				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
3382 				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
3383 				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
3384 				   S_008F0C_NUM_FORMAT(num_format) |
3385 				   S_008F0C_DATA_FORMAT(data_format);
3386 		v->format_size[i] = desc->block.bits / 8;
3387 
3388 		/* The hardware always treats the 2-bit alpha channel as
3389 		 * unsigned, so a shader workaround is needed.
3390 		 */
3391 		if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
3392 			if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
3393 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i);
3394 			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
3395 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i);
3396 			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
3397 				/* This isn't actually used in OpenGL. */
3398 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i);
3399 			}
3400 		} else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) {
3401 			if (desc->swizzle[3] == PIPE_SWIZZLE_1)
3402 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_FIXED << (4 * i);
3403 			else
3404 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_FIXED << (4 * i);
3405 		} else if (channel && channel->size == 32 && !channel->pure_integer) {
3406 			if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
3407 				if (channel->normalized) {
3408 					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
3409 						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i);
3410 					else
3411 						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i);
3412 				} else {
3413 					v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i);
3414 				}
3415 			} else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
3416 				if (channel->normalized) {
3417 					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
3418 						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i);
3419 					else
3420 						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i);
3421 				} else {
3422 					v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i);
3423 				}
3424 			}
3425 		}
3426 
3427 		/* We work around the fact that 8_8_8 and 16_16_16 data formats
3428 		 * do not exist by using the corresponding 4-component formats.
3429 		 * This requires a fixup of the descriptor for bounds checks.
3430 		 */
3431 		if (desc->block.bits == 3 * 8 ||
3432 		    desc->block.bits == 3 * 16) {
3433 			v->fix_size3 |= (desc->block.bits / 24) << (2 * i);
3434 		}
3435 	}
3436 	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
3437 
3438 	return v;
3439 }
3440 
si_bind_vertex_elements(struct pipe_context * ctx,void * state)3441 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
3442 {
3443 	struct si_context *sctx = (struct si_context *)ctx;
3444 	struct si_vertex_element *v = (struct si_vertex_element*)state;
3445 
3446 	sctx->vertex_elements = v;
3447 	sctx->vertex_buffers_dirty = true;
3448 	sctx->do_update_shaders = true;
3449 }
3450 
si_delete_vertex_element(struct pipe_context * ctx,void * state)3451 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
3452 {
3453 	struct si_context *sctx = (struct si_context *)ctx;
3454 
3455 	if (sctx->vertex_elements == state)
3456 		sctx->vertex_elements = NULL;
3457 	FREE(state);
3458 }
3459 
si_set_vertex_buffers(struct pipe_context * ctx,unsigned start_slot,unsigned count,const struct pipe_vertex_buffer * buffers)3460 static void si_set_vertex_buffers(struct pipe_context *ctx,
3461 				  unsigned start_slot, unsigned count,
3462 				  const struct pipe_vertex_buffer *buffers)
3463 {
3464 	struct si_context *sctx = (struct si_context *)ctx;
3465 	struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
3466 	int i;
3467 
3468 	assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer));
3469 
3470 	if (buffers) {
3471 		for (i = 0; i < count; i++) {
3472 			const struct pipe_vertex_buffer *src = buffers + i;
3473 			struct pipe_vertex_buffer *dsti = dst + i;
3474 			struct pipe_resource *buf = src->buffer;
3475 
3476 			pipe_resource_reference(&dsti->buffer, buf);
3477 			dsti->buffer_offset = src->buffer_offset;
3478 			dsti->stride = src->stride;
3479 			r600_context_add_resource_size(ctx, buf);
3480 			if (buf)
3481 				r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;
3482 		}
3483 	} else {
3484 		for (i = 0; i < count; i++) {
3485 			pipe_resource_reference(&dst[i].buffer, NULL);
3486 		}
3487 	}
3488 	sctx->vertex_buffers_dirty = true;
3489 }
3490 
si_set_index_buffer(struct pipe_context * ctx,const struct pipe_index_buffer * ib)3491 static void si_set_index_buffer(struct pipe_context *ctx,
3492 				const struct pipe_index_buffer *ib)
3493 {
3494 	struct si_context *sctx = (struct si_context *)ctx;
3495 
3496 	if (ib) {
3497 		struct pipe_resource *buf = ib->buffer;
3498 
3499 		pipe_resource_reference(&sctx->index_buffer.buffer, buf);
3500 	        memcpy(&sctx->index_buffer, ib, sizeof(*ib));
3501 		r600_context_add_resource_size(ctx, buf);
3502 		if (buf)
3503 			r600_resource(buf)->bind_history |= PIPE_BIND_INDEX_BUFFER;
3504 	} else {
3505 		pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
3506 	}
3507 }
3508 
3509 /*
3510  * Misc
3511  */
3512 
si_set_tess_state(struct pipe_context * ctx,const float default_outer_level[4],const float default_inner_level[2])3513 static void si_set_tess_state(struct pipe_context *ctx,
3514 			      const float default_outer_level[4],
3515 			      const float default_inner_level[2])
3516 {
3517 	struct si_context *sctx = (struct si_context *)ctx;
3518 	struct pipe_constant_buffer cb;
3519 	float array[8];
3520 
3521 	memcpy(array, default_outer_level, sizeof(float) * 4);
3522 	memcpy(array+4, default_inner_level, sizeof(float) * 2);
3523 
3524 	cb.buffer = NULL;
3525 	cb.user_buffer = NULL;
3526 	cb.buffer_size = sizeof(array);
3527 
3528 	si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
3529 			       (void*)array, sizeof(array),
3530 			       &cb.buffer_offset);
3531 
3532 	si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
3533 	pipe_resource_reference(&cb.buffer, NULL);
3534 }
3535 
si_texture_barrier(struct pipe_context * ctx,unsigned flags)3536 static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
3537 {
3538 	struct si_context *sctx = (struct si_context *)ctx;
3539 
3540 	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
3541 			 SI_CONTEXT_INV_GLOBAL_L2 |
3542 			 SI_CONTEXT_FLUSH_AND_INV_CB;
3543 }
3544 
3545 /* This only ensures coherency for shader image/buffer stores. */
si_memory_barrier(struct pipe_context * ctx,unsigned flags)3546 static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
3547 {
3548 	struct si_context *sctx = (struct si_context *)ctx;
3549 
3550 	/* Subsequent commands must wait for all shader invocations to
3551 	 * complete. */
3552 	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
3553 	                 SI_CONTEXT_CS_PARTIAL_FLUSH;
3554 
3555 	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
3556 		sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
3557 				 SI_CONTEXT_INV_VMEM_L1;
3558 
3559 	if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
3560 		     PIPE_BARRIER_SHADER_BUFFER |
3561 		     PIPE_BARRIER_TEXTURE |
3562 		     PIPE_BARRIER_IMAGE |
3563 		     PIPE_BARRIER_STREAMOUT_BUFFER |
3564 		     PIPE_BARRIER_GLOBAL_BUFFER)) {
3565 		/* As far as I can tell, L1 contents are written back to L2
3566 		 * automatically at end of shader, but the contents of other
3567 		 * L1 caches might still be stale. */
3568 		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
3569 	}
3570 
3571 	if (flags & PIPE_BARRIER_INDEX_BUFFER) {
3572 		/* Indices are read through TC L2 since VI.
3573 		 * L1 isn't used.
3574 		 */
3575 		if (sctx->screen->b.chip_class <= CIK)
3576 			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
3577 	}
3578 
3579 	if (flags & PIPE_BARRIER_FRAMEBUFFER)
3580 		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
3581 
3582 	if (flags & (PIPE_BARRIER_FRAMEBUFFER |
3583 		     PIPE_BARRIER_INDIRECT_BUFFER))
3584 		sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
3585 }
3586 
si_create_blend_custom(struct si_context * sctx,unsigned mode)3587 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
3588 {
3589 	struct pipe_blend_state blend;
3590 
3591 	memset(&blend, 0, sizeof(blend));
3592 	blend.independent_blend_enable = true;
3593 	blend.rt[0].colormask = 0xf;
3594 	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
3595 }
3596 
/* Adapter installed as the common context's need_gfx_cs_space callback.
 *
 * "num_dw" and "include_draw_vbo" are ignored; the call is forwarded to
 * si_need_cs_space.
 */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
				 bool include_draw_vbo)
{
	struct si_context *sctx = (struct si_context *)ctx;

	si_need_cs_space(sctx);
}
3602 
3603 static void si_init_config(struct si_context *sctx);
3604 
si_init_state_functions(struct si_context * sctx)3605 void si_init_state_functions(struct si_context *sctx)
3606 {
3607 	si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
3608 	si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
3609 	si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
3610 	si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors);
3611 	si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports);
3612 
3613 	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
3614 	si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
3615 	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
3616 	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
3617 	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
3618 	si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
3619 	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
3620 	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
3621 	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
3622 	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
3623 
3624 	sctx->b.b.create_blend_state = si_create_blend_state;
3625 	sctx->b.b.bind_blend_state = si_bind_blend_state;
3626 	sctx->b.b.delete_blend_state = si_delete_blend_state;
3627 	sctx->b.b.set_blend_color = si_set_blend_color;
3628 
3629 	sctx->b.b.create_rasterizer_state = si_create_rs_state;
3630 	sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
3631 	sctx->b.b.delete_rasterizer_state = si_delete_rs_state;
3632 
3633 	sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
3634 	sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
3635 	sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
3636 
3637 	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
3638 	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
3639 	sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
3640 	sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
3641 	sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);
3642 
3643 	sctx->b.b.set_clip_state = si_set_clip_state;
3644 	sctx->b.b.set_stencil_ref = si_set_stencil_ref;
3645 
3646 	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
3647 	sctx->b.b.get_sample_position = cayman_get_sample_position;
3648 
3649 	sctx->b.b.create_sampler_state = si_create_sampler_state;
3650 	sctx->b.b.delete_sampler_state = si_delete_sampler_state;
3651 
3652 	sctx->b.b.create_sampler_view = si_create_sampler_view;
3653 	sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;
3654 
3655 	sctx->b.b.set_sample_mask = si_set_sample_mask;
3656 
3657 	sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
3658 	sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
3659 	sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
3660 	sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
3661 	sctx->b.b.set_index_buffer = si_set_index_buffer;
3662 
3663 	sctx->b.b.texture_barrier = si_texture_barrier;
3664 	sctx->b.b.memory_barrier = si_memory_barrier;
3665 	sctx->b.b.set_min_samples = si_set_min_samples;
3666 	sctx->b.b.set_tess_state = si_set_tess_state;
3667 
3668 	sctx->b.b.set_active_query_state = si_set_active_query_state;
3669 	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
3670 	sctx->b.save_qbo_state = si_save_qbo_state;
3671 	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
3672 
3673 	sctx->b.b.draw_vbo = si_draw_vbo;
3674 
3675 	si_init_config(sctx);
3676 }
3677 
si_get_bo_metadata_word1(struct r600_common_screen * rscreen)3678 static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen)
3679 {
3680 	return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id;
3681 }
3682 
si_query_opaque_metadata(struct r600_common_screen * rscreen,struct r600_texture * rtex,struct radeon_bo_metadata * md)3683 static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
3684 				     struct r600_texture *rtex,
3685 			             struct radeon_bo_metadata *md)
3686 {
3687 	struct si_screen *sscreen = (struct si_screen*)rscreen;
3688 	struct pipe_resource *res = &rtex->resource.b.b;
3689 	static const unsigned char swizzle[] = {
3690 		PIPE_SWIZZLE_X,
3691 		PIPE_SWIZZLE_Y,
3692 		PIPE_SWIZZLE_Z,
3693 		PIPE_SWIZZLE_W
3694 	};
3695 	uint32_t desc[8], i;
3696 	bool is_array = util_resource_is_array_texture(res);
3697 
3698 	/* DRM 2.x.x doesn't support this. */
3699 	if (rscreen->info.drm_major != 3)
3700 		return;
3701 
3702 	assert(rtex->dcc_separate_buffer == NULL);
3703 	assert(rtex->fmask.size == 0);
3704 
3705 	/* Metadata image format format version 1:
3706 	 * [0] = 1 (metadata format identifier)
3707 	 * [1] = (VENDOR_ID << 16) | PCI_ID
3708 	 * [2:9] = image descriptor for the whole resource
3709 	 *         [2] is always 0, because the base address is cleared
3710 	 *         [9] is the DCC offset bits [39:8] from the beginning of
3711 	 *             the buffer
3712 	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
3713 	 */
3714 
3715 	md->metadata[0] = 1; /* metadata image format version 1 */
3716 
3717 	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
3718 	md->metadata[1] = si_get_bo_metadata_word1(rscreen);
3719 
3720 	si_make_texture_descriptor(sscreen, rtex, true,
3721 				   res->target, res->format,
3722 				   swizzle, 0, res->last_level, 0,
3723 				   is_array ? res->array_size - 1 : 0,
3724 				   res->width0, res->height0, res->depth0,
3725 				   desc, NULL);
3726 
3727 	si_set_mutable_tex_desc_fields(rtex, &rtex->surface.level[0], 0, 0,
3728 				       rtex->surface.blk_w, false, desc);
3729 
3730 	/* Clear the base address and set the relative DCC offset. */
3731 	desc[0] = 0;
3732 	desc[1] &= C_008F14_BASE_ADDRESS_HI;
3733 	desc[7] = rtex->dcc_offset >> 8;
3734 
3735 	/* Dwords [2:9] contain the image descriptor. */
3736 	memcpy(&md->metadata[2], desc, sizeof(desc));
3737 
3738 	/* Dwords [10:..] contain the mipmap level offsets. */
3739 	for (i = 0; i <= res->last_level; i++)
3740 		md->metadata[10+i] = rtex->surface.level[i].offset >> 8;
3741 
3742 	md->size_metadata = (11 + res->last_level) * 4;
3743 }
3744 
si_apply_opaque_metadata(struct r600_common_screen * rscreen,struct r600_texture * rtex,struct radeon_bo_metadata * md)3745 static void si_apply_opaque_metadata(struct r600_common_screen *rscreen,
3746 				     struct r600_texture *rtex,
3747 			             struct radeon_bo_metadata *md)
3748 {
3749 	uint32_t *desc = &md->metadata[2];
3750 
3751 	if (rscreen->chip_class < VI)
3752 		return;
3753 
3754 	/* Return if DCC is enabled. The texture should be set up with it
3755 	 * already.
3756 	 */
3757 	if (md->size_metadata >= 11 * 4 &&
3758 	    md->metadata[0] != 0 &&
3759 	    md->metadata[1] == si_get_bo_metadata_word1(rscreen) &&
3760 	    G_008F28_COMPRESSION_EN(desc[6])) {
3761 		assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8));
3762 		return;
3763 	}
3764 
3765 	/* Disable DCC. These are always set by texture_from_handle and must
3766 	 * be cleared here.
3767 	 */
3768 	rtex->dcc_offset = 0;
3769 }
3770 
si_init_screen_state_functions(struct si_screen * sscreen)3771 void si_init_screen_state_functions(struct si_screen *sscreen)
3772 {
3773 	sscreen->b.b.is_format_supported = si_is_format_supported;
3774 	sscreen->b.query_opaque_metadata = si_query_opaque_metadata;
3775 	sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata;
3776 }
3777 
3778 static void
si_write_harvested_raster_configs(struct si_context * sctx,struct si_pm4_state * pm4,unsigned raster_config,unsigned raster_config_1)3779 si_write_harvested_raster_configs(struct si_context *sctx,
3780 				  struct si_pm4_state *pm4,
3781 				  unsigned raster_config,
3782 				  unsigned raster_config_1)
3783 {
3784 	unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
3785 	unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
3786 	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
3787 	unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
3788 	unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
3789 	unsigned rb_per_se = num_rb / num_se;
3790 	unsigned se_mask[4];
3791 	unsigned se;
3792 
3793 	se_mask[0] = ((1 << rb_per_se) - 1);
3794 	se_mask[1] = (se_mask[0] << rb_per_se);
3795 	se_mask[2] = (se_mask[1] << rb_per_se);
3796 	se_mask[3] = (se_mask[2] << rb_per_se);
3797 
3798 	se_mask[0] &= rb_mask;
3799 	se_mask[1] &= rb_mask;
3800 	se_mask[2] &= rb_mask;
3801 	se_mask[3] &= rb_mask;
3802 
3803 	assert(num_se == 1 || num_se == 2 || num_se == 4);
3804 	assert(sh_per_se == 1 || sh_per_se == 2);
3805 	assert(rb_per_pkr == 1 || rb_per_pkr == 2);
3806 
3807 	/* XXX: I can't figure out what the *_XSEL and *_YSEL
3808 	 * fields are for, so I'm leaving them as their default
3809 	 * values. */
3810 
3811 	for (se = 0; se < num_se; se++) {
3812 		unsigned raster_config_se = raster_config;
3813 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3814 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3815 		int idx = (se / 2) * 2;
3816 
3817 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3818 			raster_config_se &= C_028350_SE_MAP;
3819 
3820 			if (!se_mask[idx]) {
3821 				raster_config_se |=
3822 					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
3823 			} else {
3824 				raster_config_se |=
3825 					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
3826 			}
3827 		}
3828 
3829 		pkr0_mask &= rb_mask;
3830 		pkr1_mask &= rb_mask;
3831 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3832 			raster_config_se &= C_028350_PKR_MAP;
3833 
3834 			if (!pkr0_mask) {
3835 				raster_config_se |=
3836 					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
3837 			} else {
3838 				raster_config_se |=
3839 					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
3840 			}
3841 		}
3842 
3843 		if (rb_per_se >= 2) {
3844 			unsigned rb0_mask = 1 << (se * rb_per_se);
3845 			unsigned rb1_mask = rb0_mask << 1;
3846 
3847 			rb0_mask &= rb_mask;
3848 			rb1_mask &= rb_mask;
3849 			if (!rb0_mask || !rb1_mask) {
3850 				raster_config_se &= C_028350_RB_MAP_PKR0;
3851 
3852 				if (!rb0_mask) {
3853 					raster_config_se |=
3854 						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
3855 				} else {
3856 					raster_config_se |=
3857 						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
3858 				}
3859 			}
3860 
3861 			if (rb_per_se > 2) {
3862 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3863 				rb1_mask = rb0_mask << 1;
3864 				rb0_mask &= rb_mask;
3865 				rb1_mask &= rb_mask;
3866 				if (!rb0_mask || !rb1_mask) {
3867 					raster_config_se &= C_028350_RB_MAP_PKR1;
3868 
3869 					if (!rb0_mask) {
3870 						raster_config_se |=
3871 							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
3872 					} else {
3873 						raster_config_se |=
3874 							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
3875 					}
3876 				}
3877 			}
3878 		}
3879 
3880 		/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
3881 		if (sctx->b.chip_class < CIK)
3882 			si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
3883 				       SE_INDEX(se) | SH_BROADCAST_WRITES |
3884 				       INSTANCE_BROADCAST_WRITES);
3885 		else
3886 			si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
3887 				       S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
3888 				       S_030800_INSTANCE_BROADCAST_WRITES(1));
3889 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
3890 	}
3891 
3892 	/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
3893 	if (sctx->b.chip_class < CIK)
3894 		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
3895 			       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
3896 			       INSTANCE_BROADCAST_WRITES);
3897 	else {
3898 		si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
3899 			       S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
3900 			       S_030800_INSTANCE_BROADCAST_WRITES(1));
3901 
3902 		if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3903 		                     (!se_mask[2] && !se_mask[3]))) {
3904 			raster_config_1 &= C_028354_SE_PAIR_MAP;
3905 
3906 			if (!se_mask[0] && !se_mask[1]) {
3907 				raster_config_1 |=
3908 					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
3909 			} else {
3910 				raster_config_1 |=
3911 					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
3912 			}
3913 		}
3914 
3915 		si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
3916 	}
3917 }
3918 
si_init_config(struct si_context * sctx)3919 static void si_init_config(struct si_context *sctx)
3920 {
3921 	struct si_screen *sscreen = sctx->screen;
3922 	unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
3923 	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
3924 	unsigned raster_config, raster_config_1;
3925 	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
3926 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
3927 
3928 	if (!pm4)
3929 		return;
3930 
3931 	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
3932 	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
3933 	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
3934 	si_pm4_cmd_end(pm4, false);
3935 
3936 	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
3937 	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
3938 
3939 	/* FIXME calculate these values somehow ??? */
3940 	si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
3941 	si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
3942 	si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
3943 
3944 	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
3945 	si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
3946 
3947 	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
3948 	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
3949 	if (sctx->b.chip_class < CIK)
3950 		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
3951 			       S_008A14_CLIP_VTX_REORDER_ENA(1));
3952 
3953 	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
3954 	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
3955 
3956 	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
3957 
3958 	switch (sctx->screen->b.family) {
3959 	case CHIP_TAHITI:
3960 	case CHIP_PITCAIRN:
3961 		raster_config = 0x2a00126a;
3962 		raster_config_1 = 0x00000000;
3963 		break;
3964 	case CHIP_VERDE:
3965 		raster_config = 0x0000124a;
3966 		raster_config_1 = 0x00000000;
3967 		break;
3968 	case CHIP_OLAND:
3969 		raster_config = 0x00000082;
3970 		raster_config_1 = 0x00000000;
3971 		break;
3972 	case CHIP_HAINAN:
3973 		raster_config = 0x00000000;
3974 		raster_config_1 = 0x00000000;
3975 		break;
3976 	case CHIP_BONAIRE:
3977 		raster_config = 0x16000012;
3978 		raster_config_1 = 0x00000000;
3979 		break;
3980 	case CHIP_HAWAII:
3981 		raster_config = 0x3a00161a;
3982 		raster_config_1 = 0x0000002e;
3983 		break;
3984 	case CHIP_FIJI:
3985 		if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
3986 			/* old kernels with old tiling config */
3987 			raster_config = 0x16000012;
3988 			raster_config_1 = 0x0000002a;
3989 		} else {
3990 			raster_config = 0x3a00161a;
3991 			raster_config_1 = 0x0000002e;
3992 		}
3993 		break;
3994 	case CHIP_POLARIS10:
3995 		raster_config = 0x16000012;
3996 		raster_config_1 = 0x0000002a;
3997 		break;
3998 	case CHIP_POLARIS11:
3999 	case CHIP_POLARIS12:
4000 		raster_config = 0x16000012;
4001 		raster_config_1 = 0x00000000;
4002 		break;
4003 	case CHIP_TONGA:
4004 		raster_config = 0x16000012;
4005 		raster_config_1 = 0x0000002a;
4006 		break;
4007 	case CHIP_ICELAND:
4008 		if (num_rb == 1)
4009 			raster_config = 0x00000000;
4010 		else
4011 			raster_config = 0x00000002;
4012 		raster_config_1 = 0x00000000;
4013 		break;
4014 	case CHIP_CARRIZO:
4015 		raster_config = 0x00000002;
4016 		raster_config_1 = 0x00000000;
4017 		break;
4018 	case CHIP_KAVERI:
4019 		/* KV should be 0x00000002, but that causes problems with radeon */
4020 		raster_config = 0x00000000; /* 0x00000002 */
4021 		raster_config_1 = 0x00000000;
4022 		break;
4023 	case CHIP_KABINI:
4024 	case CHIP_MULLINS:
4025 	case CHIP_STONEY:
4026 		raster_config = 0x00000000;
4027 		raster_config_1 = 0x00000000;
4028 		break;
4029 	default:
4030 		fprintf(stderr,
4031 			"radeonsi: Unknown GPU, using 0 for raster_config\n");
4032 		raster_config = 0x00000000;
4033 		raster_config_1 = 0x00000000;
4034 		break;
4035 	}
4036 
4037 	/* Always use the default config when all backends are enabled
4038 	 * (or when we failed to determine the enabled backends).
4039 	 */
4040 	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
4041 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
4042 			       raster_config);
4043 		if (sctx->b.chip_class >= CIK)
4044 			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
4045 				       raster_config_1);
4046 	} else {
4047 		si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
4048 	}
4049 
4050 	si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
4051 	si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
4052 	si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
4053 		       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
4054 	si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
4055 	si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
4056 		       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
4057 
4058 	si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
4059 	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
4060 		       S_028230_ER_TRI(0xA) |
4061 		       S_028230_ER_POINT(0xA) |
4062 		       S_028230_ER_RECT(0xA) |
4063 		       /* Required by DX10_DIAMOND_TEST_ENA: */
4064 		       S_028230_ER_LINE_LR(0x1A) |
4065 		       S_028230_ER_LINE_RL(0x26) |
4066 		       S_028230_ER_LINE_TB(0xA) |
4067 		       S_028230_ER_LINE_BT(0xA));
4068 	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
4069 	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
4070 	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
4071 	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
4072 	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
4073 	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
4074 	si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
4075 
4076 	si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
4077 	si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
4078 	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
4079 
4080 	if (sctx->b.chip_class >= CIK) {
4081 		/* If this is 0, Bonaire can hang even if GS isn't being used.
4082 		 * Other chips are unaffected. These are suboptimal values,
4083 		 * but we don't use on-chip GS.
4084 		 */
4085 		si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
4086 			       S_028A44_ES_VERTS_PER_SUBGRP(64) |
4087 			       S_028A44_GS_PRIMS_PER_SUBGRP(4));
4088 
4089 		si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
4090 		si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
4091 		si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
4092 		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
4093 
4094 		if (sscreen->b.info.num_good_compute_units /
4095 		    (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
4096 			/* Too few available compute units per SH. Disallowing
4097 			 * VS to run on CU0 could hurt us more than late VS
4098 			 * allocation would help.
4099 			 *
4100 			 * LATE_ALLOC_VS = 2 is the highest safe number.
4101 			 */
4102 			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
4103 			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
4104 		} else {
4105 			/* Set LATE_ALLOC_VS == 31. It should be less than
4106 			 * the number of scratch waves. Limitations:
4107 			 * - VS can't execute on CU0.
4108 			 * - If HS writes outputs to LDS, LS can't execute on CU0.
4109 			 */
4110 			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
4111 			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
4112 		}
4113 
4114 		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
4115 	}
4116 
4117 	if (sctx->b.chip_class >= VI) {
4118 		unsigned vgt_tess_distribution;
4119 
4120 		si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
4121 			       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
4122 			       S_028424_OVERWRITE_COMBINER_WATERMARK(4));
4123 		if (sctx->b.family < CHIP_POLARIS10)
4124 			si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
4125 		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
4126 
4127 		vgt_tess_distribution =
4128 			S_028B50_ACCUM_ISOLINE(32) |
4129 			S_028B50_ACCUM_TRI(11) |
4130 			S_028B50_ACCUM_QUAD(11) |
4131 			S_028B50_DONUT_SPLIT(16);
4132 
4133 		/* Testing with Unigine Heaven extreme tessellation yielded best results
4134 		 * with TRAP_SPLIT = 3.
4135 		 */
4136 		if (sctx->b.family == CHIP_FIJI ||
4137 		    sctx->b.family >= CHIP_POLARIS10)
4138 			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
4139 
4140 		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
4141 	} else {
4142 		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
4143 		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
4144 	}
4145 
4146 	if (sctx->b.family == CHIP_STONEY)
4147 		si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
4148 
4149 	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
4150 	if (sctx->b.chip_class >= CIK)
4151 		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
4152 	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
4153 		      RADEON_PRIO_BORDER_COLORS);
4154 
4155 	si_pm4_upload_indirect_buffer(sctx, pm4);
4156 	sctx->init_config = pm4;
4157 }
4158