• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "si_build_pm4.h"
8 #include "si_query.h"
9 #include "si_shader_internal.h"
10 #include "sid.h"
11 #include "util/fast_idiv_by_const.h"
12 #include "util/format/u_format.h"
13 #include "util/format/u_format_s3tc.h"
14 #include "util/u_dual_blend.h"
15 #include "util/u_helpers.h"
16 #include "util/u_memory.h"
17 #include "util/u_resource.h"
18 #include "util/u_upload_mgr.h"
19 #include "util/u_blend.h"
20 
21 #include "gfx10_format_table.h"
22 
si_map_swizzle(unsigned swizzle)23 static unsigned si_map_swizzle(unsigned swizzle)
24 {
25    switch (swizzle) {
26    case PIPE_SWIZZLE_Y:
27       return V_008F0C_SQ_SEL_Y;
28    case PIPE_SWIZZLE_Z:
29       return V_008F0C_SQ_SEL_Z;
30    case PIPE_SWIZZLE_W:
31       return V_008F0C_SQ_SEL_W;
32    case PIPE_SWIZZLE_0:
33       return V_008F0C_SQ_SEL_0;
34    case PIPE_SWIZZLE_1:
35       return V_008F0C_SQ_SEL_1;
36    default: /* PIPE_SWIZZLE_X */
37       return V_008F0C_SQ_SEL_X;
38    }
39 }
40 
/* Convert a float to 12.4 fixed-point, clamping to the representable
 * range [0, 0xffff]. The fraction is truncated, not rounded.
 */
static unsigned si_pack_float_12p4(float x)
{
   if (x <= 0.0f)
      return 0;
   if (x >= 4096.0f)
      return 0xffff;
   return (unsigned)(x * 16.0f);
}
46 
47 /*
48  * Inferred framebuffer and blender state.
49  *
50  * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
51  * if there is not enough PS outputs.
52  */
si_emit_cb_render_state(struct si_context * sctx,unsigned index)53 static void si_emit_cb_render_state(struct si_context *sctx, unsigned index)
54 {
55    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
56    struct si_state_blend *blend = sctx->queued.named.blend;
57    /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,
58     * but you never know. */
59    uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_mask;
60    unsigned i;
61 
62    /* Avoid a hang that happens when dual source blending is enabled
63     * but there is not enough color outputs. This is undefined behavior,
64     * so disable color writes completely.
65     *
66     * Reproducible with Unigine Heaven 4.0 and drirc missing.
67     */
68    if (blend->dual_src_blend && sctx->shader.ps.cso &&
69        (sctx->shader.ps.cso->info.colors_written & 0x3) != 0x3)
70       cb_target_mask = 0;
71 
72    /* GFX9: Flush DFSM when CB_TARGET_MASK changes.
73     * I think we don't have to do anything between IBs.
74     */
75    if (sctx->screen->dpbb_allowed && sctx->last_cb_target_mask != cb_target_mask &&
76        sctx->screen->pbb_context_states_per_bin > 1) {
77       sctx->last_cb_target_mask = cb_target_mask;
78 
79       radeon_begin(cs);
80       radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
81       radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
82       radeon_end();
83    }
84 
85    uint32_t cb_dcc_control = 0;
86 
87    if (sctx->gfx_level >= GFX8) {
88       /* DCC MSAA workaround.
89        * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_-
90        * COMBINER_DISABLE, but that would be more complicated.
91        */
92       bool oc_disable =
93          blend->dcc_msaa_corruption_4bit & cb_target_mask && sctx->framebuffer.nr_samples >= 2;
94 
95       if (sctx->gfx_level >= GFX11) {
96          cb_dcc_control = S_028424_SAMPLE_MASK_TRACKER_DISABLE(oc_disable) |
97                           S_028424_SAMPLE_MASK_TRACKER_WATERMARK(0);
98       } else {
99          cb_dcc_control =
100             S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(sctx->gfx_level <= GFX9) |
101             S_028424_OVERWRITE_COMBINER_WATERMARK(sctx->gfx_level >= GFX10 ? 6 : 4) |
102             S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) |
103             S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->gfx_level < GFX11 &&
104                                                  sctx->screen->info.has_dcc_constant_encode);
105       }
106    }
107 
108    uint32_t sx_ps_downconvert = 0;
109    uint32_t sx_blend_opt_epsilon = 0;
110    uint32_t sx_blend_opt_control = 0;
111 
112    /* RB+ register settings. */
113    if (sctx->screen->info.rbplus_allowed) {
114       unsigned spi_shader_col_format =
115          sctx->shader.ps.cso ? sctx->shader.ps.current->key.ps.part.epilog.spi_shader_col_format
116                              : 0;
117       unsigned num_cbufs = util_last_bit(sctx->framebuffer.colorbuf_enabled_4bit &
118                                          blend->cb_target_enabled_4bit) / 4;
119 
120       for (i = 0; i < num_cbufs; i++) {
121          struct si_surface *surf = (struct si_surface *)sctx->framebuffer.state.cbufs[i];
122          unsigned format, swap, spi_format, colormask;
123          bool has_alpha, has_rgb;
124 
125          if (!surf) {
126             /* If the color buffer is not set, the driver sets 32_R
127              * as the SPI color format, because the hw doesn't allow
128              * holes between color outputs, so also set this to
129              * enable RB+.
130              */
131             sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
132             continue;
133          }
134 
135          format = sctx->gfx_level >= GFX11 ? G_028C70_FORMAT_GFX11(surf->cb_color_info):
136                                              G_028C70_FORMAT_GFX6(surf->cb_color_info);
137          swap = G_028C70_COMP_SWAP(surf->cb_color_info);
138          spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
139          colormask = (cb_target_mask >> (i * 4)) & 0xf;
140 
141          /* Set if RGB and A are present. */
142          has_alpha = !(sctx->gfx_level >= GFX11 ? G_028C74_FORCE_DST_ALPHA_1_GFX11(surf->cb_color_attrib):
143                                                   G_028C74_FORCE_DST_ALPHA_1_GFX6(surf->cb_color_attrib));
144 
145          if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 ||
146              format == V_028C70_COLOR_32)
147             has_rgb = !has_alpha;
148          else
149             has_rgb = true;
150 
151          /* Check the colormask and export format. */
152          if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
153             has_rgb = false;
154          if (!(colormask & PIPE_MASK_A))
155             has_alpha = false;
156 
157          if (spi_format == V_028714_SPI_SHADER_ZERO) {
158             has_rgb = false;
159             has_alpha = false;
160          }
161 
162          /* Disable value checking for disabled channels. */
163          if (!has_rgb)
164             sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
165          if (!has_alpha)
166             sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
167 
168          /* Enable down-conversion for 32bpp and smaller formats. */
169          switch (format) {
170          case V_028C70_COLOR_8:
171          case V_028C70_COLOR_8_8:
172          case V_028C70_COLOR_8_8_8_8:
173             /* For 1 and 2-channel formats, use the superset thereof. */
174             if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
175                 spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
176                 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
177                sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
178                if (G_028C70_NUMBER_TYPE(surf->cb_color_info) != V_028C70_NUMBER_SRGB)
179                   sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT_0_5 << (i * 4);
180             }
181             break;
182 
183          case V_028C70_COLOR_5_6_5:
184             if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
185                sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
186                sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT_0_5 << (i * 4);
187             }
188             break;
189 
190          case V_028C70_COLOR_1_5_5_5:
191             if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
192                sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
193                sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT_0_5 << (i * 4);
194             }
195             break;
196 
197          case V_028C70_COLOR_4_4_4_4:
198             if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
199                sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
200                sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT_0_5 << (i * 4);
201             }
202             break;
203 
204          case V_028C70_COLOR_32:
205             if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R)
206                sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
207             else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR)
208                sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
209             break;
210 
211          case V_028C70_COLOR_16:
212          case V_028C70_COLOR_16_16:
213             /* For 1-channel formats, use the superset thereof. */
214             if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
215                 spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
216                 spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
217                 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
218                if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV)
219                   sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
220                else
221                   sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
222             }
223             break;
224 
225          case V_028C70_COLOR_10_11_11:
226             if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
227                sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
228             break;
229 
230          case V_028C70_COLOR_2_10_10_10:
231          case V_028C70_COLOR_10_10_10_2:
232             if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
233                sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
234                sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT_0_5 << (i * 4);
235             }
236             break;
237 
238          case V_028C70_COLOR_5_9_9_9:
239             if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
240                sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4);
241             break;
242          }
243       }
244 
245       /* If there are no color outputs, the first color export is
246        * always enabled as 32_R, so also set this to enable RB+.
247        */
248       if (!sx_ps_downconvert)
249          sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
250    }
251 
252    if (sctx->screen->info.has_set_context_pairs_packed) {
253       radeon_begin(cs);
254       gfx11_begin_packed_context_regs();
255       gfx11_opt_set_context_reg(R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,
256                                 cb_target_mask);
257       gfx11_opt_set_context_reg(R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL,
258                                 cb_dcc_control);
259       gfx11_opt_set_context_reg(R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT,
260                                 sx_ps_downconvert);
261       gfx11_opt_set_context_reg(R_028758_SX_BLEND_OPT_EPSILON, SI_TRACKED_SX_BLEND_OPT_EPSILON,
262                                 sx_blend_opt_epsilon);
263       gfx11_opt_set_context_reg(R_02875C_SX_BLEND_OPT_CONTROL, SI_TRACKED_SX_BLEND_OPT_CONTROL,
264                                 sx_blend_opt_control);
265       gfx11_end_packed_context_regs();
266       radeon_end(); /* don't track context rolls on GFX11 */
267    } else {
268       radeon_begin(cs);
269       radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,
270                                  cb_target_mask);
271       if (sctx->gfx_level >= GFX8) {
272          radeon_opt_set_context_reg(sctx, R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL,
273                                     cb_dcc_control);
274       }
275       if (sctx->screen->info.rbplus_allowed) {
276          radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT,
277                                      sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control);
278       }
279       radeon_end_update_context_roll(sctx);
280    }
281 }
282 
283 /*
284  * Blender functions
285  */
286 
si_translate_blend_function(int blend_func)287 static uint32_t si_translate_blend_function(int blend_func)
288 {
289    switch (blend_func) {
290    case PIPE_BLEND_ADD:
291       return V_028780_COMB_DST_PLUS_SRC;
292    case PIPE_BLEND_SUBTRACT:
293       return V_028780_COMB_SRC_MINUS_DST;
294    case PIPE_BLEND_REVERSE_SUBTRACT:
295       return V_028780_COMB_DST_MINUS_SRC;
296    case PIPE_BLEND_MIN:
297       return V_028780_COMB_MIN_DST_SRC;
298    case PIPE_BLEND_MAX:
299       return V_028780_COMB_MAX_DST_SRC;
300    default:
301       PRINT_ERR("Unknown blend function %d\n", blend_func);
302       assert(0);
303       break;
304    }
305    return 0;
306 }
307 
si_translate_blend_factor(enum amd_gfx_level gfx_level,int blend_fact)308 static uint32_t si_translate_blend_factor(enum amd_gfx_level gfx_level, int blend_fact)
309 {
310    switch (blend_fact) {
311    case PIPE_BLENDFACTOR_ONE:
312       return V_028780_BLEND_ONE;
313    case PIPE_BLENDFACTOR_SRC_COLOR:
314       return V_028780_BLEND_SRC_COLOR;
315    case PIPE_BLENDFACTOR_SRC_ALPHA:
316       return V_028780_BLEND_SRC_ALPHA;
317    case PIPE_BLENDFACTOR_DST_ALPHA:
318       return V_028780_BLEND_DST_ALPHA;
319    case PIPE_BLENDFACTOR_DST_COLOR:
320       return V_028780_BLEND_DST_COLOR;
321    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
322       return V_028780_BLEND_SRC_ALPHA_SATURATE;
323    case PIPE_BLENDFACTOR_CONST_COLOR:
324       return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_COLOR_GFX11:
325                                    V_028780_BLEND_CONSTANT_COLOR_GFX6;
326    case PIPE_BLENDFACTOR_CONST_ALPHA:
327       return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_ALPHA_GFX11 :
328                                    V_028780_BLEND_CONSTANT_ALPHA_GFX6;
329    case PIPE_BLENDFACTOR_ZERO:
330       return V_028780_BLEND_ZERO;
331    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
332       return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
333    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
334       return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
335    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
336       return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
337    case PIPE_BLENDFACTOR_INV_DST_COLOR:
338       return V_028780_BLEND_ONE_MINUS_DST_COLOR;
339    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
340       return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX11:
341                                    V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6;
342    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
343       return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX11:
344                                    V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6;
345    case PIPE_BLENDFACTOR_SRC1_COLOR:
346       return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_COLOR_GFX11:
347                                    V_028780_BLEND_SRC1_COLOR_GFX6;
348    case PIPE_BLENDFACTOR_SRC1_ALPHA:
349       return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_ALPHA_GFX11:
350                                    V_028780_BLEND_SRC1_ALPHA_GFX6;
351    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
352       return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_COLOR_GFX11:
353                                    V_028780_BLEND_INV_SRC1_COLOR_GFX6;
354    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
355       return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_ALPHA_GFX11:
356                                    V_028780_BLEND_INV_SRC1_ALPHA_GFX6;
357    default:
358       PRINT_ERR("Bad blend factor %d not supported!\n", blend_fact);
359       assert(0);
360       break;
361    }
362    return 0;
363 }
364 
si_translate_blend_opt_function(int blend_func)365 static uint32_t si_translate_blend_opt_function(int blend_func)
366 {
367    switch (blend_func) {
368    case PIPE_BLEND_ADD:
369       return V_028760_OPT_COMB_ADD;
370    case PIPE_BLEND_SUBTRACT:
371       return V_028760_OPT_COMB_SUBTRACT;
372    case PIPE_BLEND_REVERSE_SUBTRACT:
373       return V_028760_OPT_COMB_REVSUBTRACT;
374    case PIPE_BLEND_MIN:
375       return V_028760_OPT_COMB_MIN;
376    case PIPE_BLEND_MAX:
377       return V_028760_OPT_COMB_MAX;
378    default:
379       return V_028760_OPT_COMB_BLEND_DISABLED;
380    }
381 }
382 
si_translate_blend_opt_factor(int blend_fact,bool is_alpha)383 static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
384 {
385    switch (blend_fact) {
386    case PIPE_BLENDFACTOR_ZERO:
387       return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
388    case PIPE_BLENDFACTOR_ONE:
389       return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
390    case PIPE_BLENDFACTOR_SRC_COLOR:
391       return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
392                       : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
393    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
394       return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
395                       : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
396    case PIPE_BLENDFACTOR_SRC_ALPHA:
397       return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
398    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
399       return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
400    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
401       return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
402                       : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
403    default:
404       return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
405    }
406 }
407 
si_blend_check_commutativity(struct si_screen * sscreen,struct si_state_blend * blend,enum pipe_blend_func func,enum pipe_blendfactor src,enum pipe_blendfactor dst,unsigned chanmask)408 static void si_blend_check_commutativity(struct si_screen *sscreen, struct si_state_blend *blend,
409                                          enum pipe_blend_func func, enum pipe_blendfactor src,
410                                          enum pipe_blendfactor dst, unsigned chanmask)
411 {
412    /* Src factor is allowed when it does not depend on Dst */
413    static const uint32_t src_allowed =
414       (1u << PIPE_BLENDFACTOR_ONE) | (1u << PIPE_BLENDFACTOR_SRC_COLOR) |
415       (1u << PIPE_BLENDFACTOR_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) |
416       (1u << PIPE_BLENDFACTOR_CONST_COLOR) | (1u << PIPE_BLENDFACTOR_CONST_ALPHA) |
417       (1u << PIPE_BLENDFACTOR_SRC1_COLOR) | (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) |
418       (1u << PIPE_BLENDFACTOR_ZERO) | (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) |
419       (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) |
420       (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) |
421       (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA);
422 
423    if (dst == PIPE_BLENDFACTOR_ONE && (src_allowed & (1u << src)) &&
424        (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN))
425       blend->commutative_4bit |= chanmask;
426 }
427 
428 /**
429  * Get rid of DST in the blend factors by commuting the operands:
430  *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
431  */
si_blend_remove_dst(unsigned * func,unsigned * src_factor,unsigned * dst_factor,unsigned expected_dst,unsigned replacement_src)432 static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, unsigned *dst_factor,
433                                 unsigned expected_dst, unsigned replacement_src)
434 {
435    if (*src_factor == expected_dst && *dst_factor == PIPE_BLENDFACTOR_ZERO) {
436       *src_factor = PIPE_BLENDFACTOR_ZERO;
437       *dst_factor = replacement_src;
438 
439       /* Commuting the operands requires reversing subtractions. */
440       if (*func == PIPE_BLEND_SUBTRACT)
441          *func = PIPE_BLEND_REVERSE_SUBTRACT;
442       else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
443          *func = PIPE_BLEND_SUBTRACT;
444    }
445 }
446 
si_create_blend_state_mode(struct pipe_context * ctx,const struct pipe_blend_state * state,unsigned mode)447 static void *si_create_blend_state_mode(struct pipe_context *ctx,
448                                         const struct pipe_blend_state *state, unsigned mode)
449 {
450    struct si_context *sctx = (struct si_context *)ctx;
451    struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
452    struct si_pm4_state *pm4 = &blend->pm4;
453    uint32_t sx_mrt_blend_opt[8] = {0};
454    uint32_t color_control = 0;
455    bool logicop_enable = state->logicop_enable && state->logicop_func != PIPE_LOGICOP_COPY;
456 
457    if (!blend)
458       return NULL;
459 
460    si_pm4_clear_state(pm4, sctx->screen, false);
461 
462    blend->alpha_to_coverage = state->alpha_to_coverage;
463    blend->alpha_to_one = state->alpha_to_one;
464    blend->dual_src_blend = util_blend_state_is_dual(state, 0);
465    blend->logicop_enable = logicop_enable;
466    blend->allows_noop_optimization =
467       state->rt[0].rgb_func == PIPE_BLEND_ADD &&
468       state->rt[0].alpha_func == PIPE_BLEND_ADD &&
469       state->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_DST_COLOR &&
470       state->rt[0].alpha_src_factor == PIPE_BLENDFACTOR_DST_COLOR &&
471       state->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ZERO &&
472       state->rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_ZERO &&
473       mode == V_028808_CB_NORMAL;
474 
475    unsigned num_shader_outputs = state->max_rt + 1; /* estimate */
476    if (blend->dual_src_blend)
477       num_shader_outputs = MAX2(num_shader_outputs, 2);
478 
479    if (logicop_enable) {
480       color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
481    } else {
482       color_control |= S_028808_ROP3(0xcc);
483    }
484 
485    unsigned db_alpha_to_mask;
486    if (state->alpha_to_coverage && state->alpha_to_coverage_dither) {
487       db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
488                          S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) |
489                          S_028B70_ALPHA_TO_MASK_OFFSET2(0) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) |
490                          S_028B70_OFFSET_ROUND(1);
491    } else {
492       db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
493                          S_028B70_ALPHA_TO_MASK_OFFSET0(2) | S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
494                          S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) |
495                          S_028B70_OFFSET_ROUND(0);
496    }
497 
498    si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, db_alpha_to_mask);
499 
500    blend->cb_target_mask = 0;
501    blend->cb_target_enabled_4bit = 0;
502 
503    unsigned last_blend_cntl;
504 
505    for (int i = 0; i < num_shader_outputs; i++) {
506       /* state->rt entries > 0 only written if independent blending */
507       const int j = state->independent_blend_enable ? i : 0;
508 
509       unsigned eqRGB = state->rt[j].rgb_func;
510       unsigned srcRGB = state->rt[j].rgb_src_factor;
511       unsigned dstRGB = state->rt[j].rgb_dst_factor;
512       unsigned eqA = state->rt[j].alpha_func;
513       unsigned srcA = state->rt[j].alpha_src_factor;
514       unsigned dstA = state->rt[j].alpha_dst_factor;
515 
516       unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
517       unsigned blend_cntl = 0;
518 
519       sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
520                             S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
521 
522       /* Only set dual source blending for MRT0 to avoid a hang. */
523       if (i >= 1 && blend->dual_src_blend) {
524          if (i == 1) {
525             if (sctx->gfx_level >= GFX11)
526                blend_cntl = last_blend_cntl;
527             else
528                blend_cntl = S_028780_ENABLE(1);
529          }
530 
531          si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
532          continue;
533       }
534 
535       /* Only addition and subtraction equations are supported with
536        * dual source blending.
537        */
538       if (blend->dual_src_blend && (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
539                                     eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
540          assert(!"Unsupported equation for dual source blending");
541          si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
542          continue;
543       }
544 
545       /* cb_render_state will disable unused ones */
546       blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
547       if (state->rt[j].colormask)
548          blend->cb_target_enabled_4bit |= 0xf << (4 * i);
549 
550       if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
551          si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
552          continue;
553       }
554 
555       si_blend_check_commutativity(sctx->screen, blend, eqRGB, srcRGB, dstRGB, 0x7 << (4 * i));
556       si_blend_check_commutativity(sctx->screen, blend, eqA, srcA, dstA, 0x8 << (4 * i));
557 
558       /* Blending optimizations for RB+.
559        * These transformations don't change the behavior.
560        *
561        * First, get rid of DST in the blend factors:
562        *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
563        */
564       si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, PIPE_BLENDFACTOR_DST_COLOR,
565                           PIPE_BLENDFACTOR_SRC_COLOR);
566       si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_COLOR,
567                           PIPE_BLENDFACTOR_SRC_COLOR);
568       si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_ALPHA,
569                           PIPE_BLENDFACTOR_SRC_ALPHA);
570 
571       /* Look up the ideal settings from tables. */
572       srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
573       dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
574       srcA_opt = si_translate_blend_opt_factor(srcA, true);
575       dstA_opt = si_translate_blend_opt_factor(dstA, true);
576 
577       /* Handle interdependencies. */
578       if (util_blend_factor_uses_dest(srcRGB, false))
579          dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
580       if (util_blend_factor_uses_dest(srcA, false))
581          dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
582 
583       if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
584           (dstRGB == PIPE_BLENDFACTOR_ZERO || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
585            dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
586          dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
587 
588       /* Set the final value. */
589       sx_mrt_blend_opt[i] = S_028760_COLOR_SRC_OPT(srcRGB_opt) |
590                             S_028760_COLOR_DST_OPT(dstRGB_opt) |
591                             S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
592                             S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) |
593                             S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
594 
595       /* Alpha-to-coverage with blending enabled, depth writes enabled, and having no MRTZ export
596        * should disable SX blend optimizations.
597        *
598        * TODO: Add a piglit test for this. It should fail on gfx11 without this.
599        */
600       if (sctx->gfx_level >= GFX11 && state->alpha_to_coverage && i == 0) {
601          sx_mrt_blend_opt[0] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
602                                S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
603       }
604 
605       /* Set blend state. */
606       blend_cntl |= S_028780_ENABLE(1);
607       blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
608       blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(sctx->gfx_level, srcRGB));
609       blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(sctx->gfx_level, dstRGB));
610 
611       if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
612          blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
613          blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
614          blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(sctx->gfx_level, srcA));
615          blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(sctx->gfx_level, dstA));
616       }
617       si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
618       last_blend_cntl = blend_cntl;
619 
620       blend->blend_enable_4bit |= 0xfu << (i * 4);
621 
622       if (sctx->gfx_level >= GFX8 && sctx->gfx_level <= GFX10)
623          blend->dcc_msaa_corruption_4bit |= 0xfu << (i * 4);
624 
625       /* This is only important for formats without alpha. */
626       if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
627           srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
628           dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
629           srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
630          blend->need_src_alpha_4bit |= 0xfu << (i * 4);
631    }
632 
633    if (sctx->gfx_level >= GFX8 && sctx->gfx_level <= GFX10 && logicop_enable)
634       blend->dcc_msaa_corruption_4bit |= blend->cb_target_enabled_4bit;
635 
636    if (blend->cb_target_mask) {
637       color_control |= S_028808_MODE(mode);
638    } else {
639       color_control |= S_028808_MODE(V_028808_CB_DISABLE);
640    }
641 
642    if (sctx->screen->info.rbplus_allowed) {
643       /* Disable RB+ blend optimizations for dual source blending.
644        * Vulkan does this.
645        */
646       if (blend->dual_src_blend) {
647          for (int i = 0; i < num_shader_outputs; i++) {
648             sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
649                                   S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
650          }
651       }
652 
653       for (int i = 0; i < num_shader_outputs; i++)
654          si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]);
655 
656       /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
657       if (blend->dual_src_blend || logicop_enable || mode == V_028808_CB_RESOLVE)
658          color_control |= S_028808_DISABLE_DUAL_QUAD(1);
659    }
660 
661    si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
662    si_pm4_finalize(pm4);
663    return blend;
664 }
665 
si_create_blend_state(struct pipe_context * ctx,const struct pipe_blend_state * state)666 static void *si_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state)
667 {
668    return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
669 }
670 
/* Decide whether a draw with the dst*=1 ("noop") blend state must still be
 * executed. Returns false only when the single bound colorbuffer case can
 * prove the draw has no effect: the fragment shader is known to output
 * vec4(1) whenever a specific texture samples all-1s, and that texture is a
 * depth texture cleared to 1.0 at the sampled level. Returns true otherwise
 * (i.e. "not provably a no-op, keep the draw").
 *
 * NOTE(review): sctx->shader.ps.cso is dereferenced without a NULL check —
 * presumably callers guarantee a bound pixel shader; verify at call sites.
 */
static bool si_check_blend_dst_sampler_noop(struct si_context *sctx)
{
   if (sctx->framebuffer.state.nr_cbufs == 1) {
      struct si_shader_selector *sel = sctx->shader.ps.cso;

      /* 0xff means "not analyzed yet": run the (expensive, one-time)
       * NIR analysis and cache the result in the selector. */
      if (unlikely(sel->info.writes_1_if_tex_is_1 == 0xff)) {
         /* Wait for the shader to be ready. */
         util_queue_fence_wait(&sel->ready);
         assert(sel->nir_binary);

         struct nir_shader *nir = si_deserialize_shader(sel);

         /* Determine if this fragment shader always writes vec4(1) if a specific texture
          * is all 1s.
          */
         float in[4] = { 1.0, 1.0, 1.0, 1.0 };
         float out[4];
         int texunit;
         if (si_nir_is_output_const_if_tex_is_const(nir, in, out, &texunit) &&
             !memcmp(in, out, 4 * sizeof(float))) {
            /* Cached as texunit+1 so 0 can mean "property doesn't hold". */
            sel->info.writes_1_if_tex_is_1 = 1 + texunit;
         } else {
            sel->info.writes_1_if_tex_is_1 = 0;
         }

         ralloc_free(nir);
      }

      if (sel->info.writes_1_if_tex_is_1 &&
          sel->info.writes_1_if_tex_is_1 != 0xff) {
         /* Now check if the texture is cleared to 1 */
         int unit = sctx->shader.ps.cso->info.writes_1_if_tex_is_1 - 1;
         struct si_samplers *samp = &sctx->samplers[PIPE_SHADER_FRAGMENT];
         if ((1u << unit) & samp->enabled_mask) {
            struct si_texture* tex = (struct si_texture*) samp->views[unit]->texture;
            if (tex->is_depth &&
                tex->depth_cleared_level_mask & BITFIELD_BIT(samp->views[unit]->u.tex.first_level) &&
                tex->depth_clear_value[0] == 1) {
               return false;
            }
            /* TODO: handle color textures */
         }
      }
   }

   return true;
}
718 
si_draw_blend_dst_sampler_noop(struct pipe_context * ctx,const struct pipe_draw_info * info,unsigned drawid_offset,const struct pipe_draw_indirect_info * indirect,const struct pipe_draw_start_count_bias * draws,unsigned num_draws)719 static void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx,
720                                            const struct pipe_draw_info *info,
721                                            unsigned drawid_offset,
722                                            const struct pipe_draw_indirect_info *indirect,
723                                            const struct pipe_draw_start_count_bias *draws,
724                                            unsigned num_draws) {
725    struct si_context *sctx = (struct si_context *)ctx;
726 
727    if (!si_check_blend_dst_sampler_noop(sctx))
728       return;
729 
730    sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws);
731 }
732 
/* Draw-vertex-state wrapper mirroring si_draw_blend_dst_sampler_noop:
 * forwards to the real hook only when the draw is not a provable no-op.
 */
static void si_draw_vstate_blend_dst_sampler_noop(struct pipe_context *ctx,
                                                  struct pipe_vertex_state *state,
                                                  uint32_t partial_velem_mask,
                                                  struct pipe_draw_vertex_state_info info,
                                                  const struct pipe_draw_start_count_bias *draws,
                                                  unsigned num_draws) {
   struct si_context *sctx = (struct si_context *)ctx;

   if (si_check_blend_dst_sampler_noop(sctx))
      sctx->real_draw_vertex_state(ctx, state, partial_velem_mask, info, draws, num_draws);
}
746 
/* pipe_context::bind_blend_state hook: bind a blend CSO and mark every
 * derived atom/shader-key that depends on the fields that changed as dirty.
 * A NULL state binds the no-op blend state so nothing below dereferences NULL.
 */
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_state_blend *old_blend = sctx->queued.named.blend;
   struct si_state_blend *blend = (struct si_state_blend *)state;

   if (!blend)
      blend = (struct si_state_blend *)sctx->noop_blend;

   si_pm4_bind_state(sctx, blend, blend);

   /* CB_TARGET_MASK etc. are emitted by the cb_render_state atom; the DCC MSAA
    * comparison only matters when the framebuffer actually has DCC with MSAA.
    */
   if (old_blend->cb_target_mask != blend->cb_target_mask ||
       old_blend->dual_src_blend != blend->dual_src_blend ||
       (old_blend->dcc_msaa_corruption_4bit != blend->dcc_msaa_corruption_4bit &&
        sctx->framebuffer.has_dcc_msaa))
      si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);

   /* DB render state: export-conflict hardware bug workaround depends on blend
    * enables; precise-boolean occlusion queries depend on whether any color
    * writes are enabled.
    * NOTE(review): the second comparison pairs old cb_target_mask with new
    * cb_target_enabled_4bit — verify this asymmetry is intentional.
    */
   if ((sctx->screen->info.has_export_conflict_bug &&
        old_blend->blend_enable_4bit != blend->blend_enable_4bit) ||
       (sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_BOOLEAN &&
        !!old_blend->cb_target_mask != !!blend->cb_target_enabled_4bit))
      si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);

   /* The PS shader key encodes these blend fields; recompute it when they change. */
   if (old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
       old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
       old_blend->alpha_to_one != blend->alpha_to_one ||
       old_blend->dual_src_blend != blend->dual_src_blend ||
       old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
       old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
      si_ps_key_update_framebuffer_blend_rasterizer(sctx);

   if (old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
       old_blend->alpha_to_coverage != blend->alpha_to_coverage)
      si_update_ps_inputs_read_or_disabled(sctx);

   /* DPBB (primitive binning) settings depend on these fields. */
   if (sctx->screen->dpbb_allowed &&
       (old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
        old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
        old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit))
      si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);

   /* Out-of-order rasterization eligibility depends on blend commutativity. */
   if (sctx->screen->info.has_out_of_order_rast &&
       ((old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
         old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
         old_blend->commutative_4bit != blend->commutative_4bit ||
         old_blend->logicop_enable != blend->logicop_enable)))
      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);

   /* RB+ depth-only rendering. See the comment where we set rbplus_depth_only_opt for more
    * information.
    */
   if (sctx->screen->info.rbplus_allowed &&
       !!old_blend->cb_target_mask != !!blend->cb_target_mask) {
      sctx->framebuffer.dirty_cbufs |= BITFIELD_BIT(0);
      si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
   }

   /* Install/remove the no-op draw detection wrapper; skipped for secure BOs. */
   if (likely(!radeon_uses_secure_bos(sctx->ws))) {
      if (unlikely(blend->allows_noop_optimization)) {
         si_install_draw_wrapper(sctx, si_draw_blend_dst_sampler_noop,
                                 si_draw_vstate_blend_dst_sampler_noop);
      } else {
         si_install_draw_wrapper(sctx, NULL, NULL);
      }
   }
}
813 
si_delete_blend_state(struct pipe_context * ctx,void * state)814 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
815 {
816    struct si_context *sctx = (struct si_context *)ctx;
817 
818    if (sctx->queued.named.blend == state)
819       si_bind_blend_state(ctx, sctx->noop_blend);
820 
821    si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(blend));
822 }
823 
si_set_blend_color(struct pipe_context * ctx,const struct pipe_blend_color * state)824 static void si_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state)
825 {
826    struct si_context *sctx = (struct si_context *)ctx;
827    static const struct pipe_blend_color zeros;
828 
829    sctx->blend_color = *state;
830    sctx->blend_color_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
831    si_mark_atom_dirty(sctx, &sctx->atoms.s.blend_color);
832 }
833 
/* Emit the constant blend color: 4 consecutive context registers starting at
 * CB_BLEND_RED, written as the raw float bits of the stored color.
 */
static void si_emit_blend_color(struct si_context *sctx, unsigned index)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;

   radeon_begin(cs);
   radeon_set_context_reg_seq(R_028414_CB_BLEND_RED, 4);
   radeon_emit_array((uint32_t *)sctx->blend_color.color, 4);
   radeon_end();
}
843 
844 /*
845  * Clipping
846  */
847 
si_set_clip_state(struct pipe_context * ctx,const struct pipe_clip_state * state)848 static void si_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state)
849 {
850    struct si_context *sctx = (struct si_context *)ctx;
851    struct pipe_constant_buffer cb;
852    static const struct pipe_clip_state zeros;
853 
854    if (memcmp(&sctx->clip_state, state, sizeof(*state)) == 0)
855       return;
856 
857    sctx->clip_state = *state;
858    sctx->clip_state_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
859    si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_state);
860 
861    cb.buffer = NULL;
862    cb.user_buffer = state->ucp;
863    cb.buffer_offset = 0;
864    cb.buffer_size = 4 * 4 * 8;
865    si_set_internal_const_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
866 }
867 
/* Emit the 6 hardware user clip planes (PA_CL_UCP_0_X onward).
 * Each plane is a vec4, hence 6 * 4 dwords in one register sequence.
 */
static void si_emit_clip_state(struct si_context *sctx, unsigned index)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;

   radeon_begin(cs);
   radeon_set_context_reg_seq(R_0285BC_PA_CL_UCP_0_X, 6 * 4);
   radeon_emit_array((uint32_t *)sctx->clip_state.ucp, 6 * 4);
   radeon_end();
}
877 
/* Emit PA_CL_CLIP_CNTL and PA_CL_VS_OUT_CNTL derived from the current vertex
 * shader's clip/cull distance usage and the rasterizer's clip-plane enables.
 * Uses the packed context-register path on chips that support it (GFX11+),
 * and the register-tracking path otherwise.
 */
static void si_emit_clip_regs(struct si_context *sctx, unsigned index)
{
   struct si_shader *vs = si_get_vs(sctx)->current;
   struct si_shader_selector *vs_sel = vs->selector;
   struct si_shader_info *info = &vs_sel->info;
   struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
   /* window_space_position only exists for vertex shaders; other stages never
    * use window-space positions.
    */
   bool window_space = vs_sel->stage == MESA_SHADER_VERTEX ?
                          info->base.vs.window_space_position : 0;
   unsigned clipdist_mask = vs_sel->info.clipdist_mask;
   /* User clip planes are only enabled when the shader writes no clip distances. */
   unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SI_USER_CLIP_PLANE_MASK;
   unsigned culldist_mask = vs_sel->info.culldist_mask;

   /* Clip distances on points have no effect, so need to be implemented
    * as cull distances. This applies for the clipvertex case as well.
    *
    * Setting this for primitives other than points should have no adverse
    * effects.
    */
   clipdist_mask &= rs->clip_plane_enable;
   culldist_mask |= clipdist_mask;

   /* Cull distance enables live in bits 8..15 of PA_CL_VS_OUT_CNTL. */
   unsigned pa_cl_cntl = S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->gfx_level >= GFX10_3 &&
                                                           !sctx->screen->options.vrs2x2) |
                         S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->gfx_level >= GFX10_3) |
                         clipdist_mask | (culldist_mask << 8);

   unsigned pa_cl_clip_cntl = rs->pa_cl_clip_cntl | ucp_mask |
                              S_028810_CLIP_DISABLE(window_space);
   unsigned pa_cl_vs_out_cntl = pa_cl_cntl | vs->pa_cl_vs_out_cntl;

   if (sctx->screen->info.has_set_context_pairs_packed) {
      radeon_begin(&sctx->gfx_cs);
      gfx11_begin_packed_context_regs();
      gfx11_opt_set_context_reg(R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL,
                                pa_cl_clip_cntl);
      gfx11_opt_set_context_reg(R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL,
                                pa_cl_vs_out_cntl);
      gfx11_end_packed_context_regs();
      radeon_end(); /* don't track context rolls on GFX11 */
   } else {
      radeon_begin(&sctx->gfx_cs);
      radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL,
                                 pa_cl_clip_cntl);
      radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL,
                                 pa_cl_vs_out_cntl);
      radeon_end_update_context_roll(sctx);
   }
}
926 
927 /*
928  * Rasterizer
929  */
930 
si_translate_fill(uint32_t func)931 static uint32_t si_translate_fill(uint32_t func)
932 {
933    switch (func) {
934    case PIPE_POLYGON_MODE_FILL:
935       return V_028814_X_DRAW_TRIANGLES;
936    case PIPE_POLYGON_MODE_LINE:
937       return V_028814_X_DRAW_LINES;
938    case PIPE_POLYGON_MODE_POINT:
939       return V_028814_X_DRAW_POINTS;
940    default:
941       assert(0);
942       return V_028814_X_DRAW_POINTS;
943    }
944 }
945 
/* pipe_context::create_rasterizer_state hook: translate pipe_rasterizer_state
 * into a si_state_rasterizer containing both cached CPU-side flags and
 * precomputed hardware register values (PA_SU_*, PA_SC_*, PA_CL_*, SPI_*)
 * that si_pm4_emit_rasterizer later writes to the command stream.
 * Returns NULL on allocation failure.
 */
static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state)
{
   struct si_screen *sscreen = ((struct si_context *)ctx)->screen;
   struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);

   if (!rs) {
      return NULL;
   }

   /* Plain copies of gallium state consulted elsewhere in the driver. */
   rs->scissor_enable = state->scissor;
   rs->clip_halfz = state->clip_halfz;
   rs->two_side = state->light_twoside;
   rs->multisample_enable = state->multisample;
   rs->force_persample_interp = state->force_persample_interp;
   rs->clip_plane_enable = state->clip_plane_enable;
   rs->half_pixel_center = state->half_pixel_center;
   rs->line_stipple_enable = state->line_stipple_enable;
   rs->poly_stipple_enable = state->poly_stipple_enable;
   rs->line_smooth = state->line_smooth;
   rs->line_width = state->line_width;
   rs->poly_smooth = state->poly_smooth;
   rs->point_smooth = state->point_smooth;
   rs->uses_poly_offset = state->offset_point || state->offset_line || state->offset_tri;
   rs->clamp_fragment_color = state->clamp_fragment_color;
   rs->clamp_vertex_color = state->clamp_vertex_color;
   rs->flatshade = state->flatshade;
   rs->flatshade_first = state->flatshade_first;
   rs->sprite_coord_enable = state->sprite_coord_enable;
   rs->rasterizer_discard = state->rasterizer_discard;
   rs->bottom_edge_rule = state->bottom_edge_rule;
   /* A fill mode only matters for a face that isn't culled. */
   rs->polygon_mode_is_lines =
      (state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) ||
      (state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK));
   rs->polygon_mode_is_points =
      (state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) ||
      (state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK));
   rs->pa_sc_line_stipple = state->line_stipple_enable ?
                               S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
                               S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
   /* TODO: implement line stippling with perpendicular end caps. */
   /* Line width > 2 is an internal recommendation. */
   rs->perpendicular_end_caps = state->multisample &&
                                state->line_width > 2 && !state->line_stipple_enable;

   rs->pa_cl_clip_cntl = S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
                         S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) |
                         S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) |
                         S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
                         S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);

   /* NGG culling flags for triangles and lines; the y-inverted variant swaps
    * front/back face culling below.
    */
   rs->ngg_cull_flags_tris = SI_NGG_CULL_TRIANGLES |
                             SI_NGG_CULL_CLIP_PLANE_ENABLE(state->clip_plane_enable);
   rs->ngg_cull_flags_tris_y_inverted = rs->ngg_cull_flags_tris;

   rs->ngg_cull_flags_lines = SI_NGG_CULL_LINES |
                              (!rs->perpendicular_end_caps ? SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT : 0) |
                              SI_NGG_CULL_CLIP_PLANE_ENABLE(state->clip_plane_enable);

   if (rs->rasterizer_discard) {
      /* Cull both faces so NGG drops everything. */
      rs->ngg_cull_flags_tris |= SI_NGG_CULL_FRONT_FACE |
                                 SI_NGG_CULL_BACK_FACE;
      rs->ngg_cull_flags_tris_y_inverted = rs->ngg_cull_flags_tris;
   } else {
      bool cull_front, cull_back;

      /* Winding flips which gallium face maps to the hardware front face. */
      if (!state->front_ccw) {
         cull_front = !!(state->cull_face & PIPE_FACE_FRONT);
         cull_back = !!(state->cull_face & PIPE_FACE_BACK);
      } else {
         cull_back = !!(state->cull_face & PIPE_FACE_FRONT);
         cull_front = !!(state->cull_face & PIPE_FACE_BACK);
      }

      if (cull_front) {
         rs->ngg_cull_flags_tris |= SI_NGG_CULL_FRONT_FACE;
         rs->ngg_cull_flags_tris_y_inverted |= SI_NGG_CULL_BACK_FACE;
      }

      if (cull_back) {
         rs->ngg_cull_flags_tris |= SI_NGG_CULL_BACK_FACE;
         rs->ngg_cull_flags_tris_y_inverted |= SI_NGG_CULL_FRONT_FACE;
      }
   }

   /* Force gl_FrontFacing to true or false if the other face is culled. */
   if (util_bitcount(state->cull_face) == 1) {
      if (state->cull_face & PIPE_FACE_FRONT)
         rs->force_front_face_input = -1;
      else
         rs->force_front_face_input = 1;
   }

   rs->spi_interp_control_0 = S_0286D4_FLAT_SHADE_ENA(1) |
                              S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) |
                              S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
                              S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
                              S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
                              S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
                              S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode !=
                                                        PIPE_SPRITE_COORD_UPPER_LEFT);

   /* point size 12.4 fixed point */
   float psize_min, psize_max;
   unsigned tmp = (unsigned)(state->point_size * 8.0);
   rs->pa_su_point_size = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp);

   if (state->point_size_per_vertex) {
      psize_min = util_get_min_point_size(state);
      psize_max = SI_MAX_POINT_SIZE;
   } else {
      /* Force the point size to be as if the vertex output was disabled. */
      psize_min = state->point_size;
      psize_max = state->point_size;
   }
   rs->max_point_size = psize_max;

   /* Divide by two, because 0.5 = 1 pixel. */
   rs->pa_su_point_minmax = S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min / 2)) |
                            S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max / 2));
   rs->pa_su_line_cntl = S_028A08_WIDTH(si_pack_float_12p4(state->line_width / 2));

   rs->pa_sc_mode_cntl_0 = S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
                           S_028A48_MSAA_ENABLE(state->multisample || state->poly_smooth ||
                                                state->line_smooth) |
                           S_028A48_VPORT_SCISSOR_ENABLE(1) |
                           S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.gfx_level >= GFX9);

   /* Polygon mode (wireframe/points) is only active for non-culled faces. */
   bool polygon_mode_enabled =
      (state->fill_front != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_FRONT)) ||
      (state->fill_back != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_BACK));

   rs->pa_su_sc_mode_cntl = S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
                            S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
                            S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
                            S_028814_FACE(!state->front_ccw) |
                            S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
                            S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
                            S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
                            S_028814_POLY_MODE(polygon_mode_enabled) |
                            S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
                            S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)) |
                            /* this must be set if POLY_MODE or PERPENDICULAR_ENDCAP_ENA is set */
                            S_028814_KEEP_TOGETHER_ENABLE(sscreen->info.gfx_level >= GFX10 ?
                                                             polygon_mode_enabled ||
                                                             rs->perpendicular_end_caps : 0);
   if (sscreen->info.gfx_level >= GFX10) {
      rs->pa_cl_ngg_cntl = S_028838_INDEX_BUF_EDGE_FLAG_ENA(rs->polygon_mode_is_points ||
                                                            rs->polygon_mode_is_lines) |
                           S_028838_VERTEX_REUSE_DEPTH(sscreen->info.gfx_level >= GFX10_3 ? 30 : 0);
   }

   if (state->bottom_edge_rule) {
      /* OpenGL windows should set this. */
      rs->pa_sc_edgerule = S_028230_ER_TRI(0xA) |
                           S_028230_ER_POINT(0x5) |
                           S_028230_ER_RECT(0x9) |
                           S_028230_ER_LINE_LR(0x2A) |
                           S_028230_ER_LINE_RL(0x2A) |
                           S_028230_ER_LINE_TB(0xA) |
                           S_028230_ER_LINE_BT(0xA);
   } else {
      /* OpenGL FBOs and Direct3D should set this. */
      rs->pa_sc_edgerule = S_028230_ER_TRI(0xA) |
                           S_028230_ER_POINT(0x6) |
                           S_028230_ER_RECT(0xA) |
                           S_028230_ER_LINE_LR(0x19) |
                           S_028230_ER_LINE_RL(0x25) |
                           S_028230_ER_LINE_TB(0xA) |
                           S_028230_ER_LINE_BT(0xA);
   }

   if (rs->uses_poly_offset) {
      /* Calculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
      rs->pa_su_poly_offset_clamp = fui(state->offset_clamp);
      rs->pa_su_poly_offset_frontback_scale = fui(state->offset_scale * 16);

      if (!state->offset_units_unscaled) {
         /* 16-bit zbuffer */
         rs->pa_su_poly_offset_db_fmt_cntl[0] = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
         rs->pa_su_poly_offset_frontback_offset[0] = fui(state->offset_units * 4);

         /* 24-bit zbuffer */
         rs->pa_su_poly_offset_db_fmt_cntl[1] = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
         rs->pa_su_poly_offset_frontback_offset[1] = fui(state->offset_units * 2);

         /* 32-bit zbuffer */
         rs->pa_su_poly_offset_db_fmt_cntl[2] = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
                                                S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
         rs->pa_su_poly_offset_frontback_offset[2] = fui(state->offset_units);
      } else {
         /* Unscaled units: the same offset applies to all depth formats. */
         rs->pa_su_poly_offset_frontback_offset[0] = fui(state->offset_units);
         rs->pa_su_poly_offset_frontback_offset[1] = fui(state->offset_units);
         rs->pa_su_poly_offset_frontback_offset[2] = fui(state->offset_units);
      }
   }

   return rs;
}
1144 
/* Emit the precomputed rasterizer registers (built in si_create_rs_state).
 * Two paths: packed context-register writes on chips that support them
 * (GFX11+, hence PA_CL_NGG_CNTL is written unconditionally there), and the
 * tracked register path for older chips. Polygon offset registers are only
 * emitted when in use and a zbuffer is bound, indexed by the zbuffer's
 * depth-format index.
 */
static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
{
   struct si_state_rasterizer *state = sctx->queued.named.rasterizer;

   if (sctx->screen->info.has_set_context_pairs_packed) {
      radeon_begin(&sctx->gfx_cs);
      gfx11_begin_packed_context_regs();
      gfx11_opt_set_context_reg(R_0286D4_SPI_INTERP_CONTROL_0, SI_TRACKED_SPI_INTERP_CONTROL_0,
                                state->spi_interp_control_0);
      gfx11_opt_set_context_reg(R_028A00_PA_SU_POINT_SIZE, SI_TRACKED_PA_SU_POINT_SIZE,
                                state->pa_su_point_size);
      gfx11_opt_set_context_reg(R_028A04_PA_SU_POINT_MINMAX, SI_TRACKED_PA_SU_POINT_MINMAX,
                                state->pa_su_point_minmax);
      gfx11_opt_set_context_reg(R_028A08_PA_SU_LINE_CNTL, SI_TRACKED_PA_SU_LINE_CNTL,
                                state->pa_su_line_cntl);
      gfx11_opt_set_context_reg(R_028A48_PA_SC_MODE_CNTL_0, SI_TRACKED_PA_SC_MODE_CNTL_0,
                                state->pa_sc_mode_cntl_0);
      gfx11_opt_set_context_reg(R_028814_PA_SU_SC_MODE_CNTL, SI_TRACKED_PA_SU_SC_MODE_CNTL,
                                state->pa_su_sc_mode_cntl);
      gfx11_opt_set_context_reg(R_028838_PA_CL_NGG_CNTL, SI_TRACKED_PA_CL_NGG_CNTL,
                                state->pa_cl_ngg_cntl);
      gfx11_opt_set_context_reg(R_028230_PA_SC_EDGERULE, SI_TRACKED_PA_SC_EDGERULE,
                                state->pa_sc_edgerule);

      if (state->uses_poly_offset && sctx->framebuffer.state.zsbuf) {
         unsigned db_format_index =
            ((struct si_surface *)sctx->framebuffer.state.zsbuf)->db_format_index;

         /* Front and back use the same scale; the offset is format-dependent. */
         gfx11_opt_set_context_reg(R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
                                   SI_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
                                   state->pa_su_poly_offset_db_fmt_cntl[db_format_index]);
         gfx11_opt_set_context_reg(R_028B7C_PA_SU_POLY_OFFSET_CLAMP,
                                   SI_TRACKED_PA_SU_POLY_OFFSET_CLAMP,
                                   state->pa_su_poly_offset_clamp);
         gfx11_opt_set_context_reg(R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
                                   SI_TRACKED_PA_SU_POLY_OFFSET_FRONT_SCALE,
                                   state->pa_su_poly_offset_frontback_scale);
         gfx11_opt_set_context_reg(R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
                                   SI_TRACKED_PA_SU_POLY_OFFSET_FRONT_OFFSET,
                                   state->pa_su_poly_offset_frontback_offset[db_format_index]);
         gfx11_opt_set_context_reg(R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
                                   SI_TRACKED_PA_SU_POLY_OFFSET_BACK_SCALE,
                                   state->pa_su_poly_offset_frontback_scale);
         gfx11_opt_set_context_reg(R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
                                   SI_TRACKED_PA_SU_POLY_OFFSET_BACK_OFFSET,
                                   state->pa_su_poly_offset_frontback_offset[db_format_index]);
      }
      gfx11_end_packed_context_regs();
      radeon_end(); /* don't track context rolls on GFX11 */
   } else {
      radeon_begin(&sctx->gfx_cs);
      radeon_opt_set_context_reg(sctx, R_0286D4_SPI_INTERP_CONTROL_0,
                                 SI_TRACKED_SPI_INTERP_CONTROL_0,
                                 state->spi_interp_control_0);
      radeon_opt_set_context_reg(sctx, R_028A00_PA_SU_POINT_SIZE, SI_TRACKED_PA_SU_POINT_SIZE,
                                 state->pa_su_point_size);
      radeon_opt_set_context_reg(sctx, R_028A04_PA_SU_POINT_MINMAX, SI_TRACKED_PA_SU_POINT_MINMAX,
                                 state->pa_su_point_minmax);
      radeon_opt_set_context_reg(sctx, R_028A08_PA_SU_LINE_CNTL, SI_TRACKED_PA_SU_LINE_CNTL,
                                 state->pa_su_line_cntl);
      radeon_opt_set_context_reg(sctx, R_028A48_PA_SC_MODE_CNTL_0, SI_TRACKED_PA_SC_MODE_CNTL_0,
                                 state->pa_sc_mode_cntl_0);
      radeon_opt_set_context_reg(sctx, R_028814_PA_SU_SC_MODE_CNTL,
                                 SI_TRACKED_PA_SU_SC_MODE_CNTL, state->pa_su_sc_mode_cntl);
      /* PA_CL_NGG_CNTL only exists on GFX10+. */
      if (sctx->gfx_level >= GFX10) {
         radeon_opt_set_context_reg(sctx, R_028838_PA_CL_NGG_CNTL, SI_TRACKED_PA_CL_NGG_CNTL,
                                    state->pa_cl_ngg_cntl);
      }
      radeon_opt_set_context_reg(sctx, R_028230_PA_SC_EDGERULE, SI_TRACKED_PA_SC_EDGERULE,
                                 state->pa_sc_edgerule);

      if (state->uses_poly_offset && sctx->framebuffer.state.zsbuf) {
         unsigned db_format_index =
            ((struct si_surface *)sctx->framebuffer.state.zsbuf)->db_format_index;

         /* Six consecutive registers: fmt cntl, clamp, front scale/offset,
          * back scale/offset.
          */
         radeon_opt_set_context_reg6(R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
                                     SI_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
                                     state->pa_su_poly_offset_db_fmt_cntl[db_format_index],
                                     state->pa_su_poly_offset_clamp,
                                     state->pa_su_poly_offset_frontback_scale,
                                     state->pa_su_poly_offset_frontback_offset[db_format_index],
                                     state->pa_su_poly_offset_frontback_scale,
                                     state->pa_su_poly_offset_frontback_offset[db_format_index]);
      }
      radeon_end_update_context_roll();
   }

   sctx->emitted.named.rasterizer = state;
}
1234 
si_bind_rs_state(struct pipe_context * ctx,void * state)1235 static void si_bind_rs_state(struct pipe_context *ctx, void *state)
1236 {
1237    struct si_context *sctx = (struct si_context *)ctx;
1238    struct si_state_rasterizer *old_rs = (struct si_state_rasterizer *)sctx->queued.named.rasterizer;
1239    struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
1240 
1241    if (!rs)
1242       rs = (struct si_state_rasterizer *)sctx->discard_rasterizer_state;
1243 
1244    if (old_rs->multisample_enable != rs->multisample_enable) {
1245       si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
1246 
1247       /* Update the small primitive filter workaround if necessary. */
1248       if (sctx->screen->info.has_small_prim_filter_sample_loc_bug && sctx->framebuffer.nr_samples > 1)
1249          si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_locations);
1250 
1251       /* NGG cull state uses multisample_enable. */
1252       if (sctx->screen->use_ngg_culling)
1253          si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state);
1254    }
1255 
1256    if (old_rs->perpendicular_end_caps != rs->perpendicular_end_caps)
1257       si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
1258 
1259    if (sctx->screen->use_ngg_culling &&
1260        (old_rs->half_pixel_center != rs->half_pixel_center ||
1261         old_rs->line_width != rs->line_width))
1262       si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state);
1263 
1264    SET_FIELD(sctx->current_vs_state, VS_STATE_CLAMP_VERTEX_COLOR, rs->clamp_vertex_color);
1265 
1266    si_pm4_bind_state(sctx, rasterizer, rs);
1267 
1268    if (old_rs->scissor_enable != rs->scissor_enable)
1269       si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
1270 
1271    /* This never changes for OpenGL. */
1272    if (old_rs->half_pixel_center != rs->half_pixel_center)
1273       si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband);
1274 
1275    if (util_prim_is_lines(sctx->current_rast_prim))
1276       si_set_clip_discard_distance(sctx, rs->line_width);
1277    else if (sctx->current_rast_prim == MESA_PRIM_POINTS)
1278       si_set_clip_discard_distance(sctx, rs->max_point_size);
1279 
1280    if (old_rs->clip_halfz != rs->clip_halfz)
1281       si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports);
1282 
1283    if (old_rs->clip_plane_enable != rs->clip_plane_enable ||
1284        old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
1285       si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
1286 
1287    if (old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
1288        old_rs->flatshade != rs->flatshade)
1289       si_mark_atom_dirty(sctx, &sctx->atoms.s.spi_map);
1290 
1291    if (sctx->screen->dpbb_allowed && (old_rs->bottom_edge_rule != rs->bottom_edge_rule))
1292       si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
1293 
1294    if (old_rs->multisample_enable != rs->multisample_enable)
1295       si_ps_key_update_framebuffer_blend_rasterizer(sctx);
1296 
1297    if (old_rs->flatshade != rs->flatshade ||
1298        old_rs->clamp_fragment_color != rs->clamp_fragment_color)
1299       si_ps_key_update_rasterizer(sctx);
1300 
1301    if (old_rs->flatshade != rs->flatshade ||
1302        old_rs->force_persample_interp != rs->force_persample_interp ||
1303        old_rs->multisample_enable != rs->multisample_enable)
1304       si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx);
1305 
1306    if (old_rs->rasterizer_discard != rs->rasterizer_discard ||
1307        old_rs->two_side != rs->two_side ||
1308        old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
1309        old_rs->point_smooth != rs->point_smooth)
1310       si_update_ps_inputs_read_or_disabled(sctx);
1311 
1312    if (old_rs->point_smooth != rs->point_smooth ||
1313        old_rs->line_smooth != rs->line_smooth ||
1314        old_rs->poly_smooth != rs->poly_smooth ||
1315        old_rs->polygon_mode_is_points != rs->polygon_mode_is_points ||
1316        old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
1317        old_rs->two_side != rs->two_side ||
1318        old_rs->force_front_face_input != rs->force_front_face_input)
1319       si_vs_ps_key_update_rast_prim_smooth_stipple(sctx);
1320 
1321    /* Used by si_get_vs_key_outputs in si_update_shaders: */
1322    if (old_rs->clip_plane_enable != rs->clip_plane_enable)
1323       sctx->do_update_shaders = true;
1324 
1325    if (old_rs->line_smooth != rs->line_smooth ||
1326        old_rs->poly_smooth != rs->poly_smooth ||
1327        old_rs->point_smooth != rs->point_smooth ||
1328        old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
1329        old_rs->flatshade != rs->flatshade)
1330       si_update_vrs_flat_shading(sctx);
1331 
1332    if (old_rs->flatshade_first != rs->flatshade_first)
1333       si_update_ngg_sgpr_state_provoking_vtx(sctx, si_get_vs(sctx)->current, sctx->ngg);
1334 }
1335 
si_delete_rs_state(struct pipe_context * ctx,void * state)1336 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
1337 {
1338    struct si_context *sctx = (struct si_context *)ctx;
1339    struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
1340 
1341    if (sctx->queued.named.rasterizer == state)
1342       si_bind_rs_state(ctx, sctx->discard_rasterizer_state);
1343 
1344    si_pm4_free_state(sctx, &rs->pm4, SI_STATE_IDX(rasterizer));
1345 }
1346 
1347 /*
1348  * inferred state between dsa and stencil ref
1349  */
/* Emit DB_STENCILREFMASK and DB_STENCILREFMASK_BF.
 *
 * These registers mix state from two CSOs: the stencil reference values come
 * from pipe_stencil_ref while the value/write masks come from the bound DSA
 * state (cached in sctx->stencil_ref.dsa_part), so they are emitted together
 * by this atom whenever either part changes.
 */
static void si_emit_stencil_ref(struct si_context *sctx, unsigned index)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
   struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
   struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;

   radeon_begin(cs);
   /* Two consecutive context registers: front-face first, back-face (_BF) second. */
   radeon_set_context_reg_seq(R_028430_DB_STENCILREFMASK, 2);
   radeon_emit(S_028430_STENCILTESTVAL(ref->ref_value[0]) |
               S_028430_STENCILMASK(dsa->valuemask[0]) |
               S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
               S_028430_STENCILOPVAL(1));
   radeon_emit(S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
               S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
               S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
               S_028434_STENCILOPVAL_BF(1));
   radeon_end();
}
1368 
si_set_stencil_ref(struct pipe_context * ctx,const struct pipe_stencil_ref state)1369 static void si_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref state)
1370 {
1371    struct si_context *sctx = (struct si_context *)ctx;
1372 
1373    if (memcmp(&sctx->stencil_ref.state, &state, sizeof(state)) == 0)
1374       return;
1375 
1376    sctx->stencil_ref.state = state;
1377    si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
1378 }
1379 
1380 /*
1381  * DSA
1382  */
1383 
si_translate_stencil_op(int s_op)1384 static uint32_t si_translate_stencil_op(int s_op)
1385 {
1386    switch (s_op) {
1387    case PIPE_STENCIL_OP_KEEP:
1388       return V_02842C_STENCIL_KEEP;
1389    case PIPE_STENCIL_OP_ZERO:
1390       return V_02842C_STENCIL_ZERO;
1391    case PIPE_STENCIL_OP_REPLACE:
1392       return V_02842C_STENCIL_REPLACE_TEST;
1393    case PIPE_STENCIL_OP_INCR:
1394       return V_02842C_STENCIL_ADD_CLAMP;
1395    case PIPE_STENCIL_OP_DECR:
1396       return V_02842C_STENCIL_SUB_CLAMP;
1397    case PIPE_STENCIL_OP_INCR_WRAP:
1398       return V_02842C_STENCIL_ADD_WRAP;
1399    case PIPE_STENCIL_OP_DECR_WRAP:
1400       return V_02842C_STENCIL_SUB_WRAP;
1401    case PIPE_STENCIL_OP_INVERT:
1402       return V_02842C_STENCIL_INVERT;
1403    default:
1404       PRINT_ERR("Unknown stencil op %d", s_op);
1405       assert(0);
1406       break;
1407    }
1408    return 0;
1409 }
1410 
si_order_invariant_stencil_op(enum pipe_stencil_op op)1411 static bool si_order_invariant_stencil_op(enum pipe_stencil_op op)
1412 {
1413    /* REPLACE is normally order invariant, except when the stencil
1414     * reference value is written by the fragment shader. Tracking this
1415     * interaction does not seem worth the effort, so be conservative. */
1416    return op != PIPE_STENCIL_OP_INCR && op != PIPE_STENCIL_OP_DECR && op != PIPE_STENCIL_OP_REPLACE;
1417 }
1418 
1419 /* Compute whether, assuming Z writes are disabled, this stencil state is order
1420  * invariant in the sense that the set of passing fragments as well as the
1421  * final stencil buffer result does not depend on the order of fragments. */
si_order_invariant_stencil_state(const struct pipe_stencil_state * state)1422 static bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state)
1423 {
1424    return !state->enabled || !state->writemask ||
1425           /* The following assumes that Z writes are disabled. */
1426           (state->func == PIPE_FUNC_ALWAYS && si_order_invariant_stencil_op(state->zpass_op) &&
1427            si_order_invariant_stencil_op(state->zfail_op)) ||
1428           (state->func == PIPE_FUNC_NEVER && si_order_invariant_stencil_op(state->fail_op));
1429 }
1430 
si_create_dsa_state(struct pipe_context * ctx,const struct pipe_depth_stencil_alpha_state * state)1431 static void *si_create_dsa_state(struct pipe_context *ctx,
1432                                  const struct pipe_depth_stencil_alpha_state *state)
1433 {
1434    struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
1435    if (!dsa) {
1436       return NULL;
1437    }
1438 
1439    dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
1440    dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
1441    dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
1442    dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
1443 
1444    dsa->db_depth_control =
1445       S_028800_Z_ENABLE(state->depth_enabled) | S_028800_Z_WRITE_ENABLE(state->depth_writemask) |
1446       S_028800_ZFUNC(state->depth_func) | S_028800_DEPTH_BOUNDS_ENABLE(state->depth_bounds_test);
1447 
1448    /* stencil */
1449    if (state->stencil[0].enabled) {
1450       dsa->db_depth_control |= S_028800_STENCIL_ENABLE(1);
1451       dsa->db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
1452       dsa->db_stencil_control |=
1453          S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
1454       dsa->db_stencil_control |=
1455          S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
1456       dsa->db_stencil_control |=
1457          S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
1458 
1459       if (state->stencil[1].enabled) {
1460          dsa->db_depth_control |= S_028800_BACKFACE_ENABLE(1);
1461          dsa->db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
1462          dsa->db_stencil_control |=
1463             S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
1464          dsa->db_stencil_control |=
1465             S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
1466          dsa->db_stencil_control |=
1467             S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
1468       }
1469    }
1470 
1471    dsa->db_depth_bounds_min = fui(state->depth_bounds_min);
1472    dsa->db_depth_bounds_max = fui(state->depth_bounds_max);
1473 
1474    /* alpha */
1475    if (state->alpha_enabled) {
1476       dsa->alpha_func = state->alpha_func;
1477       dsa->spi_shader_user_data_ps_alpha_ref = fui(state->alpha_ref_value);
1478    } else {
1479       dsa->alpha_func = PIPE_FUNC_ALWAYS;
1480    }
1481 
1482    dsa->depth_enabled = state->depth_enabled;
1483    dsa->depth_write_enabled = state->depth_enabled && state->depth_writemask;
1484    dsa->stencil_enabled = state->stencil[0].enabled;
1485    dsa->stencil_write_enabled =
1486       (util_writes_stencil(&state->stencil[0]) || util_writes_stencil(&state->stencil[1]));
1487    dsa->db_can_write = dsa->depth_write_enabled || dsa->stencil_write_enabled;
1488    dsa->depth_bounds_enabled = state->depth_bounds_test;
1489 
1490    bool zfunc_is_ordered =
1491       state->depth_func == PIPE_FUNC_NEVER || state->depth_func == PIPE_FUNC_LESS ||
1492       state->depth_func == PIPE_FUNC_LEQUAL || state->depth_func == PIPE_FUNC_GREATER ||
1493       state->depth_func == PIPE_FUNC_GEQUAL;
1494 
1495    bool nozwrite_and_order_invariant_stencil =
1496       !dsa->db_can_write ||
1497       (!dsa->depth_write_enabled && si_order_invariant_stencil_state(&state->stencil[0]) &&
1498        si_order_invariant_stencil_state(&state->stencil[1]));
1499 
1500    dsa->order_invariance[1].zs =
1501       nozwrite_and_order_invariant_stencil || (!dsa->stencil_write_enabled && zfunc_is_ordered);
1502    dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered;
1503 
1504    dsa->order_invariance[1].pass_set =
1505       nozwrite_and_order_invariant_stencil ||
1506       (!dsa->stencil_write_enabled &&
1507        (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER));
1508    dsa->order_invariance[0].pass_set =
1509       !dsa->depth_write_enabled ||
1510       (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER);
1511 
1512    return dsa;
1513 }
1514 
/* Emit the precomputed DSA register state (from si_create_dsa_state).
 *
 * Two code paths: GFX11+ with SET_CONTEXT_REG_PAIRS_PACKED uses the
 * gfx11_* packed-register helpers; older chips use the classic
 * radeon_opt_set_context_reg* helpers that skip redundant writes and
 * track context rolls. DB_STENCIL_CONTROL and the depth-bounds registers
 * are only emitted when the corresponding feature is enabled in the CSO.
 * The alpha-test reference value is an SH (persistent SGPR) register, not
 * a context register, so it is emitted outside the context-reg section.
 */
static void si_pm4_emit_dsa(struct si_context *sctx, unsigned index)
{
   struct si_state_dsa *state = sctx->queued.named.dsa;
   /* The atom framework only calls this when the queued state differs from
    * the last emitted one. */
   assert(state && state != sctx->emitted.named.dsa);

   if (sctx->screen->info.has_set_context_pairs_packed) {
      radeon_begin(&sctx->gfx_cs);
      gfx11_begin_packed_context_regs();
      gfx11_opt_set_context_reg(R_028800_DB_DEPTH_CONTROL, SI_TRACKED_DB_DEPTH_CONTROL,
                                state->db_depth_control);
      if (state->stencil_enabled) {
         gfx11_opt_set_context_reg(R_02842C_DB_STENCIL_CONTROL, SI_TRACKED_DB_STENCIL_CONTROL,
                                   state->db_stencil_control);
      }
      if (state->depth_bounds_enabled) {
         gfx11_opt_set_context_reg(R_028020_DB_DEPTH_BOUNDS_MIN, SI_TRACKED_DB_DEPTH_BOUNDS_MIN,
                                   state->db_depth_bounds_min);
         gfx11_opt_set_context_reg(R_028024_DB_DEPTH_BOUNDS_MAX, SI_TRACKED_DB_DEPTH_BOUNDS_MAX,
                                   state->db_depth_bounds_max);
      }
      gfx11_end_packed_context_regs();

      if (state->alpha_func != PIPE_FUNC_ALWAYS) {
         /* Alpha reference value lives in a PS user-data SGPR. */
         if (sctx->screen->info.has_set_sh_pairs_packed) {
            gfx11_opt_push_gfx_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4,
                                      SI_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF,
                                      state->spi_shader_user_data_ps_alpha_ref);
         } else {
            radeon_opt_set_sh_reg(sctx, R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4,
                                  SI_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF,
                                  state->spi_shader_user_data_ps_alpha_ref);
         }
      }
      radeon_end(); /* don't track context rolls on GFX11 */
   } else {
      radeon_begin(&sctx->gfx_cs);
      radeon_opt_set_context_reg(sctx, R_028800_DB_DEPTH_CONTROL, SI_TRACKED_DB_DEPTH_CONTROL,
                                 state->db_depth_control);
      if (state->stencil_enabled) {
         radeon_opt_set_context_reg(sctx, R_02842C_DB_STENCIL_CONTROL, SI_TRACKED_DB_STENCIL_CONTROL,
                                    state->db_stencil_control);
      }
      if (state->depth_bounds_enabled) {
         /* MIN and MAX are adjacent registers; emit them as one pair. */
         radeon_opt_set_context_reg2(sctx, R_028020_DB_DEPTH_BOUNDS_MIN,
                                     SI_TRACKED_DB_DEPTH_BOUNDS_MIN,
                                     state->db_depth_bounds_min,
                                     state->db_depth_bounds_max);
      }
      radeon_end_update_context_roll();

      if (state->alpha_func != PIPE_FUNC_ALWAYS) {
         /* SH registers don't cause context rolls, hence the separate
          * begin/end pair after the roll tracking above. */
         radeon_begin(&sctx->gfx_cs);
         radeon_opt_set_sh_reg(sctx, R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4,
                               SI_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF,
                               state->spi_shader_user_data_ps_alpha_ref);
         radeon_end();
      }
   }

   sctx->emitted.named.dsa = state;
}
1576 
/* pipe_context::bind_depth_stencil_alpha_state.
 *
 * Binds the DSA CSO (falling back to the no-op state for NULL) and marks
 * dirty every atom whose emitted value depends on a DSA field that actually
 * changed relative to the previously bound state.
 */
static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
   struct si_state_dsa *dsa = state;

   if (!dsa)
      dsa = (struct si_state_dsa *)sctx->noop_dsa;

   si_pm4_bind_state(sctx, dsa, dsa);

   /* The stencil value/write masks are emitted by the stencil_ref atom,
    * so re-emit it if the DSA-owned part changed. */
   if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
              sizeof(struct si_dsa_stencil_ref_part)) != 0) {
      sctx->stencil_ref.dsa_part = dsa->stencil_ref;
      si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
   }

   /* Alpha test is implemented in the PS, so a func change requires a
    * shader-key update and possibly a shader recompile. */
   if (old_dsa->alpha_func != dsa->alpha_func) {
      si_ps_key_update_dsa(sctx);
      si_update_ps_inputs_read_or_disabled(sctx);
      sctx->do_update_shaders = true;
   }

   /* PERFECT_ZPASS_COUNTS in DB_COUNT_CONTROL depends on depth enable/write
    * state when precise boolean occlusion queries are active. */
   if (sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_BOOLEAN &&
       (old_dsa->depth_enabled != dsa->depth_enabled ||
        old_dsa->depth_write_enabled != dsa->depth_write_enabled))
      si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);

   /* Primitive binning config depends on whether DB is read/written. */
   if (sctx->screen->dpbb_allowed && ((old_dsa->depth_enabled != dsa->depth_enabled ||
                                       old_dsa->stencil_enabled != dsa->stencil_enabled ||
                                       old_dsa->db_can_write != dsa->db_can_write)))
      si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);

   /* Out-of-order rasterization eligibility is derived from the
    * order-invariance flags computed in si_create_dsa_state. */
   if (sctx->screen->info.has_out_of_order_rast &&
       (memcmp(old_dsa->order_invariance, dsa->order_invariance,
               sizeof(old_dsa->order_invariance))))
      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
}
1615 
si_delete_dsa_state(struct pipe_context * ctx,void * state)1616 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
1617 {
1618    struct si_context *sctx = (struct si_context *)ctx;
1619 
1620    if (sctx->queued.named.dsa == state)
1621       si_bind_dsa_state(ctx, sctx->noop_dsa);
1622 
1623    si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(dsa));
1624 }
1625 
si_create_db_flush_dsa(struct si_context * sctx)1626 static void *si_create_db_flush_dsa(struct si_context *sctx)
1627 {
1628    struct pipe_depth_stencil_alpha_state dsa = {};
1629 
1630    return sctx->b.create_depth_stencil_alpha_state(&sctx->b, &dsa);
1631 }
1632 
1633 /* DB RENDER STATE */
1634 
si_set_active_query_state(struct pipe_context * ctx,bool enable)1635 static void si_set_active_query_state(struct pipe_context *ctx, bool enable)
1636 {
1637    struct si_context *sctx = (struct si_context *)ctx;
1638 
1639    /* Pipeline stat & streamout queries. */
1640    if (enable) {
1641       /* Disable pipeline stats if there are no active queries. */
1642       if (sctx->num_hw_pipestat_streamout_queries) {
1643          sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
1644          sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS;
1645          si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
1646       }
1647    } else {
1648       if (sctx->num_hw_pipestat_streamout_queries) {
1649          sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
1650          sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
1651          si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
1652       }
1653    }
1654 
1655    /* Occlusion queries. */
1656    if (sctx->occlusion_queries_disabled != !enable) {
1657       sctx->occlusion_queries_disabled = !enable;
1658       si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1659    }
1660 }
1661 
/* Save the compute constant buffer slot 0 so query-buffer-object shaders can
 * use it; restored later by si_restore_qbo_state. */
void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
{
   si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
}
1666 
/* Restore the compute constant buffer slot 0 saved by si_save_qbo_state.
 * The 'true' argument passes ownership of the reference to the context. */
void si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
{
   sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, true, &st->saved_const0);
}
1671 
/* Emit DB_RENDER_CONTROL, DB_COUNT_CONTROL, DB_RENDER_OVERRIDE2,
 * DB_SHADER_CONTROL and the VRS override registers.
 *
 * This atom gathers inferred state from several sources: in-place DB
 * flush/copy/clear flags, occlusion-query mode, the bound blend/DSA states,
 * the PS's DB_SHADER_CONTROL bits, and VRS options.
 */
static void si_emit_db_render_state(struct si_context *sctx, unsigned index)
{
   unsigned db_shader_control = 0, db_render_control = 0, db_count_control = 0, vrs_override_cntl = 0;

   /* DB_RENDER_CONTROL */
   if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) {
      /* DB->CB copy path (decompress blits); removed hardware path on GFX11. */
      assert(sctx->gfx_level < GFX11);
      db_render_control |= S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
                           S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
                           S_028000_COPY_CENTROID(1) | S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample);
   } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
      /* In-place decompression: render with compression disabled. */
      db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
                           S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace);
   } else {
      /* Normal rendering, possibly with a fast clear in progress. */
      db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
                           S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear);
   }

   if (sctx->gfx_level >= GFX11) {
      unsigned max_allowed_tiles_in_wave;

      /* NOTE(review): these per-sample-count limits appear to be hardware
       * tuning values (dedicated VRAM vs UMA) — confirm against the GFX11
       * programming guide before changing. */
      if (sctx->screen->info.has_dedicated_vram) {
         if (sctx->framebuffer.nr_samples == 8)
            max_allowed_tiles_in_wave = 6;
         else if (sctx->framebuffer.nr_samples == 4)
            max_allowed_tiles_in_wave = 13;
         else
            max_allowed_tiles_in_wave = 0;
      } else {
         if (sctx->framebuffer.nr_samples == 8)
            max_allowed_tiles_in_wave = 7;
         else if (sctx->framebuffer.nr_samples == 4)
            max_allowed_tiles_in_wave = 15;
         else
            max_allowed_tiles_in_wave = 0;
      }

      db_render_control |= S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave);
   }

   /* DB_COUNT_CONTROL (occlusion queries) */
   if (sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_DISABLE ||
       sctx->occlusion_queries_disabled) {
      /* Occlusion queries disabled. */
      if (sctx->gfx_level >= GFX7)
         db_count_control |= S_028004_ZPASS_ENABLE(0);
      else
         db_count_control |= S_028004_ZPASS_INCREMENT_DISABLE(1);
   } else {
      /* Occlusion queries enabled. */
      db_count_control |= S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples);

      if (sctx->gfx_level >= GFX7) {
         db_count_control |= S_028004_ZPASS_ENABLE(1) |
                             S_028004_SLICE_EVEN_ENABLE(1) |
                             S_028004_SLICE_ODD_ENABLE(1);
      }

      if (sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_INTEGER ||
          /* Boolean occlusion queries must set PERFECT_ZPASS_COUNTS for depth-only rendering
           * without depth writes or when depth testing is disabled. */
          (sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_BOOLEAN &&
           (!sctx->queued.named.dsa->depth_enabled ||
            (!sctx->queued.named.blend->cb_target_mask &&
             !sctx->queued.named.dsa->depth_write_enabled))))
         db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1);

      if (sctx->gfx_level >= GFX10 &&
          sctx->occlusion_query_mode != SI_OCCLUSION_QUERY_MODE_CONSERVATIVE_BOOLEAN)
         db_count_control |= S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(1);
   }

   /* This should always be set on GFX11. */
   if (sctx->gfx_level >= GFX11)
      db_count_control |= S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(1);

   /* Start from the PS-derived bits, then OR in context-dependent overrides. */
   db_shader_control |= sctx->ps_db_shader_control;

   /* Export-conflict hardware bug workaround: force a higher intrinsic rate
    * when blending with a single coverage sample. */
   if (sctx->screen->info.has_export_conflict_bug &&
       sctx->queued.named.blend->blend_enable_4bit &&
       si_get_num_coverage_samples(sctx) == 1) {
      db_shader_control |= S_02880C_OVERRIDE_INTRINSIC_RATE_ENABLE(1) |
                           S_02880C_OVERRIDE_INTRINSIC_RATE(2);
   }

   if (sctx->gfx_level >= GFX10_3) {
      /* Variable rate shading. */
      unsigned mode, log_rate_x, log_rate_y;

      if (sctx->allow_flat_shading) {
         mode = V_028064_SC_VRS_COMB_MODE_OVERRIDE;
         log_rate_x = log_rate_y = 1; /* 2x2 VRS (log2(2) == 1) */
      } else {
         /* If the shader is using discard, turn off coarse shading because discarding at 2x2 pixel
          * granularity degrades quality too much.
          *
          * The shader writes the VRS rate and we either pass it through or do MIN(shader, 1x1)
          * to disable coarse shading.
          */
         mode = sctx->screen->options.vrs2x2 && G_02880C_KILL_ENABLE(db_shader_control) ?
                   V_028064_SC_VRS_COMB_MODE_MIN : V_028064_SC_VRS_COMB_MODE_PASSTHRU;
         log_rate_x = log_rate_y = 0; /* 1x1 VRS (log2(1) == 0) */
      }

      /* GFX11 packs both rates into one field; GFX10.3 has separate X/Y fields. */
      if (sctx->gfx_level >= GFX11) {
         vrs_override_cntl = S_0283D0_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
                             S_0283D0_VRS_RATE(log_rate_x * 4 + log_rate_y);
      } else {
         vrs_override_cntl = S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
                             S_028064_VRS_OVERRIDE_RATE_X(log_rate_x) |
                             S_028064_VRS_OVERRIDE_RATE_Y(log_rate_y);
      }
   }

   unsigned db_render_override2 =
         S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
         S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
         S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
         S_028010_CENTROID_COMPUTATION_MODE(sctx->gfx_level >= GFX10_3 ? 1 : 0);

   if (sctx->screen->info.has_set_context_pairs_packed) {
      /* GFX11 packed-register path. */
      radeon_begin(&sctx->gfx_cs);
      gfx11_begin_packed_context_regs();
      gfx11_opt_set_context_reg(R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL,
                                db_render_control);
      gfx11_opt_set_context_reg(R_028004_DB_COUNT_CONTROL, SI_TRACKED_DB_COUNT_CONTROL,
                                db_count_control);
      gfx11_opt_set_context_reg(R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,
                                db_render_override2);
      gfx11_opt_set_context_reg(R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL,
                                db_shader_control);
      gfx11_opt_set_context_reg(R_0283D0_PA_SC_VRS_OVERRIDE_CNTL,
                                SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
      gfx11_end_packed_context_regs();
      radeon_end(); /* don't track context rolls on GFX11 */
   } else {
      radeon_begin(&sctx->gfx_cs);
      radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL,
                                  db_render_control, db_count_control);
      radeon_opt_set_context_reg(sctx, R_028010_DB_RENDER_OVERRIDE2,
                                 SI_TRACKED_DB_RENDER_OVERRIDE2, db_render_override2);
      radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL,
                                 db_shader_control);

      /* The VRS override register moved between GFX10.3 and GFX11. */
      if (sctx->gfx_level >= GFX11) {
         radeon_opt_set_context_reg(sctx, R_0283D0_PA_SC_VRS_OVERRIDE_CNTL,
                                    SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
      } else if (sctx->gfx_level >= GFX10_3) {
         radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL,
                                    SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
      }
      /* NOTE(review): this call passes sctx while the same macro is invoked
       * without arguments elsewhere in this file — confirm which signature
       * radeon_end_update_context_roll currently has. */
      radeon_end_update_context_roll(sctx);
   }
}
1826 
1827 /*
1828  * format translation
1829  */
1830 
si_colorformat_endian_swap(uint32_t colorformat)1831 static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1832 {
1833    if (UTIL_ARCH_BIG_ENDIAN) {
1834       switch (colorformat) {
1835       /* 8-bit buffers. */
1836       case V_028C70_COLOR_8:
1837          return V_028C70_ENDIAN_NONE;
1838 
1839       /* 16-bit buffers. */
1840       case V_028C70_COLOR_5_6_5:
1841       case V_028C70_COLOR_1_5_5_5:
1842       case V_028C70_COLOR_4_4_4_4:
1843       case V_028C70_COLOR_16:
1844       case V_028C70_COLOR_8_8:
1845          return V_028C70_ENDIAN_8IN16;
1846 
1847       /* 32-bit buffers. */
1848       case V_028C70_COLOR_8_8_8_8:
1849       case V_028C70_COLOR_2_10_10_10:
1850       case V_028C70_COLOR_10_10_10_2:
1851       case V_028C70_COLOR_8_24:
1852       case V_028C70_COLOR_24_8:
1853       case V_028C70_COLOR_16_16:
1854          return V_028C70_ENDIAN_8IN32;
1855 
1856       /* 64-bit buffers. */
1857       case V_028C70_COLOR_16_16_16_16:
1858          return V_028C70_ENDIAN_8IN16;
1859 
1860       case V_028C70_COLOR_32_32:
1861          return V_028C70_ENDIAN_8IN32;
1862 
1863       /* 128-bit buffers. */
1864       case V_028C70_COLOR_32_32_32_32:
1865          return V_028C70_ENDIAN_8IN32;
1866       default:
1867          return V_028C70_ENDIAN_NONE; /* Unsupported. */
1868       }
1869    } else {
1870       return V_028C70_ENDIAN_NONE;
1871    }
1872 }
1873 
si_translate_dbformat(enum pipe_format format)1874 static uint32_t si_translate_dbformat(enum pipe_format format)
1875 {
1876    switch (format) {
1877    case PIPE_FORMAT_Z16_UNORM:
1878       return V_028040_Z_16;
1879    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1880    case PIPE_FORMAT_X8Z24_UNORM:
1881    case PIPE_FORMAT_Z24X8_UNORM:
1882    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1883       return V_028040_Z_24; /* deprecated on AMD GCN */
1884    case PIPE_FORMAT_Z32_FLOAT:
1885    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1886       return V_028040_Z_32_FLOAT;
1887    default:
1888       return V_028040_Z_INVALID;
1889    }
1890 }
1891 
1892 /*
1893  * Texture translation
1894  */
1895 
si_translate_texformat(struct pipe_screen * screen,enum pipe_format format,const struct util_format_description * desc,int first_non_void)1896 static uint32_t si_translate_texformat(struct pipe_screen *screen, enum pipe_format format,
1897                                        const struct util_format_description *desc,
1898                                        int first_non_void)
1899 {
1900    struct si_screen *sscreen = (struct si_screen *)screen;
1901    bool uniform = true;
1902    int i;
1903 
1904    assert(sscreen->info.gfx_level <= GFX9);
1905 
1906    /* Colorspace (return non-RGB formats directly). */
1907    switch (desc->colorspace) {
1908    /* Depth stencil formats */
1909    case UTIL_FORMAT_COLORSPACE_ZS:
1910       switch (format) {
1911       case PIPE_FORMAT_Z16_UNORM:
1912          return V_008F14_IMG_DATA_FORMAT_16;
1913       case PIPE_FORMAT_X24S8_UINT:
1914       case PIPE_FORMAT_S8X24_UINT:
1915          /*
1916           * Implemented as an 8_8_8_8 data format to fix texture
1917           * gathers in stencil sampling. This affects at least
1918           * GL45-CTS.texture_cube_map_array.sampling on GFX8.
1919           */
1920          if (sscreen->info.gfx_level <= GFX8)
1921             return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1922 
1923          if (format == PIPE_FORMAT_X24S8_UINT)
1924             return V_008F14_IMG_DATA_FORMAT_8_24;
1925          else
1926             return V_008F14_IMG_DATA_FORMAT_24_8;
1927       case PIPE_FORMAT_Z24X8_UNORM:
1928       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1929          return V_008F14_IMG_DATA_FORMAT_8_24;
1930       case PIPE_FORMAT_X8Z24_UNORM:
1931       case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1932          return V_008F14_IMG_DATA_FORMAT_24_8;
1933       case PIPE_FORMAT_S8_UINT:
1934          return V_008F14_IMG_DATA_FORMAT_8;
1935       case PIPE_FORMAT_Z32_FLOAT:
1936          return V_008F14_IMG_DATA_FORMAT_32;
1937       case PIPE_FORMAT_X32_S8X24_UINT:
1938       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1939          return V_008F14_IMG_DATA_FORMAT_X24_8_32;
1940       default:
1941          goto out_unknown;
1942       }
1943 
1944    case UTIL_FORMAT_COLORSPACE_YUV:
1945       goto out_unknown; /* TODO */
1946 
1947    case UTIL_FORMAT_COLORSPACE_SRGB:
1948       if (desc->nr_channels != 4 && desc->nr_channels != 1)
1949          goto out_unknown;
1950       break;
1951 
1952    default:
1953       break;
1954    }
1955 
1956    if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
1957       switch (format) {
1958       case PIPE_FORMAT_RGTC1_SNORM:
1959       case PIPE_FORMAT_LATC1_SNORM:
1960       case PIPE_FORMAT_RGTC1_UNORM:
1961       case PIPE_FORMAT_LATC1_UNORM:
1962          return V_008F14_IMG_DATA_FORMAT_BC4;
1963       case PIPE_FORMAT_RGTC2_SNORM:
1964       case PIPE_FORMAT_LATC2_SNORM:
1965       case PIPE_FORMAT_RGTC2_UNORM:
1966       case PIPE_FORMAT_LATC2_UNORM:
1967          return V_008F14_IMG_DATA_FORMAT_BC5;
1968       default:
1969          goto out_unknown;
1970       }
1971    }
1972 
1973    if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
1974        (sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_VEGA10 ||
1975         sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2)) {
1976       switch (format) {
1977       case PIPE_FORMAT_ETC1_RGB8:
1978       case PIPE_FORMAT_ETC2_RGB8:
1979       case PIPE_FORMAT_ETC2_SRGB8:
1980          return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
1981       case PIPE_FORMAT_ETC2_RGB8A1:
1982       case PIPE_FORMAT_ETC2_SRGB8A1:
1983          return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
1984       case PIPE_FORMAT_ETC2_RGBA8:
1985       case PIPE_FORMAT_ETC2_SRGBA8:
1986          return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
1987       case PIPE_FORMAT_ETC2_R11_UNORM:
1988       case PIPE_FORMAT_ETC2_R11_SNORM:
1989          return V_008F14_IMG_DATA_FORMAT_ETC2_R;
1990       case PIPE_FORMAT_ETC2_RG11_UNORM:
1991       case PIPE_FORMAT_ETC2_RG11_SNORM:
1992          return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
1993       default:
1994          goto out_unknown;
1995       }
1996    }
1997 
1998    if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
1999       switch (format) {
2000       case PIPE_FORMAT_BPTC_RGBA_UNORM:
2001       case PIPE_FORMAT_BPTC_SRGBA:
2002          return V_008F14_IMG_DATA_FORMAT_BC7;
2003       case PIPE_FORMAT_BPTC_RGB_FLOAT:
2004       case PIPE_FORMAT_BPTC_RGB_UFLOAT:
2005          return V_008F14_IMG_DATA_FORMAT_BC6;
2006       default:
2007          goto out_unknown;
2008       }
2009    }
2010 
2011    if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
2012       switch (format) {
2013       case PIPE_FORMAT_R8G8_B8G8_UNORM:
2014       case PIPE_FORMAT_G8R8_B8R8_UNORM:
2015          return V_008F14_IMG_DATA_FORMAT_GB_GR;
2016       case PIPE_FORMAT_G8R8_G8B8_UNORM:
2017       case PIPE_FORMAT_R8G8_R8B8_UNORM:
2018          return V_008F14_IMG_DATA_FORMAT_BG_RG;
2019       default:
2020          goto out_unknown;
2021       }
2022    }
2023 
2024    if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
2025       switch (format) {
2026       case PIPE_FORMAT_DXT1_RGB:
2027       case PIPE_FORMAT_DXT1_RGBA:
2028       case PIPE_FORMAT_DXT1_SRGB:
2029       case PIPE_FORMAT_DXT1_SRGBA:
2030          return V_008F14_IMG_DATA_FORMAT_BC1;
2031       case PIPE_FORMAT_DXT3_RGBA:
2032       case PIPE_FORMAT_DXT3_SRGBA:
2033          return V_008F14_IMG_DATA_FORMAT_BC2;
2034       case PIPE_FORMAT_DXT5_RGBA:
2035       case PIPE_FORMAT_DXT5_SRGBA:
2036          return V_008F14_IMG_DATA_FORMAT_BC3;
2037       default:
2038          goto out_unknown;
2039       }
2040    }
2041 
2042    if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
2043       return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
2044    } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
2045       return V_008F14_IMG_DATA_FORMAT_10_11_11;
2046    }
2047 
2048    /* Other "OTHER" layouts are unsupported. */
2049    if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
2050       goto out_unknown;
2051 
2052    /* hw cannot support mixed formats (except depth/stencil, since only
2053     * depth is read).*/
2054    if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
2055       goto out_unknown;
2056 
2057    if (first_non_void < 0 || first_non_void > 3)
2058       goto out_unknown;
2059 
2060    /* Reject SCALED formats because we don't implement them for CB and do the same for texturing. */
2061    if ((desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED ||
2062         desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_SIGNED) &&
2063        !desc->channel[first_non_void].normalized &&
2064        !desc->channel[first_non_void].pure_integer)
2065       goto out_unknown;
2066 
2067    /* Reject unsupported 32_*NORM and FIXED formats. */
2068    if (desc->channel[first_non_void].size == 32 &&
2069        (desc->channel[first_non_void].normalized ||
2070         desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_FIXED))
2071       goto out_unknown;
2072 
2073    /* This format fails on Gfx8/Carrizo. */
2074    if (format == PIPE_FORMAT_A8R8_UNORM)
2075       goto out_unknown;
2076 
2077    /* See whether the components are of the same size. */
2078    for (i = 1; i < desc->nr_channels; i++) {
2079       uniform = uniform && desc->channel[0].size == desc->channel[i].size;
2080    }
2081 
2082    /* Non-uniform formats. */
2083    if (!uniform) {
2084       switch (desc->nr_channels) {
2085       case 3:
2086          if (desc->channel[0].size == 5 && desc->channel[1].size == 6 &&
2087              desc->channel[2].size == 5) {
2088             return V_008F14_IMG_DATA_FORMAT_5_6_5;
2089          }
2090          goto out_unknown;
2091       case 4:
2092          /* 5551 and 1555 UINT formats fail on Gfx8/Carrizo. */
2093          if (desc->channel[1].size == 5 &&
2094              desc->channel[2].size == 5 &&
2095              desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED &&
2096              desc->channel[first_non_void].pure_integer)
2097             goto out_unknown;
2098 
2099          if (desc->channel[0].size == 5 && desc->channel[1].size == 5 &&
2100              desc->channel[2].size == 5 && desc->channel[3].size == 1) {
2101             return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
2102          }
2103          if (desc->channel[0].size == 1 && desc->channel[1].size == 5 &&
2104              desc->channel[2].size == 5 && desc->channel[3].size == 5) {
2105             return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
2106          }
2107          if (desc->channel[0].size == 10 && desc->channel[1].size == 10 &&
2108              desc->channel[2].size == 10 && desc->channel[3].size == 2) {
2109             return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
2110          }
2111          goto out_unknown;
2112       }
2113       goto out_unknown;
2114    }
2115 
2116    /* uniform formats */
2117    switch (desc->channel[first_non_void].size) {
2118    case 4:
2119       switch (desc->nr_channels) {
2120       case 4:
2121          /* 4444 UINT formats fail on Gfx8/Carrizo. */
2122          if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED &&
2123              desc->channel[first_non_void].pure_integer)
2124             goto out_unknown;
2125 
2126          return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
2127       }
2128       break;
2129    case 8:
2130       switch (desc->nr_channels) {
2131       case 1:
2132          return V_008F14_IMG_DATA_FORMAT_8;
2133       case 2:
2134          return V_008F14_IMG_DATA_FORMAT_8_8;
2135       case 4:
2136          return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
2137       }
2138       break;
2139    case 16:
2140       switch (desc->nr_channels) {
2141       case 1:
2142          return V_008F14_IMG_DATA_FORMAT_16;
2143       case 2:
2144          return V_008F14_IMG_DATA_FORMAT_16_16;
2145       case 4:
2146          return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
2147       }
2148       break;
2149    case 32:
2150       switch (desc->nr_channels) {
2151       case 1:
2152          return V_008F14_IMG_DATA_FORMAT_32;
2153       case 2:
2154          return V_008F14_IMG_DATA_FORMAT_32_32;
2155 #if 0 /* Not supported for render targets */
2156       case 3:
2157          return V_008F14_IMG_DATA_FORMAT_32_32_32;
2158 #endif
2159       case 4:
2160          return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
2161       }
2162    }
2163 
2164 out_unknown:
2165    return ~0;
2166 }
2167 
is_wrap_mode_legal(struct si_screen * screen,unsigned wrap)2168 static unsigned is_wrap_mode_legal(struct si_screen *screen, unsigned wrap)
2169 {
2170    if (!screen->info.has_3d_cube_border_color_mipmap) {
2171       switch (wrap) {
2172       case PIPE_TEX_WRAP_CLAMP:
2173       case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2174       case PIPE_TEX_WRAP_MIRROR_CLAMP:
2175       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2176          return false;
2177       }
2178    }
2179    return true;
2180 }
2181 
si_tex_wrap(unsigned wrap)2182 static unsigned si_tex_wrap(unsigned wrap)
2183 {
2184    switch (wrap) {
2185    default:
2186    case PIPE_TEX_WRAP_REPEAT:
2187       return V_008F30_SQ_TEX_WRAP;
2188    case PIPE_TEX_WRAP_CLAMP:
2189       return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
2190    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2191       return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2192    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2193       return V_008F30_SQ_TEX_CLAMP_BORDER;
2194    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2195       return V_008F30_SQ_TEX_MIRROR;
2196    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2197       return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
2198    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2199       return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2200    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2201       return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
2202    }
2203 }
2204 
si_tex_mipfilter(unsigned filter)2205 static unsigned si_tex_mipfilter(unsigned filter)
2206 {
2207    switch (filter) {
2208    case PIPE_TEX_MIPFILTER_NEAREST:
2209       return V_008F38_SQ_TEX_Z_FILTER_POINT;
2210    case PIPE_TEX_MIPFILTER_LINEAR:
2211       return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2212    default:
2213    case PIPE_TEX_MIPFILTER_NONE:
2214       return V_008F38_SQ_TEX_Z_FILTER_NONE;
2215    }
2216 }
2217 
si_tex_compare(unsigned mode,unsigned compare)2218 static unsigned si_tex_compare(unsigned mode, unsigned compare)
2219 {
2220    if (mode == PIPE_TEX_COMPARE_NONE)
2221       return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2222 
2223    switch (compare) {
2224    default:
2225    case PIPE_FUNC_NEVER:
2226       return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2227    case PIPE_FUNC_LESS:
2228       return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2229    case PIPE_FUNC_EQUAL:
2230       return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2231    case PIPE_FUNC_LEQUAL:
2232       return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2233    case PIPE_FUNC_GREATER:
2234       return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2235    case PIPE_FUNC_NOTEQUAL:
2236       return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2237    case PIPE_FUNC_GEQUAL:
2238       return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2239    case PIPE_FUNC_ALWAYS:
2240       return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2241    }
2242 }
2243 
si_tex_dim(struct si_screen * sscreen,struct si_texture * tex,unsigned view_target,unsigned nr_samples)2244 static unsigned si_tex_dim(struct si_screen *sscreen, struct si_texture *tex, unsigned view_target,
2245                            unsigned nr_samples)
2246 {
2247    unsigned res_target = tex->buffer.b.b.target;
2248 
2249    if (view_target == PIPE_TEXTURE_CUBE || view_target == PIPE_TEXTURE_CUBE_ARRAY)
2250       res_target = view_target;
2251    /* If interpreting cubemaps as something else, set 2D_ARRAY. */
2252    else if (res_target == PIPE_TEXTURE_CUBE || res_target == PIPE_TEXTURE_CUBE_ARRAY)
2253       res_target = PIPE_TEXTURE_2D_ARRAY;
2254 
2255    /* GFX9 allocates 1D textures as 2D. */
2256    if ((res_target == PIPE_TEXTURE_1D || res_target == PIPE_TEXTURE_1D_ARRAY) &&
2257        sscreen->info.gfx_level == GFX9 &&
2258        tex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) {
2259       if (res_target == PIPE_TEXTURE_1D)
2260          res_target = PIPE_TEXTURE_2D;
2261       else
2262          res_target = PIPE_TEXTURE_2D_ARRAY;
2263    }
2264 
2265    switch (res_target) {
2266    default:
2267    case PIPE_TEXTURE_1D:
2268       return V_008F1C_SQ_RSRC_IMG_1D;
2269    case PIPE_TEXTURE_1D_ARRAY:
2270       return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
2271    case PIPE_TEXTURE_2D:
2272    case PIPE_TEXTURE_RECT:
2273       return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : V_008F1C_SQ_RSRC_IMG_2D;
2274    case PIPE_TEXTURE_2D_ARRAY:
2275       return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
2276    case PIPE_TEXTURE_3D:
2277       return V_008F1C_SQ_RSRC_IMG_3D;
2278    case PIPE_TEXTURE_CUBE:
2279    case PIPE_TEXTURE_CUBE_ARRAY:
2280       return V_008F1C_SQ_RSRC_IMG_CUBE;
2281    }
2282 }
2283 
2284 /*
2285  * Format support testing
2286  */
2287 
si_is_sampler_format_supported(struct pipe_screen * screen,enum pipe_format format)2288 static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
2289 {
2290    struct si_screen *sscreen = (struct si_screen *)screen;
2291    const struct util_format_description *desc = util_format_description(format);
2292 
2293    /* Samplers don't support 64 bits per channel. */
2294    if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
2295        desc->channel[0].size == 64)
2296       return false;
2297 
2298    if (sscreen->info.gfx_level >= GFX10) {
2299       const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&sscreen->info)[format];
2300       if (!fmt->img_format || fmt->buffers_only)
2301          return false;
2302       return true;
2303    }
2304 
2305    return si_translate_texformat(screen, format, desc,
2306                                  util_format_get_first_non_void_channel(format)) != ~0U;
2307 }
2308 
si_translate_buffer_dataformat(struct pipe_screen * screen,const struct util_format_description * desc,int first_non_void)2309 static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
2310                                                const struct util_format_description *desc,
2311                                                int first_non_void)
2312 {
2313    int i;
2314 
2315    assert(((struct si_screen *)screen)->info.gfx_level <= GFX9);
2316 
2317    if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
2318       return V_008F0C_BUF_DATA_FORMAT_10_11_11;
2319 
2320    assert(first_non_void >= 0);
2321 
2322    if (desc->nr_channels == 4 && desc->channel[0].size == 10 && desc->channel[1].size == 10 &&
2323        desc->channel[2].size == 10 && desc->channel[3].size == 2)
2324       return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
2325 
2326    /* See whether the components are of the same size. */
2327    for (i = 0; i < desc->nr_channels; i++) {
2328       if (desc->channel[first_non_void].size != desc->channel[i].size)
2329          return V_008F0C_BUF_DATA_FORMAT_INVALID;
2330    }
2331 
2332    switch (desc->channel[first_non_void].size) {
2333    case 8:
2334       switch (desc->nr_channels) {
2335       case 1:
2336       case 3: /* 3 loads */
2337          return V_008F0C_BUF_DATA_FORMAT_8;
2338       case 2:
2339          return V_008F0C_BUF_DATA_FORMAT_8_8;
2340       case 4:
2341          return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
2342       }
2343       break;
2344    case 16:
2345       switch (desc->nr_channels) {
2346       case 1:
2347       case 3: /* 3 loads */
2348          return V_008F0C_BUF_DATA_FORMAT_16;
2349       case 2:
2350          return V_008F0C_BUF_DATA_FORMAT_16_16;
2351       case 4:
2352          return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
2353       }
2354       break;
2355    case 32:
2356       switch (desc->nr_channels) {
2357       case 1:
2358          return V_008F0C_BUF_DATA_FORMAT_32;
2359       case 2:
2360          return V_008F0C_BUF_DATA_FORMAT_32_32;
2361       case 3:
2362          return V_008F0C_BUF_DATA_FORMAT_32_32_32;
2363       case 4:
2364          return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
2365       }
2366       break;
2367    case 64:
2368       /* Legacy double formats. */
2369       switch (desc->nr_channels) {
2370       case 1: /* 1 load */
2371          return V_008F0C_BUF_DATA_FORMAT_32_32;
2372       case 2: /* 1 load */
2373          return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
2374       case 3: /* 3 loads */
2375          return V_008F0C_BUF_DATA_FORMAT_32_32;
2376       case 4: /* 2 loads */
2377          return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
2378       }
2379       break;
2380    }
2381 
2382    return V_008F0C_BUF_DATA_FORMAT_INVALID;
2383 }
2384 
si_translate_buffer_numformat(struct pipe_screen * screen,const struct util_format_description * desc,int first_non_void)2385 static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
2386                                               const struct util_format_description *desc,
2387                                               int first_non_void)
2388 {
2389    assert(((struct si_screen *)screen)->info.gfx_level <= GFX9);
2390 
2391    if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
2392       return V_008F0C_BUF_NUM_FORMAT_FLOAT;
2393 
2394    assert(first_non_void >= 0);
2395 
2396    switch (desc->channel[first_non_void].type) {
2397    case UTIL_FORMAT_TYPE_SIGNED:
2398    case UTIL_FORMAT_TYPE_FIXED:
2399       if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer)
2400          return V_008F0C_BUF_NUM_FORMAT_SINT;
2401       else if (desc->channel[first_non_void].normalized)
2402          return V_008F0C_BUF_NUM_FORMAT_SNORM;
2403       else
2404          return V_008F0C_BUF_NUM_FORMAT_SSCALED;
2405       break;
2406    case UTIL_FORMAT_TYPE_UNSIGNED:
2407       if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer)
2408          return V_008F0C_BUF_NUM_FORMAT_UINT;
2409       else if (desc->channel[first_non_void].normalized)
2410          return V_008F0C_BUF_NUM_FORMAT_UNORM;
2411       else
2412          return V_008F0C_BUF_NUM_FORMAT_USCALED;
2413       break;
2414    case UTIL_FORMAT_TYPE_FLOAT:
2415    default:
2416       return V_008F0C_BUF_NUM_FORMAT_FLOAT;
2417    }
2418 }
2419 
si_is_vertex_format_supported(struct pipe_screen * screen,enum pipe_format format,unsigned usage)2420 static unsigned si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format,
2421                                               unsigned usage)
2422 {
2423    struct si_screen *sscreen = (struct si_screen *)screen;
2424    const struct util_format_description *desc;
2425    int first_non_void;
2426    unsigned data_format;
2427 
2428    assert((usage & ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_VERTEX_BUFFER)) ==
2429           0);
2430 
2431    desc = util_format_description(format);
2432 
2433    /* There are no native 8_8_8 or 16_16_16 data formats, and we currently
2434     * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well
2435     * for read-only access (with caveats surrounding bounds checks), but
2436     * obviously fails for write access which we have to implement for
2437     * shader images. Luckily, OpenGL doesn't expect this to be supported
2438     * anyway, and so the only impact is on PBO uploads / downloads, which
2439     * shouldn't be expected to be fast for GL_RGB anyway.
2440     */
2441    if (desc->block.bits == 3 * 8 || desc->block.bits == 3 * 16) {
2442       if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) {
2443          usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW);
2444          if (!usage)
2445             return 0;
2446       }
2447    }
2448 
2449    if (sscreen->info.gfx_level >= GFX10) {
2450       const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&sscreen->info)[format];
2451       unsigned first_image_only_format = sscreen->info.gfx_level >= GFX11 ? 64 : 128;
2452 
2453       if (!fmt->img_format || fmt->img_format >= first_image_only_format)
2454          return 0;
2455       return usage;
2456    }
2457 
2458    first_non_void = util_format_get_first_non_void_channel(format);
2459    data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
2460    if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID)
2461       return 0;
2462 
2463    return usage;
2464 }
2465 
si_is_colorbuffer_format_supported(enum amd_gfx_level gfx_level,enum pipe_format format)2466 static bool si_is_colorbuffer_format_supported(enum amd_gfx_level gfx_level,
2467                                                enum pipe_format format)
2468 {
2469    return ac_get_cb_format(gfx_level, format) != V_028C70_COLOR_INVALID &&
2470           si_translate_colorswap(gfx_level, format, false) != ~0U;
2471 }
2472 
si_is_zs_format_supported(enum pipe_format format)2473 static bool si_is_zs_format_supported(enum pipe_format format)
2474 {
2475    return si_translate_dbformat(format) != V_028040_Z_INVALID;
2476 }
2477 
/* Implementation of pipe_screen::is_format_supported.
 *
 * Each supported bind flag from "usage" is accumulated into "retval";
 * the query succeeds only if *every* requested flag is supported
 * (retval == usage at the end).
 */
static bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format format,
                                   enum pipe_texture_target target, unsigned sample_count,
                                   unsigned storage_sample_count, unsigned usage)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   unsigned retval = 0;

   if (target >= PIPE_MAX_TEXTURE_TYPES) {
      PRINT_ERR("radeonsi: unsupported texture type %d\n", target);
      return false;
   }

   /* Require PIPE_BIND_SAMPLER_VIEW support when PIPE_BIND_RENDER_TARGET
    * is requested.
    */
   if (usage & PIPE_BIND_RENDER_TARGET)
      usage |= PIPE_BIND_SAMPLER_VIEW;

   /* Chips without 3D/cube support reject those targets outright. */
   if ((target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE) &&
        !sscreen->info.has_3d_cube_border_color_mipmap)
      return false;

   /* Multi-planar formats (e.g. planar YUV) are not supported here. */
   if (util_format_get_num_planes(format) >= 2)
      return false;

   /* Storage (color) samples can never exceed coverage samples. */
   if (MAX2(1, sample_count) < MAX2(1, storage_sample_count))
      return false;

   if (sample_count > 1) {
      if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
         return false;

      /* Only power-of-two sample counts are supported. */
      if (!util_is_power_of_two_or_zero(sample_count) ||
          !util_is_power_of_two_or_zero(storage_sample_count))
         return false;

      /* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate,
       * so don't expose 16 samples there.
       */
      const unsigned max_eqaa_samples =
         (sscreen->info.gfx_level >= GFX11 ||
          util_bitcount64(sscreen->info.enabled_rb_mask) <= 1) ? 8 : 16;
      const unsigned max_samples = 8;

      /* MSAA support without framebuffer attachments. */
      if (format == PIPE_FORMAT_NONE && sample_count <= max_eqaa_samples)
         return true;

      if (!sscreen->info.has_eqaa_surface_allocator || util_format_is_depth_or_stencil(format)) {
         /* Color without EQAA or depth/stencil. */
         if (sample_count > max_samples || sample_count != storage_sample_count)
            return false;
      } else {
         /* Color with EQAA. */
         if (sample_count > max_eqaa_samples || storage_sample_count > max_samples)
            return false;
      }
   }

   if (usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) {
      if (target == PIPE_BUFFER) {
         /* Buffer sampling/image support follows the vertex-format rules. */
         retval |= si_is_vertex_format_supported(
            screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE));
      } else {
         if (si_is_sampler_format_supported(screen, format))
            retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE);
      }
   }

   if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
                 PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) &&
       si_is_colorbuffer_format_supported(sscreen->info.gfx_level, format)) {
      retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
                         PIPE_BIND_SHARED);
      /* Pure-integer and depth/stencil formats are not blendable. */
      if (!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format))
         retval |= usage & PIPE_BIND_BLENDABLE;
   }

   if ((usage & PIPE_BIND_DEPTH_STENCIL) && si_is_zs_format_supported(format)) {
      retval |= PIPE_BIND_DEPTH_STENCIL;
   }

   if (usage & PIPE_BIND_VERTEX_BUFFER) {
      retval |= si_is_vertex_format_supported(screen, format, PIPE_BIND_VERTEX_BUFFER);
   }

   /* Only 8/16/32-bit unsigned index formats are supported. */
   if (usage & PIPE_BIND_INDEX_BUFFER) {
      if (format == PIPE_FORMAT_R8_UINT ||
          format == PIPE_FORMAT_R16_UINT ||
          format == PIPE_FORMAT_R32_UINT)
         retval |= PIPE_BIND_INDEX_BUFFER;
   }

   /* Linear layout: not allowed for compressed or depth/stencil formats. */
   if ((usage & PIPE_BIND_LINEAR) && !util_format_is_compressed(format) &&
       !(usage & PIPE_BIND_DEPTH_STENCIL))
      retval |= PIPE_BIND_LINEAR;

   return retval == usage;
}
2578 
2579 /*
2580  * framebuffer handling
2581  */
2582 
si_choose_spi_color_formats(struct si_surface * surf,unsigned format,unsigned swap,unsigned ntype,bool is_depth)2583 static void si_choose_spi_color_formats(struct si_surface *surf, unsigned format, unsigned swap,
2584                                         unsigned ntype, bool is_depth)
2585 {
2586    struct ac_spi_color_formats formats = {};
2587 
2588    ac_choose_spi_color_formats(format, swap, ntype, is_depth, true, &formats);
2589 
2590    surf->spi_shader_col_format = formats.normal;
2591    surf->spi_shader_col_format_alpha = formats.alpha;
2592    surf->spi_shader_col_format_blend = formats.blend;
2593    surf->spi_shader_col_format_blend_alpha = formats.blend_alpha;
2594 }
2595 
si_initialize_color_surface(struct si_context * sctx,struct si_surface * surf)2596 static void si_initialize_color_surface(struct si_context *sctx, struct si_surface *surf)
2597 {
2598    struct si_texture *tex = (struct si_texture *)surf->base.texture;
2599    unsigned format, swap, ntype, endian;
2600    const struct util_format_description *desc;
2601    unsigned blend_clamp = 0, blend_bypass = 0;
2602 
2603    desc = util_format_description(surf->base.format);
2604 
2605    ntype = ac_get_cb_number_type(surf->base.format);
2606    format = ac_get_cb_format(sctx->gfx_level, surf->base.format);
2607 
2608    if (format == V_028C70_COLOR_INVALID) {
2609       PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
2610    }
2611    assert(format != V_028C70_COLOR_INVALID);
2612    swap = si_translate_colorswap(sctx->gfx_level, surf->base.format, false);
2613    endian = si_colorformat_endian_swap(format);
2614 
2615    /* blend clamp should be set for all NORM/SRGB types */
2616    if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
2617        ntype == V_028C70_NUMBER_SRGB)
2618       blend_clamp = 1;
2619 
2620    /* set blend bypass according to docs if SINT/UINT or
2621       8/24 COLOR variants */
2622    if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2623        format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2624        format == V_028C70_COLOR_X24_8_32_FLOAT) {
2625       blend_clamp = 0;
2626       blend_bypass = 1;
2627    }
2628 
2629    if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) {
2630       if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_8_8 ||
2631           format == V_028C70_COLOR_8_8_8_8)
2632          surf->color_is_int8 = true;
2633       else if (format == V_028C70_COLOR_10_10_10_2 || format == V_028C70_COLOR_2_10_10_10)
2634          surf->color_is_int10 = true;
2635    }
2636 
2637    unsigned log_samples = util_logbase2(tex->buffer.b.b.nr_samples);
2638    unsigned log_fragments = util_logbase2(tex->buffer.b.b.nr_storage_samples);
2639    /* Intensity is implemented as Red, so treat it that way. */
2640    bool force_dst_alpha_1 = desc->swizzle[3] == PIPE_SWIZZLE_1 ||
2641                             util_format_is_intensity(surf->base.format);
2642    bool round_mode = ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
2643                      ntype != V_028C70_NUMBER_SRGB &&
2644                      format != V_028C70_COLOR_8_24 && format != V_028C70_COLOR_24_8;
2645    /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
2646     * 64 for APU because all of our APUs to date use DIMMs which have
2647     * a request granularity size of 64B while all other chips have a
2648     * 32B request size */
2649    unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
2650    if (!sctx->screen->info.has_dedicated_vram)
2651       min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
2652 
2653    surf->cb_color_info = S_028C70_COMP_SWAP(swap) |
2654                          S_028C70_BLEND_CLAMP(blend_clamp) |
2655                          S_028C70_BLEND_BYPASS(blend_bypass) |
2656                          S_028C70_SIMPLE_FLOAT(1) |
2657                          S_028C70_ROUND_MODE(round_mode) |
2658                          S_028C70_NUMBER_TYPE(ntype);
2659 
2660    unsigned width0 = surf->width0;
2661 
2662    /* GFX10.3+ can set a custom pitch for 1D and 2D non-array, but it must be a multiple of
2663     * 256B.
2664     *
2665     * We set the pitch in MIP0_WIDTH.
2666     */
2667    if (sctx->gfx_level >= GFX10_3 && tex->surface.u.gfx9.uses_custom_pitch) {
2668       ASSERTED unsigned min_alignment = 256;
2669       assert((tex->surface.u.gfx9.surf_pitch * tex->surface.bpe) % min_alignment == 0);
2670       assert(tex->buffer.b.b.target == PIPE_TEXTURE_2D ||
2671              tex->buffer.b.b.target == PIPE_TEXTURE_RECT);
2672       assert(tex->surface.is_linear);
2673 
2674       width0 = tex->surface.u.gfx9.surf_pitch;
2675 
2676       /* Subsampled images have the pitch in the units of blocks. */
2677       if (tex->surface.blk_w == 2)
2678          width0 *= 2;
2679    }
2680 
2681    if (sctx->gfx_level >= GFX10) {
2682       /* Gfx10-11. */
2683       surf->cb_color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
2684                             S_028C6C_SLICE_MAX_GFX10(surf->base.u.tex.last_layer) |
2685                             S_028C6C_MIP_LEVEL_GFX10(surf->base.u.tex.level);
2686       surf->cb_color_attrib = 0;
2687       surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width0 - 1) |
2688                                S_028C68_MIP0_HEIGHT(surf->height0 - 1) |
2689                                S_028C68_MAX_MIP(tex->buffer.b.b.last_level);
2690       surf->cb_color_attrib3 = S_028EE0_MIP0_DEPTH(util_max_layer(&tex->buffer.b.b, 0)) |
2691                                S_028EE0_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type) |
2692                                S_028EE0_RESOURCE_LEVEL(sctx->gfx_level >= GFX11 ? 0 : 1);
2693       surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
2694                              S_028C78_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) |
2695                              S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
2696                              S_028C78_INDEPENDENT_64B_BLOCKS(tex->surface.u.gfx9.color.dcc.independent_64B_blocks);
2697 
2698       if (sctx->gfx_level >= GFX11) {
2699          assert(!UTIL_ARCH_BIG_ENDIAN);
2700          surf->cb_color_info |= S_028C70_FORMAT_GFX11(format);
2701          surf->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_fragments) |
2702                                   S_028C74_FORCE_DST_ALPHA_1_GFX11(force_dst_alpha_1);
2703          surf->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(tex->surface.u.gfx9.color.dcc.independent_128B_blocks);
2704       } else {
2705          surf->cb_color_info |= S_028C70_ENDIAN(endian) |
2706                                 S_028C70_FORMAT_GFX6(format) |
2707                                 S_028C70_COMPRESSION(!!tex->surface.fmask_offset);
2708          surf->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2709                                   S_028C74_NUM_FRAGMENTS_GFX6(log_fragments) |
2710                                   S_028C74_FORCE_DST_ALPHA_1_GFX6(force_dst_alpha_1);
2711          surf->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX10(tex->surface.u.gfx9.color.dcc.independent_128B_blocks);
2712       }
2713    } else {
2714       /* Gfx6-9. */
2715       surf->cb_color_info |= S_028C70_ENDIAN(endian) |
2716                              S_028C70_FORMAT_GFX6(format) |
2717                              S_028C70_COMPRESSION(!!tex->surface.fmask_offset);
2718       surf->cb_color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
2719                             S_028C6C_SLICE_MAX_GFX6(surf->base.u.tex.last_layer);
2720       surf->cb_color_attrib = S_028C74_NUM_SAMPLES(log_samples) |
2721                               S_028C74_NUM_FRAGMENTS_GFX6(log_fragments) |
2722                               S_028C74_FORCE_DST_ALPHA_1_GFX6(force_dst_alpha_1);
2723       surf->cb_color_attrib2 = 0;
2724       surf->cb_dcc_control = 0;
2725 
2726       if (sctx->gfx_level == GFX9) {
2727          surf->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(surf->base.u.tex.level);
2728          surf->cb_color_attrib |= S_028C74_MIP0_DEPTH(util_max_layer(&tex->buffer.b.b, 0)) |
2729                                   S_028C74_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type);
2730          surf->cb_color_attrib2 |= S_028C68_MIP0_WIDTH(surf->width0 - 1) |
2731                                    S_028C68_MIP0_HEIGHT(surf->height0 - 1) |
2732                                    S_028C68_MAX_MIP(tex->buffer.b.b.last_level);
2733       }
2734 
2735       if (sctx->gfx_level >= GFX8) {
2736          unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
2737 
2738          if (tex->buffer.b.b.nr_storage_samples > 1) {
2739             if (tex->surface.bpe == 1)
2740                max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
2741             else if (tex->surface.bpe == 2)
2742                max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
2743          }
2744 
2745          surf->cb_dcc_control |= S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2746                                  S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
2747                                  S_028C78_INDEPENDENT_64B_BLOCKS(1);
2748       }
2749 
2750       if (sctx->gfx_level == GFX6) {
2751          /* Due to a hw bug, FMASK_BANK_HEIGHT must still be set on GFX6. (inherited from GFX5) */
2752          /* This must also be set for fast clear to work without FMASK. */
2753          unsigned fmask_bankh = tex->surface.fmask_offset ? tex->surface.u.legacy.color.fmask.bankh
2754                                                           : tex->surface.u.legacy.bankh;
2755          surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(util_logbase2(fmask_bankh));
2756       }
2757    }
2758 
2759    /* Determine pixel shader export format */
2760    si_choose_spi_color_formats(surf, format, swap, ntype, tex->is_depth);
2761 
2762    surf->color_initialized = true;
2763 }
2764 
/* Initialize the immutable DB_* register values for a depth/stencil surface.
 *
 * Runs once per si_surface (guarded by surf->depth_initialized, set at the
 * bottom and checked by si_set_framebuffer_state). Mutable per-draw depth
 * state is emitted elsewhere.
 */
static void si_init_depth_surface(struct si_context *sctx, struct si_surface *surf)
{
   struct si_texture *tex = (struct si_texture *)surf->base.texture;
   unsigned level = surf->base.u.tex.level;
   unsigned format, stencil_format;

   format = si_translate_dbformat(tex->db_render_format);
   /* Stencil is always 8-bit when the surface has it. */
   stencil_format = tex->surface.has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

   assert(format != V_028040_Z_INVALID);
   if (format == V_028040_Z_INVALID)
      PRINT_ERR("Invalid DB format: %d, disabling DB.\n", tex->buffer.b.b.format);

   /* Use the original Z format, not db_render_format, so that the polygon offset behaves as
    * expected by applications.
    */
   switch (tex->buffer.b.b.format) {
   case PIPE_FORMAT_Z16_UNORM:
      surf->db_format_index = 0; /* 16-bit unorm Z */
      break;
   default: /* 24-bit */
      surf->db_format_index = 1;
      break;
   case PIPE_FORMAT_Z32_FLOAT:
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      surf->db_format_index = 2; /* 32-bit float Z */
      break;
   }

   if (sctx->gfx_level >= GFX9) {
      surf->db_htile_data_base = 0;
      surf->db_htile_surface = 0;
      surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
                            S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
      if (sctx->gfx_level >= GFX10) {
         /* GFX10+ widens the slice range; bits above 11 live in separate fields. */
         surf->db_depth_view |= S_028008_SLICE_START_HI(surf->base.u.tex.first_layer >> 11) |
                                S_028008_SLICE_MAX_HI(surf->base.u.tex.last_layer >> 11);
      }

      assert(tex->surface.u.gfx9.surf_offset == 0);
      /* DB base addresses are programmed in units of 256 bytes. */
      surf->db_depth_base = tex->buffer.gpu_address >> 8;
      surf->db_stencil_base = (tex->buffer.gpu_address + tex->surface.u.gfx9.zs.stencil_offset) >> 8;
      /* NOTE(review): the S_028038_*/S_028040_ITERATE_256 (and S_02803C_*/
       * S_028044_ITERATE_256) macro mix targets register offsets of different
       * generations; presumably the bit positions coincide — confirm against
       * the generated register headers before changing.
       */
      surf->db_z_info = S_028038_FORMAT(format) |
                        S_028038_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)) |
                        S_028038_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |
                        S_028038_MAXMIP(tex->buffer.b.b.last_level) |
                        S_028040_ITERATE_256(sctx->gfx_level >= GFX11);
      surf->db_stencil_info = S_02803C_FORMAT(stencil_format) |
                              S_02803C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode) |
                              S_028044_ITERATE_256(sctx->gfx_level >= GFX11);

      if (sctx->gfx_level == GFX9) {
         /* Only GFX9 has the EPITCH fields. */
         surf->db_z_info2 = S_028068_EPITCH(tex->surface.u.gfx9.epitch);
         surf->db_stencil_info2 = S_02806C_EPITCH(tex->surface.u.gfx9.zs.stencil_epitch);
      }
      surf->db_depth_view |= S_028008_MIPID(level);
      surf->db_depth_size = S_02801C_X_MAX(tex->buffer.b.b.width0 - 1) |
                            S_02801C_Y_MAX(tex->buffer.b.b.height0 - 1);

      if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) {
         surf->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1) |
                            S_028038_ALLOW_EXPCLEAR(1);
         surf->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled);

         if (tex->surface.has_stencil && !tex->htile_stencil_disabled) {
            /* Stencil buffer workaround ported from the GFX6-GFX8 code.
             * See that for explanation.
             */
            surf->db_stencil_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1);
         }

         surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;
         surf->db_htile_surface = S_028ABC_FULL_CACHE(1) |
                                  S_028ABC_PIPE_ALIGNED(1);
         if (sctx->gfx_level == GFX9) {
            surf->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
         }
      }
   } else {
      /* GFX6-GFX8 */
      struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level];

      /* The *_TILE_MAX fields below divide by 8, so block counts must be multiples of 8. */
      assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);

      surf->db_depth_base =
         (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.level[level].offset_256B;
      surf->db_stencil_base =
         (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.zs.stencil_level[level].offset_256B;
      surf->db_htile_data_base = 0;
      surf->db_htile_surface = 0;
      surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
                            S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
      surf->db_z_info = S_028040_FORMAT(format) |
                        S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples));
      surf->db_stencil_info = S_028044_FORMAT(stencil_format);
      surf->db_depth_info = 0;

      if (sctx->gfx_level >= GFX7) {
         /* GFX7-8: derive tiling parameters from the mode arrays in radeon_info. */
         struct radeon_info *info = &sctx->screen->info;
         unsigned index = tex->surface.u.legacy.tiling_index[level];
         unsigned stencil_index = tex->surface.u.legacy.zs.stencil_tiling_index[level];
         unsigned macro_index = tex->surface.u.legacy.macro_tile_index;
         unsigned tile_mode = info->si_tile_mode_array[index];
         unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
         unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

         surf->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
                                S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
                                S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
                                S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
                                S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
                                S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
         surf->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
         surf->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
      } else {
         /* GFX6 selects tiling by index; depth and stencil use separate indices. */
         unsigned tile_mode_index = si_tile_mode_index(tex, level, false);
         surf->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
         tile_mode_index = si_tile_mode_index(tex, level, true);
         surf->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
      }

      surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
                            S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
      surf->db_depth_slice =
         S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * levelinfo->nblk_y) / 64 - 1);

      if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) {
         surf->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
                            S_028040_ALLOW_EXPCLEAR(1);
         surf->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled);

         if (tex->surface.has_stencil) {
            /* Workaround: For a not yet understood reason, the
             * combination of MSAA, fast stencil clear and stencil
             * decompress messes with subsequent stencil buffer
             * uses. Problem was reproduced on Verde, Bonaire,
             * Tonga, and Carrizo.
             *
             * Disabling EXPCLEAR works around the problem.
             *
             * Check piglit's arb_texture_multisample-stencil-clear
             * test if you want to try changing this.
             */
            if (tex->buffer.b.b.nr_samples <= 1)
               surf->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
         }

         surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;
         surf->db_htile_surface = S_028ABC_FULL_CACHE(1);
      }
   }

   /* Mark done so si_set_framebuffer_state skips re-initialization. */
   surf->depth_initialized = true;
}
2919 
si_set_sampler_depth_decompress_mask(struct si_context * sctx,struct si_texture * tex)2920 void si_set_sampler_depth_decompress_mask(struct si_context *sctx, struct si_texture *tex)
2921 {
2922    /* Check all sampler bindings in all shaders where depth textures are bound, and update
2923     * which samplers should be decompressed.
2924     */
2925    u_foreach_bit(sh, sctx->shader_has_depth_tex) {
2926       u_foreach_bit(i, sctx->samplers[sh].has_depth_tex_mask) {
2927          if (sctx->samplers[sh].views[i]->texture == &tex->buffer.b.b) {
2928             sctx->samplers[sh].needs_depth_decompress_mask |= 1 << i;
2929             sctx->shader_needs_decompress_mask |= 1 << sh;
2930          }
2931       }
2932    }
2933 }
2934 
si_update_fb_dirtiness_after_rendering(struct si_context * sctx)2935 void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
2936 {
2937    if (sctx->decompression_enabled)
2938       return;
2939 
2940    if (sctx->framebuffer.state.zsbuf) {
2941       struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
2942       struct si_texture *tex = (struct si_texture *)surf->texture;
2943 
2944       tex->dirty_level_mask |= 1 << surf->u.tex.level;
2945 
2946       if (tex->surface.has_stencil)
2947          tex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
2948 
2949       si_set_sampler_depth_decompress_mask(sctx, tex);
2950    }
2951 
2952    unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask;
2953    while (compressed_cb_mask) {
2954       unsigned i = u_bit_scan(&compressed_cb_mask);
2955       struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
2956       struct si_texture *tex = (struct si_texture *)surf->texture;
2957 
2958       if (tex->surface.fmask_offset) {
2959          tex->dirty_level_mask |= 1 << surf->u.tex.level;
2960          tex->fmask_is_identity = false;
2961       }
2962    }
2963 }
2964 
si_dec_framebuffer_counters(const struct pipe_framebuffer_state * state)2965 static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
2966 {
2967    for (int i = 0; i < state->nr_cbufs; ++i) {
2968       struct si_surface *surf = NULL;
2969       struct si_texture *tex;
2970 
2971       if (!state->cbufs[i])
2972          continue;
2973       surf = (struct si_surface *)state->cbufs[i];
2974       tex = (struct si_texture *)surf->base.texture;
2975 
2976       p_atomic_dec(&tex->framebuffers_bound);
2977    }
2978 }
2979 
si_mark_display_dcc_dirty(struct si_context * sctx,struct si_texture * tex)2980 void si_mark_display_dcc_dirty(struct si_context *sctx, struct si_texture *tex)
2981 {
2982    if (!tex->surface.display_dcc_offset || tex->displayable_dcc_dirty)
2983       return;
2984 
2985    if (!(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) {
2986       struct hash_entry *entry = _mesa_hash_table_search(sctx->dirty_implicit_resources, tex);
2987       if (!entry) {
2988          struct pipe_resource *dummy = NULL;
2989          pipe_resource_reference(&dummy, &tex->buffer.b.b);
2990          _mesa_hash_table_insert(sctx->dirty_implicit_resources, tex, tex);
2991       }
2992    }
2993    tex->displayable_dcc_dirty = true;
2994 }
2995 
si_update_display_dcc_dirty(struct si_context * sctx)2996 static void si_update_display_dcc_dirty(struct si_context *sctx)
2997 {
2998    const struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
2999 
3000    for (unsigned i = 0; i < state->nr_cbufs; i++) {
3001       if (state->cbufs[i])
3002          si_mark_display_dcc_dirty(sctx, (struct si_texture *)state->cbufs[i]->texture);
3003    }
3004 }
3005 
/* Gallium pipe_context::set_framebuffer_state hook.
 *
 * Flushes caches for the outgoing framebuffer, copies the new state in, then
 * recomputes all derived framebuffer fields (shader color export formats,
 * compressed/uncompressed CB masks, sample counts, DCC/HTILE metadata flags)
 * and dirties the state atoms affected by the change.
 */
static void si_set_framebuffer_state(struct pipe_context *ctx,
                                     const struct pipe_framebuffer_state *state)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_surface *surf = NULL;
   struct si_texture *tex;
   /* Snapshot outgoing state; compared near the end to decide which atoms to dirty. */
   bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
   unsigned old_nr_samples = sctx->framebuffer.nr_samples;
   unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit;
   bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf;
   bool old_has_stencil =
      old_has_zsbuf &&
      ((struct si_texture *)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil;
   /* -1 wraps to 0xff in uint8_t — a sentinel distinct from valid indices (0-2). */
   uint8_t old_db_format_index =
      old_has_zsbuf ?
      ((struct si_surface *)sctx->framebuffer.state.zsbuf)->db_format_index : -1;
   int i;

   /* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs
    * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.
    * We could implement the full workaround here, but it's a useless case.
    */
   if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) {
      unreachable("the framebuffer shouldn't have zero area");
      return;
   }

   si_update_fb_dirtiness_after_rendering(sctx);

   /* Disable DCC if the formats are incompatible. */
   if (sctx->gfx_level >= GFX8 && sctx->gfx_level < GFX11) {
      for (i = 0; i < state->nr_cbufs; i++) {
         if (!state->cbufs[i])
            continue;

         surf = (struct si_surface *)state->cbufs[i];
         tex = (struct si_texture *)surf->base.texture;

         if (!surf->dcc_incompatible)
            continue;

         /* Prefer disabling DCC permanently; fall back to a decompress pass. */
         if (vi_dcc_enabled(tex, surf->base.u.tex.level))
            if (!si_texture_disable_dcc(sctx, tex))
               si_decompress_dcc(sctx, tex);

         surf->dcc_incompatible = false;
      }
   }

   /* Only flush TC when changing the framebuffer state, because
    * the only client not using TC that can change textures is
    * the framebuffer.
    *
    * Wait for compute shaders because of possible transitions:
    * - FB write -> shader read
    * - shader write -> FB read
    *
    * Wait for draws because of possible transitions:
    * - texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*)
    *
    * DB caches are flushed on demand (using si_decompress_textures).
    *
    * When MSAA is enabled, CB and TC caches are flushed on demand
    * (after FMASK decompression). Shader write -> FB read transitions
    * cannot happen for MSAA textures, because MSAA shader images are
    * not supported.
    *
    * Only flush and wait for CB if there is actually a bound color buffer.
    */
   if (sctx->framebuffer.uncompressed_cb_mask) {
      si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
                                 sctx->framebuffer.CB_has_shader_readable_metadata,
                                 sctx->framebuffer.all_DCC_pipe_aligned);
   }

   sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);

   /* u_blitter doesn't invoke depth decompression when it does multiple
    * blits in a row, but the only case when it matters for DB is when
    * doing generate_mipmap. So here we flush DB manually between
    * individual generate_mipmap blits.
    * Note that lower mipmap levels aren't compressed.
    */
   if (sctx->generate_mipmap_for_depth) {
      si_make_DB_shader_coherent(sctx, 1, false, sctx->framebuffer.DB_has_shader_readable_metadata);
   } else if (sctx->gfx_level == GFX9) {
      /* It appears that DB metadata "leaks" in a sequence of:
       *  - depth clear
       *  - DCC decompress for shader image writes (with DB disabled)
       *  - render with DEPTH_BEFORE_SHADER=1
       * Flushing DB metadata works around the problem.
       */
      sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;
      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
   }

   /* Take the maximum of the old and new count. If the new count is lower,
    * dirtying is needed to disable the unbound colorbuffers.
    */
   sctx->framebuffer.dirty_cbufs |=
      (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
   sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;

   /* Swap in the new state: drop old binding counters first, then copy. */
   si_dec_framebuffer_counters(&sctx->framebuffer.state);
   util_copy_framebuffer_state(&sctx->framebuffer.state, state);
   /* Recompute layers because frontends and utils might not set it. */
   sctx->framebuffer.state.layers = util_framebuffer_get_num_layers(state);

   /* Reset all derived fields; they are rebuilt in the loops below. */
   sctx->framebuffer.colorbuf_enabled_4bit = 0;
   sctx->framebuffer.spi_shader_col_format = 0;
   sctx->framebuffer.spi_shader_col_format_alpha = 0;
   sctx->framebuffer.spi_shader_col_format_blend = 0;
   sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
   sctx->framebuffer.color_is_int8 = 0;
   sctx->framebuffer.color_is_int10 = 0;

   sctx->framebuffer.compressed_cb_mask = 0;
   sctx->framebuffer.uncompressed_cb_mask = 0;
   sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
   sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples;
   sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
   sctx->framebuffer.any_dst_linear = false;
   sctx->framebuffer.CB_has_shader_readable_metadata = false;
   sctx->framebuffer.DB_has_shader_readable_metadata = false;
   sctx->framebuffer.all_DCC_pipe_aligned = true;
   sctx->framebuffer.has_dcc_msaa = false;
   sctx->framebuffer.min_bytes_per_pixel = 0;

   /* Accumulate per-colorbuffer derived state (4 bits per CB slot). */
   for (i = 0; i < state->nr_cbufs; i++) {
      if (!state->cbufs[i])
         continue;

      surf = (struct si_surface *)state->cbufs[i];
      tex = (struct si_texture *)surf->base.texture;

      if (!surf->color_initialized) {
         si_initialize_color_surface(sctx, surf);
      }

      sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4);
      sctx->framebuffer.spi_shader_col_format |= surf->spi_shader_col_format << (i * 4);
      sctx->framebuffer.spi_shader_col_format_alpha |= surf->spi_shader_col_format_alpha << (i * 4);
      sctx->framebuffer.spi_shader_col_format_blend |= surf->spi_shader_col_format_blend << (i * 4);
      sctx->framebuffer.spi_shader_col_format_blend_alpha |= surf->spi_shader_col_format_blend_alpha
                                                             << (i * 4);

      if (surf->color_is_int8)
         sctx->framebuffer.color_is_int8 |= 1 << i;
      if (surf->color_is_int10)
         sctx->framebuffer.color_is_int10 |= 1 << i;

      if (tex->surface.fmask_offset)
         sctx->framebuffer.compressed_cb_mask |= 1 << i;
      else
         sctx->framebuffer.uncompressed_cb_mask |= 1 << i;

      /* Don't update nr_color_samples for non-AA buffers.
       * (e.g. destination of MSAA resolve)
       */
      if (tex->buffer.b.b.nr_samples >= 2 &&
          tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) {
         sctx->framebuffer.nr_color_samples =
            MIN2(sctx->framebuffer.nr_color_samples, tex->buffer.b.b.nr_storage_samples);
         sctx->framebuffer.nr_color_samples = MAX2(1, sctx->framebuffer.nr_color_samples);
      }

      if (tex->surface.is_linear)
         sctx->framebuffer.any_dst_linear = true;

      if (vi_dcc_enabled(tex, surf->base.u.tex.level)) {
         sctx->framebuffer.CB_has_shader_readable_metadata = true;

         if (sctx->gfx_level >= GFX9 && !tex->surface.u.gfx9.color.dcc.pipe_aligned)
            sctx->framebuffer.all_DCC_pipe_aligned = false;

         if (tex->buffer.b.b.nr_storage_samples >= 2)
            sctx->framebuffer.has_dcc_msaa = true;
      }

      p_atomic_inc(&tex->framebuffers_bound);

      /* Update the minimum but don't keep 0. */
      if (!sctx->framebuffer.min_bytes_per_pixel ||
          tex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel)
         sctx->framebuffer.min_bytes_per_pixel = tex->surface.bpe;
   }

   struct si_texture *zstex = NULL;

   if (state->zsbuf) {
      surf = (struct si_surface *)state->zsbuf;
      zstex = (struct si_texture *)surf->base.texture;

      if (!surf->depth_initialized) {
         si_init_depth_surface(sctx, surf);
      }

      if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level, PIPE_MASK_ZS))
         sctx->framebuffer.DB_has_shader_readable_metadata = true;

      /* Update the minimum but don't keep 0. */
      if (!sctx->framebuffer.min_bytes_per_pixel ||
          zstex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel)
         sctx->framebuffer.min_bytes_per_pixel = zstex->surface.bpe;

      /* Update polygon offset based on the Z format. */
      if (sctx->queued.named.rasterizer->uses_poly_offset &&
          surf->db_format_index != old_db_format_index)
         (sctx)->dirty_atoms |= SI_STATE_BIT(rasterizer);
   }

   si_update_ps_colorbuf0_slot(sctx);
   si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
   si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);

   /* NGG cull state uses the sample count. */
   if (sctx->screen->use_ngg_culling)
      si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state);

   if (sctx->screen->dpbb_allowed)
      si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);

   if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);

   /* Out-of-order rasterization eligibility depends on the CB/DB configuration. */
   if (sctx->screen->info.has_out_of_order_rast &&
       (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit ||
        !!sctx->framebuffer.state.zsbuf != old_has_zsbuf ||
        (zstex && zstex->surface.has_stencil != old_has_stencil)))
      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);

   if (sctx->framebuffer.nr_samples != old_nr_samples) {
      struct pipe_constant_buffer constbuf = {0};

      si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
      si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);

      /* Lazily upload the sample-position table on first use. */
      if (!sctx->sample_pos_buffer) {
         sctx->sample_pos_buffer = pipe_buffer_create_with_data(&sctx->b, 0, PIPE_USAGE_DEFAULT,
                                                      sizeof(sctx->sample_positions),
                                                      &sctx->sample_positions);
      }
      constbuf.buffer = sctx->sample_pos_buffer;

      /* Set sample locations as fragment shader constants.
       * The buffer packs the 1x/2x/4x/8x/16x position groups back to back;
       * offsets are computed from the struct layout of sample_positions.
       */
      switch (sctx->framebuffer.nr_samples) {
      case 1:
         constbuf.buffer_offset = 0;
         break;
      case 2:
         constbuf.buffer_offset =
            (uint8_t *)sctx->sample_positions.x2 - (uint8_t *)sctx->sample_positions.x1;
         break;
      case 4:
         constbuf.buffer_offset =
            (uint8_t *)sctx->sample_positions.x4 - (uint8_t *)sctx->sample_positions.x1;
         break;
      case 8:
         constbuf.buffer_offset =
            (uint8_t *)sctx->sample_positions.x8 - (uint8_t *)sctx->sample_positions.x1;
         break;
      case 16:
         constbuf.buffer_offset =
            (uint8_t *)sctx->sample_positions.x16 - (uint8_t *)sctx->sample_positions.x1;
         break;
      default:
         PRINT_ERR("Requested an invalid number of samples %i.\n", sctx->framebuffer.nr_samples);
         assert(0);
      }
      /* 2 floats (x, y) per sample position. */
      constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
      si_set_internal_const_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);

      si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_locations);
   }

   /* The framebuffer formats feed into the pixel shader keys. */
   si_ps_key_update_framebuffer(sctx);
   si_ps_key_update_framebuffer_blend_rasterizer(sctx);
   si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx);
   si_vs_ps_key_update_rast_prim_smooth_stipple(sctx);
   si_update_ps_inputs_read_or_disabled(sctx);
   sctx->do_update_shaders = true;

   if (!sctx->decompression_enabled) {
      /* Prevent textures decompression when the framebuffer state
       * changes come from the decompression passes themselves.
       */
      sctx->need_check_render_feedback = true;
   }
}
3296 
si_emit_framebuffer_state(struct si_context * sctx,unsigned index)3297 static void si_emit_framebuffer_state(struct si_context *sctx, unsigned index)
3298 {
3299    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
3300    struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
3301    unsigned i, nr_cbufs = state->nr_cbufs;
3302    struct si_texture *tex = NULL;
3303    struct si_surface *cb = NULL;
3304    bool is_msaa_resolve = state->nr_cbufs == 2 &&
3305                           state->cbufs[0] && state->cbufs[0]->texture->nr_samples > 1 &&
3306                           state->cbufs[1] && state->cbufs[1]->texture->nr_samples <= 1;
3307 
3308    /* CB can't do MSAA resolve on gfx11. */
3309    assert(!is_msaa_resolve || sctx->gfx_level < GFX11);
3310 
3311    radeon_begin(cs);
3312 
3313    /* Colorbuffers. */
3314    for (i = 0; i < nr_cbufs; i++) {
3315       if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
3316          continue;
3317 
3318       /* RB+ depth-only rendering. See the comment where we set rbplus_depth_only_opt for more
3319        * information.
3320        */
3321       if (i == 0 &&
3322           sctx->screen->info.rbplus_allowed &&
3323           !sctx->queued.named.blend->cb_target_mask) {
3324          radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C,
3325                                 (sctx->gfx_level >= GFX11 ?
3326                                    S_028C70_FORMAT_GFX11(V_028C70_COLOR_32) :
3327                                    S_028C70_FORMAT_GFX6(V_028C70_COLOR_32)) |
3328                                 S_028C70_NUMBER_TYPE(V_028C70_NUMBER_FLOAT));
3329          continue;
3330       }
3331 
3332       cb = (struct si_surface *)state->cbufs[i];
3333       if (!cb) {
3334          radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C,
3335                                 sctx->gfx_level >= GFX11 ?
3336                                    S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID) :
3337                                    S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID));
3338          continue;
3339       }
3340 
3341       tex = (struct si_texture *)cb->base.texture;
3342       radeon_add_to_buffer_list(
3343          sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC |
3344          (tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER));
3345 
3346       if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) {
3347          radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer,
3348                                    RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC |
3349                                    RADEON_PRIO_SEPARATE_META);
3350       }
3351 
3352       /* Compute mutable surface parameters. */
3353       uint64_t cb_color_base = tex->buffer.gpu_address >> 8;
3354       uint64_t cb_dcc_base = 0;
3355       unsigned cb_color_info = cb->cb_color_info | tex->cb_color_info;
3356 
3357       if (sctx->gfx_level < GFX11) {
3358          if (tex->swap_rgb_to_bgr) {
3359             /* Swap R and B channels. */
3360             static unsigned rgb_to_bgr[4] = {
3361                [V_028C70_SWAP_STD] = V_028C70_SWAP_ALT,
3362                [V_028C70_SWAP_ALT] = V_028C70_SWAP_STD,
3363                [V_028C70_SWAP_STD_REV] = V_028C70_SWAP_ALT_REV,
3364                [V_028C70_SWAP_ALT_REV] = V_028C70_SWAP_STD_REV,
3365             };
3366             unsigned swap = rgb_to_bgr[G_028C70_COMP_SWAP(cb_color_info)];
3367 
3368             cb_color_info &= C_028C70_COMP_SWAP;
3369             cb_color_info |= S_028C70_COMP_SWAP(swap);
3370          }
3371 
3372          if (cb->base.u.tex.level > 0)
3373             cb_color_info &= C_028C70_FAST_CLEAR;
3374 
3375 
3376          if (vi_dcc_enabled(tex, cb->base.u.tex.level) && (i != 1 || !is_msaa_resolve))
3377             cb_color_info |= S_028C70_DCC_ENABLE(1);
3378       }
3379 
3380       /* Set up DCC. */
3381       if (vi_dcc_enabled(tex, cb->base.u.tex.level)) {
3382          cb_dcc_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;
3383 
3384          unsigned dcc_tile_swizzle = tex->surface.tile_swizzle;
3385          dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8;
3386          cb_dcc_base |= dcc_tile_swizzle;
3387       }
3388 
3389       if (sctx->gfx_level >= GFX11) {
3390          unsigned cb_color_attrib3, cb_fdcc_control;
3391 
3392          /* Set mutable surface parameters. */
3393          cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
3394          cb_color_base |= tex->surface.tile_swizzle;
3395 
3396          cb_color_attrib3 = cb->cb_color_attrib3 |
3397                             S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |
3398                             S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned);
3399          cb_fdcc_control = cb->cb_dcc_control |
3400                            S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
3401                            S_028C78_FDCC_ENABLE(vi_dcc_enabled(tex, cb->base.u.tex.level));
3402 
3403          if (sctx->family >= CHIP_GFX1103_R2) {
3404             cb_fdcc_control |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) |
3405                                S_028C78_MAX_COMP_FRAGS(cb->base.texture->nr_samples >= 4);
3406          }
3407 
3408          radeon_set_context_reg(R_028C60_CB_COLOR0_BASE + i * 0x3C, cb_color_base);
3409 
3410          radeon_set_context_reg_seq(R_028C6C_CB_COLOR0_VIEW + i * 0x3C, 4);
3411          radeon_emit(cb->cb_color_view);                      /* CB_COLOR0_VIEW */
3412          radeon_emit(cb_color_info);                          /* CB_COLOR0_INFO */
3413          radeon_emit(cb->cb_color_attrib);                    /* CB_COLOR0_ATTRIB */
3414          radeon_emit(cb_fdcc_control);                        /* CB_COLOR0_FDCC_CONTROL */
3415 
3416          radeon_set_context_reg(R_028C94_CB_COLOR0_DCC_BASE + i * 0x3C, cb_dcc_base);
3417          radeon_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32);
3418          radeon_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32);
3419          radeon_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2);
3420          radeon_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3);
3421       } else if (sctx->gfx_level >= GFX10) {
3422          unsigned cb_color_attrib3;
3423          uint64_t cb_color_fmask, cb_color_cmask;
3424 
3425          /* Set mutable surface parameters. */
3426          cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
3427          cb_color_base |= tex->surface.tile_swizzle;
3428 
3429          if (tex->surface.fmask_offset) {
3430             cb_color_fmask = (tex->buffer.gpu_address + tex->surface.fmask_offset) >> 8;
3431             cb_color_fmask |= tex->surface.fmask_tile_swizzle;
3432          } else {
3433             cb_color_fmask = cb_color_base;
3434          }
3435 
3436          if (cb->base.u.tex.level > 0)
3437             cb_color_cmask = cb_color_base;
3438          else
3439             cb_color_cmask = tex->cmask_base_address_reg;
3440 
3441          cb_color_attrib3 = cb->cb_color_attrib3 |
3442                             S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |
3443                             S_028EE0_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) |
3444                             S_028EE0_CMASK_PIPE_ALIGNED(1) |
3445                             S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned);
3446 
3447          radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 14);
3448          radeon_emit(cb_color_base);             /* CB_COLOR0_BASE */
3449          radeon_emit(0);                         /* hole */
3450          radeon_emit(0);                         /* hole */
3451          radeon_emit(cb->cb_color_view);         /* CB_COLOR0_VIEW */
3452          radeon_emit(cb_color_info);             /* CB_COLOR0_INFO */
3453          radeon_emit(cb->cb_color_attrib);       /* CB_COLOR0_ATTRIB */
3454          radeon_emit(cb->cb_dcc_control);        /* CB_COLOR0_DCC_CONTROL */
3455          radeon_emit(cb_color_cmask);            /* CB_COLOR0_CMASK */
3456          radeon_emit(0);                         /* hole */
3457          radeon_emit(cb_color_fmask);            /* CB_COLOR0_FMASK */
3458          radeon_emit(0);                         /* hole */
3459          radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */
3460          radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */
3461          radeon_emit(cb_dcc_base);               /* CB_COLOR0_DCC_BASE */
3462 
3463          radeon_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32);
3464          radeon_set_context_reg(R_028E60_CB_COLOR0_CMASK_BASE_EXT + i * 4,
3465                                 cb_color_cmask >> 32);
3466          radeon_set_context_reg(R_028E80_CB_COLOR0_FMASK_BASE_EXT + i * 4,
3467                                 cb_color_fmask >> 32);
3468          radeon_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32);
3469          radeon_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2);
3470          radeon_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3);
3471       } else if (sctx->gfx_level == GFX9) {
3472          struct gfx9_surf_meta_flags meta = {
3473             .rb_aligned = 1,
3474             .pipe_aligned = 1,
3475          };
3476          unsigned cb_color_attrib = cb->cb_color_attrib;
3477          uint64_t cb_color_fmask, cb_color_cmask;
3478 
3479          if (!tex->is_depth && tex->surface.meta_offset)
3480             meta = tex->surface.u.gfx9.color.dcc;
3481 
3482          /* Set mutable surface parameters. */
3483          cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
3484          cb_color_base |= tex->surface.tile_swizzle;
3485 
3486          if (tex->surface.fmask_offset) {
3487             cb_color_fmask = (tex->buffer.gpu_address + tex->surface.fmask_offset) >> 8;
3488             cb_color_fmask |= tex->surface.fmask_tile_swizzle;
3489          } else {
3490             cb_color_fmask = cb_color_base;
3491          }
3492 
3493          if (cb->base.u.tex.level > 0)
3494             cb_color_cmask = cb_color_base;
3495          else
3496             cb_color_cmask = tex->cmask_base_address_reg;
3497 
3498          cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |
3499                             S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) |
3500                             S_028C74_RB_ALIGNED(meta.rb_aligned) |
3501                             S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3502 
3503          radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 15);
3504          radeon_emit(cb_color_base);                            /* CB_COLOR0_BASE */
3505          radeon_emit(S_028C64_BASE_256B(cb_color_base >> 32));  /* CB_COLOR0_BASE_EXT */
3506          radeon_emit(cb->cb_color_attrib2);                     /* CB_COLOR0_ATTRIB2 */
3507          radeon_emit(cb->cb_color_view);                        /* CB_COLOR0_VIEW */
3508          radeon_emit(cb_color_info);                            /* CB_COLOR0_INFO */
3509          radeon_emit(cb_color_attrib);                          /* CB_COLOR0_ATTRIB */
3510          radeon_emit(cb->cb_dcc_control);                       /* CB_COLOR0_DCC_CONTROL */
3511          radeon_emit(cb_color_cmask);                           /* CB_COLOR0_CMASK */
3512          radeon_emit(S_028C80_BASE_256B(cb_color_cmask >> 32)); /* CB_COLOR0_CMASK_BASE_EXT */
3513          radeon_emit(cb_color_fmask);                           /* CB_COLOR0_FMASK */
3514          radeon_emit(S_028C88_BASE_256B(cb_color_fmask >> 32)); /* CB_COLOR0_FMASK_BASE_EXT */
3515          radeon_emit(tex->color_clear_value[0]);                /* CB_COLOR0_CLEAR_WORD0 */
3516          radeon_emit(tex->color_clear_value[1]);                /* CB_COLOR0_CLEAR_WORD1 */
3517          radeon_emit(cb_dcc_base);                              /* CB_COLOR0_DCC_BASE */
3518          radeon_emit(S_028C98_BASE_256B(cb_dcc_base >> 32));    /* CB_COLOR0_DCC_BASE_EXT */
3519 
3520          radeon_set_context_reg(R_0287A0_CB_MRT0_EPITCH + i * 4,
3521                                 S_0287A0_EPITCH(tex->surface.u.gfx9.epitch));
3522       } else {
3523          /* Compute mutable surface parameters (GFX6-GFX8). */
3524          const struct legacy_surf_level *level_info =
3525             &tex->surface.u.legacy.level[cb->base.u.tex.level];
3526          unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3527          unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;
3528          unsigned cb_color_attrib = cb->cb_color_attrib;
3529          uint64_t cb_color_fmask, cb_color_cmask;
3530 
3531          cb_color_base += level_info->offset_256B;
3532          /* Only macrotiled modes can set tile swizzle. */
3533          if (level_info->mode == RADEON_SURF_MODE_2D)
3534             cb_color_base |= tex->surface.tile_swizzle;
3535 
3536          if (tex->surface.fmask_offset) {
3537             cb_color_fmask = (tex->buffer.gpu_address + tex->surface.fmask_offset) >> 8;
3538             cb_color_fmask |= tex->surface.fmask_tile_swizzle;
3539          } else {
3540             cb_color_fmask = cb_color_base;
3541          }
3542 
3543          if (cb->base.u.tex.level > 0)
3544             cb_color_cmask = cb_color_base;
3545          else
3546             cb_color_cmask = tex->cmask_base_address_reg;
3547 
3548          if (cb_dcc_base)
3549             cb_dcc_base += tex->surface.u.legacy.color.dcc_level[cb->base.u.tex.level].dcc_offset >> 8;
3550 
3551          pitch_tile_max = level_info->nblk_x / 8 - 1;
3552          slice_tile_max = level_info->nblk_x * level_info->nblk_y / 64 - 1;
3553          tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);
3554 
3555          cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3556          cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3557          cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3558 
3559          if (tex->surface.fmask_offset) {
3560             if (sctx->gfx_level >= GFX7)
3561                cb_color_pitch |=
3562                   S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
3563             cb_color_attrib |=
3564                S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.color.fmask.tiling_index);
3565             cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.color.fmask.slice_tile_max);
3566          } else {
3567             /* This must be set for fast clear to work without FMASK. */
3568             if (sctx->gfx_level >= GFX7)
3569                cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
3570             cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
3571             cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
3572          }
3573 
3574          radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C,
3575                                     sctx->gfx_level >= GFX8 ? 14 : 13);
3576          radeon_emit(cb_color_base);                              /* CB_COLOR0_BASE */
3577          radeon_emit(cb_color_pitch);                             /* CB_COLOR0_PITCH */
3578          radeon_emit(cb_color_slice);                             /* CB_COLOR0_SLICE */
3579          radeon_emit(cb->cb_color_view);                          /* CB_COLOR0_VIEW */
3580          radeon_emit(cb_color_info);                              /* CB_COLOR0_INFO */
3581          radeon_emit(cb_color_attrib);                            /* CB_COLOR0_ATTRIB */
3582          radeon_emit(cb->cb_dcc_control);                         /* CB_COLOR0_DCC_CONTROL */
3583          radeon_emit(cb_color_cmask);                             /* CB_COLOR0_CMASK */
3584          radeon_emit(tex->surface.u.legacy.color.cmask_slice_tile_max); /* CB_COLOR0_CMASK_SLICE */
3585          radeon_emit(cb_color_fmask);                             /* CB_COLOR0_FMASK */
3586          radeon_emit(cb_color_fmask_slice);                       /* CB_COLOR0_FMASK_SLICE */
3587          radeon_emit(tex->color_clear_value[0]);                  /* CB_COLOR0_CLEAR_WORD0 */
3588          radeon_emit(tex->color_clear_value[1]);                  /* CB_COLOR0_CLEAR_WORD1 */
3589 
3590          if (sctx->gfx_level >= GFX8) /* R_028C94_CB_COLOR0_DCC_BASE */
3591             radeon_emit(cb_dcc_base);
3592       }
3593    }
3594    for (; i < 8; i++)
3595       if (sctx->framebuffer.dirty_cbufs & (1 << i))
3596          radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
3597 
3598    /* ZS buffer. */
3599    if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
3600       struct si_surface *zb = (struct si_surface *)state->zsbuf;
3601       struct si_texture *tex = (struct si_texture *)zb->base.texture;
3602       unsigned db_z_info = zb->db_z_info;
3603       unsigned db_stencil_info = zb->db_stencil_info;
3604       unsigned db_htile_surface = zb->db_htile_surface;
3605 
3606       radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE |
3607                                 (zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA
3608                                                                   : RADEON_PRIO_DEPTH_BUFFER));
3609       bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS);
3610 
      /* Set fields dependent on tc_compat_htile. */
3612       if (sctx->gfx_level >= GFX9 && tc_compat_htile) {
3613          unsigned max_zplanes = 4;
3614 
3615          if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1)
3616             max_zplanes = 2;
3617 
3618          if (sctx->gfx_level >= GFX10) {
3619             bool iterate256 = tex->buffer.b.b.nr_samples >= 2;
3620             db_z_info |= S_028040_ITERATE_FLUSH(1) |
3621                          S_028040_ITERATE_256(iterate256);
3622             db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled) |
3623                                S_028044_ITERATE_256(iterate256);
3624 
3625             /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
3626             if (sctx->screen->info.has_two_planes_iterate256_bug && iterate256 &&
3627                 !tex->htile_stencil_disabled && tex->buffer.b.b.nr_samples == 4) {
3628                max_zplanes = 1;
3629             }
3630          } else {
3631             db_z_info |= S_028038_ITERATE_FLUSH(1);
3632             db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3633          }
3634 
3635          db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1);
3636       }
3637 
3638       unsigned level = zb->base.u.tex.level;
3639 
3640       if (sctx->gfx_level >= GFX10) {
3641          radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
3642          radeon_set_context_reg(R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size);
3643 
3644          if (sctx->gfx_level >= GFX11) {
3645             radeon_set_context_reg_seq(R_028040_DB_Z_INFO, 6);
3646          } else {
3647             radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 7);
3648             radeon_emit(S_02803C_RESOURCE_LEVEL(1)); /* DB_DEPTH_INFO */
3649          }
3650          radeon_emit(db_z_info |                  /* DB_Z_INFO */
3651                      S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));
3652          radeon_emit(db_stencil_info);     /* DB_STENCIL_INFO */
3653          radeon_emit(zb->db_depth_base);   /* DB_Z_READ_BASE */
3654          radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */
3655          radeon_emit(zb->db_depth_base);   /* DB_Z_WRITE_BASE */
3656          radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */
3657 
3658          radeon_set_context_reg_seq(R_028068_DB_Z_READ_BASE_HI, 5);
3659          radeon_emit(zb->db_depth_base >> 32);      /* DB_Z_READ_BASE_HI */
3660          radeon_emit(zb->db_stencil_base >> 32);    /* DB_STENCIL_READ_BASE_HI */
3661          radeon_emit(zb->db_depth_base >> 32);      /* DB_Z_WRITE_BASE_HI */
3662          radeon_emit(zb->db_stencil_base >> 32);    /* DB_STENCIL_WRITE_BASE_HI */
3663          radeon_emit(zb->db_htile_data_base >> 32); /* DB_HTILE_DATA_BASE_HI */
3664       } else if (sctx->gfx_level == GFX9) {
3665          radeon_set_context_reg_seq(R_028014_DB_HTILE_DATA_BASE, 3);
3666          radeon_emit(zb->db_htile_data_base); /* DB_HTILE_DATA_BASE */
3667          radeon_emit(S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */
3668          radeon_emit(zb->db_depth_size);                          /* DB_DEPTH_SIZE */
3669 
3670          radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 10);
3671          radeon_emit(db_z_info |                                   /* DB_Z_INFO */
3672                      S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));
3673          radeon_emit(db_stencil_info);                             /* DB_STENCIL_INFO */
3674          radeon_emit(zb->db_depth_base);                           /* DB_Z_READ_BASE */
3675          radeon_emit(S_028044_BASE_HI(zb->db_depth_base >> 32));   /* DB_Z_READ_BASE_HI */
3676          radeon_emit(zb->db_stencil_base);                         /* DB_STENCIL_READ_BASE */
3677          radeon_emit(S_02804C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
3678          radeon_emit(zb->db_depth_base);                           /* DB_Z_WRITE_BASE */
3679          radeon_emit(S_028054_BASE_HI(zb->db_depth_base >> 32));   /* DB_Z_WRITE_BASE_HI */
3680          radeon_emit(zb->db_stencil_base);                         /* DB_STENCIL_WRITE_BASE */
3681          radeon_emit(S_02805C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */
3682 
3683          radeon_set_context_reg_seq(R_028068_DB_Z_INFO2, 2);
3684          radeon_emit(zb->db_z_info2);       /* DB_Z_INFO2 */
3685          radeon_emit(zb->db_stencil_info2); /* DB_STENCIL_INFO2 */
3686       } else {
3687          /* GFX6-GFX8 */
         /* Set fields dependent on tc_compat_htile. */
3689          if (si_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) {
3690             if (tex->tc_compatible_htile) {
3691                db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
3692 
3693                /* 0 = full compression. N = only compress up to N-1 Z planes. */
3694                if (tex->buffer.b.b.nr_samples <= 1)
3695                   db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
3696                else if (tex->buffer.b.b.nr_samples <= 4)
3697                   db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
3698                else
3699                   db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
3700             }
3701          }
3702 
3703          radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
3704 
3705          radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 9);
3706          radeon_emit(zb->db_depth_info |   /* DB_DEPTH_INFO */
3707                      S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile));
3708          radeon_emit(db_z_info |           /* DB_Z_INFO */
3709                      S_028040_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));
3710          radeon_emit(db_stencil_info);     /* DB_STENCIL_INFO */
3711          radeon_emit(zb->db_depth_base);   /* DB_Z_READ_BASE */
3712          radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */
3713          radeon_emit(zb->db_depth_base);   /* DB_Z_WRITE_BASE */
3714          radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */
3715          radeon_emit(zb->db_depth_size);   /* DB_DEPTH_SIZE */
3716          radeon_emit(zb->db_depth_slice);  /* DB_DEPTH_SLICE */
3717       }
3718 
3719       radeon_set_context_reg_seq(R_028028_DB_STENCIL_CLEAR, 2);
3720       radeon_emit(tex->stencil_clear_value[level]);    /* R_028028_DB_STENCIL_CLEAR */
3721       radeon_emit(fui(tex->depth_clear_value[level])); /* R_02802C_DB_DEPTH_CLEAR */
3722 
3723       radeon_set_context_reg(R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
3724       radeon_set_context_reg(R_028ABC_DB_HTILE_SURFACE, db_htile_surface);
3725    } else if (sctx->framebuffer.dirty_zsbuf) {
3726       if (sctx->gfx_level == GFX9)
3727          radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 2);
3728       else
3729          radeon_set_context_reg_seq(R_028040_DB_Z_INFO, 2);
3730 
3731       /* Gfx11+: DB_Z_INFO.NUM_SAMPLES should match the framebuffer samples if no Z/S is bound.
3732        * It determines the sample count for VRS, primitive-ordered pixel shading, and occlusion
3733        * queries.
3734        */
3735       radeon_emit(S_028040_FORMAT(V_028040_Z_INVALID) |       /* DB_Z_INFO */
3736                   S_028040_NUM_SAMPLES(sctx->gfx_level >= GFX11 ? sctx->framebuffer.log_samples : 0));
3737       radeon_emit(S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
3738    }
3739 
3740    /* Framebuffer dimensions. */
3741    /* PA_SC_WINDOW_SCISSOR_TL is set to 0,0 in gfx*_init_gfx_preamble_state */
3742    radeon_set_context_reg(R_028208_PA_SC_WINDOW_SCISSOR_BR,
3743                           S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
3744 
3745    if (sctx->screen->dpbb_allowed &&
3746        sctx->screen->pbb_context_states_per_bin > 1) {
3747       radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
3748       radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
3749    }
3750    radeon_end();
3751 
3752    si_update_display_dcc_dirty(sctx);
3753 
3754    sctx->framebuffer.dirty_cbufs = 0;
3755    sctx->framebuffer.dirty_zsbuf = false;
3756 }
3757 
/* Emit the current framebuffer state (color buffers, Z/S buffer, and
 * framebuffer dimensions) for gfx11 discrete GPUs using the packed
 * context-register path (gfx11_begin/end_packed_context_regs).
 *
 * Only surfaces marked dirty (dirty_cbufs bitmask / dirty_zsbuf flag) are
 * re-emitted; both dirty trackers are cleared on exit.
 *
 * \param sctx   context whose queued framebuffer state is emitted
 * \param index  part of the state-atom emit callback signature shared with
 *               the other emit functions in this file (e.g.
 *               si_emit_cb_render_state); unused here
 */
static void gfx11_dgpu_emit_framebuffer_state(struct si_context *sctx, unsigned index)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
   struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
   unsigned i, nr_cbufs = state->nr_cbufs;
   struct si_texture *tex = NULL;
   struct si_surface *cb = NULL;
   /* An MSAA resolve binds an MSAA source as cbuf0 and a single-sample
    * destination as cbuf1. */
   bool is_msaa_resolve = state->nr_cbufs == 2 &&
                          state->cbufs[0] && state->cbufs[0]->texture->nr_samples > 1 &&
                          state->cbufs[1] && state->cbufs[1]->texture->nr_samples <= 1;

   /* CB can't do MSAA resolve on gfx11. */
   assert(!is_msaa_resolve);

   radeon_begin(cs);
   gfx11_begin_packed_context_regs();

   /* Colorbuffers. */
   for (i = 0; i < nr_cbufs; i++) {
      if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
         continue;

      /* RB+ depth-only rendering. See the comment where we set rbplus_depth_only_opt for more
       * information.
       */
      if (i == 0 &&
          sctx->screen->info.rbplus_allowed &&
          !sctx->queued.named.blend->cb_target_mask) {
         gfx11_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C,
                               S_028C70_FORMAT_GFX11(V_028C70_COLOR_32) |
                               S_028C70_NUMBER_TYPE(V_028C70_NUMBER_FLOAT));
         continue;
      }

      cb = (struct si_surface *)state->cbufs[i];
      if (!cb) {
         /* Unbound slot: FORMAT=INVALID disables the colorbuffer. */
         gfx11_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C,
                               S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID));
         continue;
      }

      tex = (struct si_texture *)cb->base.texture;
      radeon_add_to_buffer_list(
         sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC |
         (tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER));

      /* CMASK may live in a separate buffer; reference it too so the kernel
       * keeps it resident. */
      if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) {
         radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer,
                                   RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC |
                                   RADEON_PRIO_SEPARATE_META);
      }

      /* Compute mutable surface parameters. Addresses programmed into CB
       * registers are in 256-byte units, hence the >> 8 shifts below. */
      uint64_t cb_color_base = tex->buffer.gpu_address >> 8;
      uint64_t cb_dcc_base = 0;
      unsigned cb_color_info = cb->cb_color_info | tex->cb_color_info;

      /* Set up DCC. */
      if (vi_dcc_enabled(tex, cb->base.u.tex.level)) {
         cb_dcc_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;

         /* Keep only the swizzle bits that fit under the DCC alignment,
          * expressed in 256B address units. */
         unsigned dcc_tile_swizzle = tex->surface.tile_swizzle;
         dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8;
         cb_dcc_base |= dcc_tile_swizzle;
      }

      unsigned cb_color_attrib3, cb_fdcc_control;

      /* Set mutable surface parameters. */
      cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
      cb_color_base |= tex->surface.tile_swizzle;

      cb_color_attrib3 = cb->cb_color_attrib3 |
                         S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |
                         S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned);
      cb_fdcc_control = cb->cb_dcc_control |
                        S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
                        S_028C78_FDCC_ENABLE(vi_dcc_enabled(tex, cb->base.u.tex.level));

      /* NOTE(review): presumably a gfx1103_r2+ FDCC feature/workaround —
       * confirm against the register spec. */
      if (sctx->family >= CHIP_GFX1103_R2) {
         cb_fdcc_control |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) |
                            S_028C78_MAX_COMP_FRAGS(cb->base.texture->nr_samples >= 4);
      }

      /* Per-MRT registers are 0x3C apart; the *_EXT/ATTRIB2/ATTRIB3 banks are
       * packed 4 bytes apart. */
      gfx11_set_context_reg(R_028C60_CB_COLOR0_BASE + i * 0x3C, cb_color_base);
      gfx11_set_context_reg(R_028C6C_CB_COLOR0_VIEW + i * 0x3C, cb->cb_color_view);
      gfx11_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, cb_color_info);
      gfx11_set_context_reg(R_028C74_CB_COLOR0_ATTRIB + i * 0x3C, cb->cb_color_attrib);
      gfx11_set_context_reg(R_028C78_CB_COLOR0_DCC_CONTROL + i * 0x3C, cb_fdcc_control);
      gfx11_set_context_reg(R_028C94_CB_COLOR0_DCC_BASE + i * 0x3C, cb_dcc_base);
      gfx11_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32);
      gfx11_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32);
      gfx11_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2);
      gfx11_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3);
   }
   /* Disable the remaining (dirty but unbound) colorbuffer slots. */
   for (; i < 8; i++)
      if (sctx->framebuffer.dirty_cbufs & (1 << i))
         gfx11_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);

   /* ZS buffer. */
   if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
      struct si_surface *zb = (struct si_surface *)state->zsbuf;
      struct si_texture *tex = (struct si_texture *)zb->base.texture;
      unsigned db_z_info = zb->db_z_info;
      unsigned db_stencil_info = zb->db_stencil_info;
      unsigned db_htile_surface = zb->db_htile_surface;

      radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE |
                                (zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA
                                                                  : RADEON_PRIO_DEPTH_BUFFER));
      bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS);

      /* Set fields dependent on tc_compat_htile. */
      if (tc_compat_htile) {
         unsigned max_zplanes = 4;

         if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1)
            max_zplanes = 2;

         bool iterate256 = tex->buffer.b.b.nr_samples >= 2;
         db_z_info |= S_028040_ITERATE_FLUSH(1) |
                      S_028040_ITERATE_256(iterate256);
         db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled) |
                            S_028044_ITERATE_256(iterate256);

         /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
         if (sctx->screen->info.has_two_planes_iterate256_bug && iterate256 &&
             !tex->htile_stencil_disabled && tex->buffer.b.b.nr_samples == 4)
            max_zplanes = 1;

         db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1);
      }

      unsigned level = zb->base.u.tex.level;

      /* NOTE(review): S_028038_ZRANGE_PRECISION is an older-generation field
       * macro used while writing R_028040_DB_Z_INFO — presumably the bit
       * position is identical on gfx11; confirm against the register spec. */
      gfx11_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
      gfx11_set_context_reg(R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size);
      gfx11_set_context_reg(R_028040_DB_Z_INFO, db_z_info |
                            S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));
      gfx11_set_context_reg(R_028044_DB_STENCIL_INFO, db_stencil_info);
      gfx11_set_context_reg(R_028048_DB_Z_READ_BASE, zb->db_depth_base);
      gfx11_set_context_reg(R_02804C_DB_STENCIL_READ_BASE, zb->db_stencil_base);
      gfx11_set_context_reg(R_028050_DB_Z_WRITE_BASE, zb->db_depth_base);
      gfx11_set_context_reg(R_028054_DB_STENCIL_WRITE_BASE, zb->db_stencil_base);
      gfx11_set_context_reg(R_028068_DB_Z_READ_BASE_HI, zb->db_depth_base >> 32);
      gfx11_set_context_reg(R_02806C_DB_STENCIL_READ_BASE_HI, zb->db_stencil_base >> 32);
      gfx11_set_context_reg(R_028070_DB_Z_WRITE_BASE_HI, zb->db_depth_base >> 32);
      gfx11_set_context_reg(R_028074_DB_STENCIL_WRITE_BASE_HI, zb->db_stencil_base >> 32);
      gfx11_set_context_reg(R_028078_DB_HTILE_DATA_BASE_HI, zb->db_htile_data_base >> 32);
      gfx11_set_context_reg(R_028028_DB_STENCIL_CLEAR, tex->stencil_clear_value[level]);
      gfx11_set_context_reg(R_02802C_DB_DEPTH_CLEAR, fui(tex->depth_clear_value[level]));
      gfx11_set_context_reg(R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
      gfx11_set_context_reg(R_028ABC_DB_HTILE_SURFACE, db_htile_surface);
   } else if (sctx->framebuffer.dirty_zsbuf) {
      /* Gfx11+: DB_Z_INFO.NUM_SAMPLES should match the framebuffer samples if no Z/S is bound.
       * It determines the sample count for VRS, primitive-ordered pixel shading, and occlusion
       * queries.
       */
      gfx11_set_context_reg(R_028040_DB_Z_INFO,
                            S_028040_FORMAT(V_028040_Z_INVALID) |
                            S_028040_NUM_SAMPLES(sctx->framebuffer.log_samples));
      gfx11_set_context_reg(R_028044_DB_STENCIL_INFO, S_028044_FORMAT(V_028044_STENCIL_INVALID));
   }

   /* Framebuffer dimensions. */
   /* PA_SC_WINDOW_SCISSOR_TL is set to 0,0 in gfx*_init_gfx_preamble_state */
   gfx11_set_context_reg(R_028208_PA_SC_WINDOW_SCISSOR_BR,
                         S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
   gfx11_end_packed_context_regs();

   /* Emit BREAK_BATCH when DPBB binning allows more than one context state
    * per bin, so the new framebuffer state starts a fresh batch. */
   if (sctx->screen->dpbb_allowed &&
       sctx->screen->pbb_context_states_per_bin > 1) {
      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
   }
   radeon_end();

   si_update_display_dcc_dirty(sctx);

   /* Everything dirty has been emitted. */
   sctx->framebuffer.dirty_cbufs = 0;
   sctx->framebuffer.dirty_zsbuf = false;
}
3940 
si_out_of_order_rasterization(struct si_context * sctx)3941 static bool si_out_of_order_rasterization(struct si_context *sctx)
3942 {
3943    struct si_state_blend *blend = sctx->queued.named.blend;
3944    struct si_state_dsa *dsa = sctx->queued.named.dsa;
3945 
3946    if (!sctx->screen->info.has_out_of_order_rast)
3947       return false;
3948 
3949    unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit;
3950 
3951    colormask &= blend->cb_target_enabled_4bit;
3952 
3953    /* Conservative: No logic op. */
3954    if (colormask && blend->logicop_enable)
3955       return false;
3956 
3957    struct si_dsa_order_invariance dsa_order_invariant = {.zs = true,
3958                                                          .pass_set = true};
3959 
3960    if (sctx->framebuffer.state.zsbuf) {
3961       struct si_texture *zstex = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture;
3962       bool has_stencil = zstex->surface.has_stencil;
3963       dsa_order_invariant = dsa->order_invariance[has_stencil];
3964       if (!dsa_order_invariant.zs)
3965          return false;
3966 
3967       /* The set of PS invocations is always order invariant,
3968        * except when early Z/S tests are requested. */
3969       if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.writes_memory &&
3970           sctx->shader.ps.cso->info.base.fs.early_fragment_tests &&
3971           !dsa_order_invariant.pass_set)
3972          return false;
3973 
3974       if (sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_INTEGER &&
3975           !dsa_order_invariant.pass_set)
3976          return false;
3977    }
3978 
3979    if (!colormask)
3980       return true;
3981 
3982    unsigned blendmask = colormask & blend->blend_enable_4bit;
3983 
3984    if (blendmask) {
3985       /* Only commutative blending. */
3986       if (blendmask & ~blend->commutative_4bit)
3987          return false;
3988 
3989       if (!dsa_order_invariant.pass_set)
3990          return false;
3991    }
3992 
3993    if (colormask & ~blendmask)
3994       return false;
3995 
3996    return true;
3997 }
3998 
/* Emit the MSAA-related registers: PA_SC_LINE_CNTL, PA_SC_AA_CONFIG, DB_EQAA,
 * and PA_SC_MODE_CNTL_1. The sample-count fields are derived from the bound
 * framebuffer, rasterizer, and smoothing state as described by the large
 * comment below; OUT_OF_ORDER_PRIMITIVE_ENABLE comes from
 * si_out_of_order_rasterization().
 */
static void si_emit_msaa_config(struct si_context *sctx, unsigned index)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
   unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes;
   /* 33% faster rendering to linear color buffers */
   bool dst_is_linear = sctx->framebuffer.any_dst_linear;
   bool out_of_order_rast = si_out_of_order_rasterization(sctx);
   unsigned sc_mode_cntl_1 =
      S_028A4C_WALK_SIZE(dst_is_linear) | S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
      S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
      S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
      S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
      /* always 1: */
      S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
      S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
      S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1);
   unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) |
                      S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
   unsigned coverage_samples, z_samples;
   struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;

   /* S: Coverage samples (up to 16x):
    * - Scan conversion samples (PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES)
    * - CB FMASK samples (CB_COLORi_ATTRIB.NUM_SAMPLES)
    *
    * Z: Z/S samples (up to 8x, must be <= coverage samples and >= color samples):
    * - Value seen by DB (DB_Z_INFO.NUM_SAMPLES)
    * - Value seen by CB, must be correct even if Z/S is unbound (DB_EQAA.MAX_ANCHOR_SAMPLES)
    * # Missing samples are derived from Z planes if Z is compressed (up to 16x quality), or
    * # from the closest defined sample if Z is uncompressed (same quality as the number of
    * # Z samples).
    *
    * F: Color samples (up to 8x, must be <= coverage samples):
    * - CB color samples (CB_COLORi_ATTRIB.NUM_FRAGMENTS)
    * - PS iter samples (DB_EQAA.PS_ITER_SAMPLES)
    *
    * Can be anything between coverage and color samples:
    * - SampleMaskIn samples (PA_SC_AA_CONFIG.MSAA_EXPOSED_SAMPLES)
    * - SampleMaskOut samples (DB_EQAA.MASK_EXPORT_NUM_SAMPLES)
    * - Alpha-to-coverage samples (DB_EQAA.ALPHA_TO_MASK_NUM_SAMPLES)
    * - Occlusion query samples (DB_COUNT_CONTROL.SAMPLE_RATE)
    * # All are currently set the same as coverage samples.
    *
    * If color samples < coverage samples, FMASK has a higher bpp to store an "unknown"
    * flag for undefined color samples. A shader-based resolve must handle unknowns
    * or mask them out with AND. Unknowns can also be guessed from neighbors via
    * an edge-detect shader-based resolve, which is required to make "color samples = 1"
    * useful. The CB resolve always drops unknowns.
    *
    * Sensible AA configurations:
    *   EQAA 16s 8z 8f - might look the same as 16x MSAA if Z is compressed
    *   EQAA 16s 8z 4f - might look the same as 16x MSAA if Z is compressed
    *   EQAA 16s 4z 4f - might look the same as 16x MSAA if Z is compressed
    *   EQAA  8s 8z 8f = 8x MSAA
    *   EQAA  8s 8z 4f - might look the same as 8x MSAA
    *   EQAA  8s 8z 2f - might look the same as 8x MSAA with low-density geometry
    *   EQAA  8s 4z 4f - might look the same as 8x MSAA if Z is compressed
    *   EQAA  8s 4z 2f - might look the same as 8x MSAA with low-density geometry if Z is compressed
    *   EQAA  4s 4z 4f = 4x MSAA
    *   EQAA  4s 4z 2f - might look the same as 4x MSAA with low-density geometry
    *   EQAA  2s 2z 2f = 2x MSAA
    */
   coverage_samples = si_get_num_coverage_samples(sctx);

   /* DCC_DECOMPRESS and ELIMINATE_FAST_CLEAR require MSAA_NUM_SAMPLES=0. */
   if (sctx->gfx_level >= GFX11 && sctx->gfx11_force_msaa_num_samples_zero)
      coverage_samples = 1;

   /* The DX10 diamond test is not required by GL and decreases line rasterization
    * performance, so don't use it.
    */
   unsigned sc_line_cntl = 0;
   unsigned sc_aa_config = 0;

   /* Program scan-conversion sampling only when multisampling (or line
    * smoothing) is actually in effect. */
   if (coverage_samples > 1 && (rs->multisample_enable ||
                                sctx->smoothing_enabled)) {
      unsigned log_samples = util_logbase2(coverage_samples);

      sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1) |
                      S_028BDC_PERPENDICULAR_ENDCAP_ENA(rs->perpendicular_end_caps) |
                      S_028BDC_EXTRA_DX_DY_PRECISION(rs->perpendicular_end_caps &&
                                                     (sctx->family == CHIP_VEGA20 ||
                                                      sctx->gfx_level >= GFX10));
      sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
                     S_028BE0_MAX_SAMPLE_DIST(si_msaa_max_distance[log_samples]) |
                     S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
                     S_028BE0_COVERED_CENTROID_IS_CENTER(sctx->gfx_level >= GFX10_3);
   }

   if (sctx->framebuffer.nr_samples > 1 ||
       sctx->smoothing_enabled) {
      /* Z sample count follows the bound Z/S buffer, or the coverage count
       * when no Z/S buffer is bound. */
      if (sctx->framebuffer.state.zsbuf) {
         z_samples = sctx->framebuffer.state.zsbuf->texture->nr_samples;
         z_samples = MAX2(1, z_samples);
      } else {
         z_samples = coverage_samples;
      }
      unsigned log_samples = util_logbase2(coverage_samples);
      unsigned log_z_samples = util_logbase2(z_samples);
      unsigned ps_iter_samples = si_get_ps_iter_samples(sctx);
      unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);
      if (sctx->framebuffer.nr_samples > 1) {
         db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
                    S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
                    S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
                    S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
         sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
      } else if (sctx->smoothing_enabled) {
         db_eqaa |= S_028804_OVERRASTERIZATION_AMOUNT(log_samples);
      }
   }

   /* GFX11 packed context-register path vs. the legacy SET_CONTEXT_REG path. */
   if (sctx->screen->info.has_set_context_pairs_packed) {
      radeon_begin(cs);
      gfx11_begin_packed_context_regs();
      gfx11_opt_set_context_reg(R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL,
                                sc_line_cntl);
      gfx11_opt_set_context_reg(R_028BE0_PA_SC_AA_CONFIG, SI_TRACKED_PA_SC_AA_CONFIG,
                                sc_aa_config);
      gfx11_opt_set_context_reg(R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa);
      gfx11_opt_set_context_reg(R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1,
                                sc_mode_cntl_1);
      gfx11_end_packed_context_regs();
      radeon_end(); /* don't track context rolls on GFX11 */
   } else {
      radeon_begin(cs);
      radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL,
                                  sc_line_cntl, sc_aa_config);
      radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa);
      radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1,
                                 sc_mode_cntl_1);
      radeon_end_update_context_roll(sctx);
   }
}
4133 
si_update_ps_iter_samples(struct si_context * sctx)4134 void si_update_ps_iter_samples(struct si_context *sctx)
4135 {
4136    if (sctx->framebuffer.nr_samples > 1)
4137       si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
4138    if (sctx->screen->dpbb_allowed)
4139       si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);
4140 }
4141 
si_set_min_samples(struct pipe_context * ctx,unsigned min_samples)4142 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
4143 {
4144    struct si_context *sctx = (struct si_context *)ctx;
4145 
4146    /* The hardware can only do sample shading with 2^n samples. */
4147    min_samples = util_next_power_of_two(min_samples);
4148 
4149    if (sctx->ps_iter_samples == min_samples)
4150       return;
4151 
4152    sctx->ps_iter_samples = min_samples;
4153 
4154    si_ps_key_update_sample_shading(sctx);
4155    si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx);
4156    sctx->do_update_shaders = true;
4157 
4158    si_update_ps_iter_samples(sctx);
4159 }
4160 
4161 /*
4162  * Samplers
4163  */
4164 
4165 /**
4166  * Build the sampler view descriptor for a buffer texture.
4167  * @param state 256-bit descriptor; only the high 128 bits are filled in
4168  */
void si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf,
                               enum pipe_format format, unsigned offset, unsigned num_elements,
                               uint32_t *state)
{
   const struct util_format_description *desc;
   unsigned stride;
   unsigned num_records;

   desc = util_format_description(format);
   stride = desc->block.bits / 8; /* element size in bytes */

   /* Clamp the record count to the number of whole elements that fit in the
    * buffer past "offset". */
   num_records = num_elements;
   num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);

   /* The NUM_RECORDS field has a different meaning depending on the chip,
    * instruction type, STRIDE, and SWIZZLE_ENABLE.
    *
    * GFX6-7,10:
    * - If STRIDE == 0, it's in byte units.
    * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN.
    *
    * GFX8:
    * - For SMEM and STRIDE == 0, it's in byte units.
    * - For SMEM and STRIDE != 0, it's in units of STRIDE.
    * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units.
    * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE.
    * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_-
    *       ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when
    *       using SMEM. This can be done in the shader by clearing STRIDE with s_and.
    *       That way the same descriptor can be used by both SMEM and VMEM.
    *
    * GFX9:
    * - For SMEM and STRIDE == 0, it's in byte units.
    * - For SMEM and STRIDE != 0, it's in units of STRIDE.
    * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units.
    * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE.
    */
   if (screen->info.gfx_level == GFX8)
      num_records *= stride; /* GFX8 wants byte units for SMEM (see above) */

   /* Only the high 128 bits (DWORDs 4-7) of the 256-bit descriptor are
    * written here, matching the function's documented contract. */
   state[4] = 0;
   state[5] = S_008F04_STRIDE(stride);
   state[6] = num_records;
   state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
              S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
              S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
              S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3]));

   if (screen->info.gfx_level >= GFX10) {
      const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&screen->info)[format];

      /* OOB_SELECT chooses the out-of-bounds check.
       *
       * GFX10:
       *  - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
       *  - 1: index >= NUM_RECORDS
       *  - 2: NUM_RECORDS == 0
       *  - 3: if SWIZZLE_ENABLE:
       *          swizzle_address >= NUM_RECORDS
       *       else:
       *          offset >= NUM_RECORDS
       *
       * GFX11:
       *  - 0: (index >= NUM_RECORDS) || (offset+payload > STRIDE)
       *  - 1: index >= NUM_RECORDS
       *  - 2: NUM_RECORDS == 0
       *  - 3: if SWIZZLE_ENABLE && STRIDE:
       *          (index >= NUM_RECORDS) || ( offset+payload > STRIDE)
       *       else:
       *          offset+payload > NUM_RECORDS
       */
      state[7] |= S_008F0C_FORMAT(fmt->img_format) |
                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
                  S_008F0C_RESOURCE_LEVEL(screen->info.gfx_level < GFX11);
   } else {
      /* Pre-GFX10 descriptors use separate NUM_FORMAT/DATA_FORMAT fields
       * instead of a single combined FORMAT field. */
      int first_non_void;
      unsigned num_format, data_format;

      first_non_void = util_format_get_first_non_void_channel(format);
      num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void);
      data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void);

      state[7] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
   }
}
4254 
gfx9_border_color_swizzle(const unsigned char swizzle[4])4255 static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
4256 {
4257    unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
4258 
4259    if (swizzle[3] == PIPE_SWIZZLE_X) {
4260       /* For the pre-defined border color values (white, opaque
4261        * black, transparent black), the only thing that matters is
4262        * that the alpha channel winds up in the correct place
4263        * (because the RGB channels are all the same) so either of
4264        * these enumerations will work.
4265        */
4266       if (swizzle[2] == PIPE_SWIZZLE_Y)
4267          bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
4268       else
4269          bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
4270    } else if (swizzle[0] == PIPE_SWIZZLE_X) {
4271       if (swizzle[1] == PIPE_SWIZZLE_Y)
4272          bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
4273       else
4274          bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
4275    } else if (swizzle[1] == PIPE_SWIZZLE_X) {
4276       bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
4277    } else if (swizzle[2] == PIPE_SWIZZLE_X) {
4278       bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
4279    }
4280 
4281    return bc_swizzle;
4282 }
4283 
4284 /**
4285  * Translate the parameters to an image descriptor for CDNA image emulation.
4286  * In this function, we choose our own image descriptor format because we emulate image opcodes
4287  * using buffer opcodes.
4288  */
static void cdna_emu_make_image_descriptor(struct si_screen *screen, struct si_texture *tex,
                                           bool sampler, enum pipe_texture_target target,
                                           enum pipe_format pipe_format,
                                           const unsigned char state_swizzle[4], unsigned first_level,
                                           unsigned last_level, unsigned first_layer,
                                           unsigned last_layer, unsigned width, unsigned height,
                                           unsigned depth, uint32_t *state, uint32_t *fmask_state)
{
   const struct util_format_description *desc = util_format_description(pipe_format);

   /* We don't need support these. We only need enough to support VAAPI and OpenMAX. */
   if (target == PIPE_TEXTURE_CUBE ||
       target == PIPE_TEXTURE_CUBE_ARRAY ||
       tex->buffer.b.b.last_level > 0 ||
       tex->buffer.b.b.nr_samples >= 2 ||
       desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
       desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ||
       util_format_is_compressed(pipe_format)) {
      assert(!"unexpected texture type");
      /* Zero all 8 DWORDs so the caller never sees a stale descriptor. */
      memset(state, 0, 8 * 4);
      return;
   }

   /* Adjust the image parameters according to the texture type. */
   switch (target) {
   case PIPE_TEXTURE_1D:
      height = 1;
      FALLTHROUGH;
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
      depth = 1;
      break;

   case PIPE_TEXTURE_1D_ARRAY:
      height = 1;
      FALLTHROUGH;
   case PIPE_TEXTURE_2D_ARRAY:
      /* Clamp the layer range to the resource, then express it as
       * first_layer + depth (layer count). */
      first_layer = MIN2(first_layer, tex->buffer.b.b.array_size - 1);
      last_layer = MIN2(last_layer, tex->buffer.b.b.array_size - 1);
      last_layer = MAX2(last_layer, first_layer);
      depth = last_layer - first_layer + 1;
      break;

   case PIPE_TEXTURE_3D:
      first_layer = 0;
      break;

   default:
      unreachable("invalid texture target");
   }

   unsigned stride = desc->block.bits / 8; /* element size in bytes */
   uint64_t num_records = tex->surface.surf_size / stride;
   /* state[2] (NUM_RECORDS) is only 32 bits wide. */
   assert(num_records <= UINT32_MAX);

   /* Prepare the format fields. */
   unsigned char swizzle[4];
   util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);

   /* Buffer descriptor */
   state[0] = 0;
   state[1] = S_008F04_STRIDE(stride);
   state[2] = num_records;
   state[3] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
              S_008F0C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
              S_008F0C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
              S_008F0C_DST_SEL_W(si_map_swizzle(swizzle[3]));

   if (screen->info.gfx_level >= GFX10) {
      const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&screen->info)[pipe_format];

      state[3] |= S_008F0C_FORMAT(fmt->img_format) |
                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
                  S_008F0C_RESOURCE_LEVEL(screen->info.gfx_level < GFX11);
   } else {
      /* Pre-GFX10: separate NUM_FORMAT/DATA_FORMAT fields. */
      int first_non_void = util_format_get_first_non_void_channel(pipe_format);
      unsigned num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void);
      unsigned data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void);

      state[3] |= S_008F0C_NUM_FORMAT(num_format) |
                  S_008F0C_DATA_FORMAT(data_format);
   }

   /* Additional fields used by image opcode emulation. */
   state[4] = width | (height << 16);
   state[5] = depth | (first_layer << 16);
   state[6] = tex->surface.u.gfx9.surf_pitch;
   /* pitch * height — presumably the per-layer element count used by the
    * emulation shader for layer addressing; verify against the consumer. */
   state[7] = (uint32_t)tex->surface.u.gfx9.surf_pitch * tex->surface.u.gfx9.surf_height;
}
4378 
4379 /**
4380  * Build the sampler view descriptor for a texture.
4381  */
static void gfx10_make_texture_descriptor(
   struct si_screen *screen, struct si_texture *tex, bool sampler, enum pipe_texture_target target,
   enum pipe_format pipe_format, const unsigned char state_swizzle[4], unsigned first_level,
   unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned width, unsigned height,
   unsigned depth, bool get_bo_metadata, uint32_t *state, uint32_t *fmask_state)
{
   /* Chips without image opcodes (CDNA) emulate images with buffer opcodes. */
   if (!screen->info.has_image_opcodes && !get_bo_metadata) {
      cdna_emu_make_image_descriptor(screen, tex, sampler, target, pipe_format, state_swizzle,
                                     first_level, last_level, first_layer, last_layer, width,
                                     height, depth, state, fmask_state);
      return;
   }

   struct pipe_resource *res = &tex->buffer.b.b;
   const struct util_format_description *desc;
   unsigned img_format;
   unsigned char swizzle[4];
   unsigned type;
   uint64_t va;

   desc = util_format_description(pipe_format);
   img_format = ac_get_gfx10_format_table(&screen->info)[pipe_format].img_format;

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
      /* Z/S formats: replicate the sampled channel (depth or stencil). */
      const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
      const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
      const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};
      bool is_stencil = false;

      switch (pipe_format) {
      case PIPE_FORMAT_S8_UINT_Z24_UNORM:
      case PIPE_FORMAT_X32_S8X24_UINT:
      case PIPE_FORMAT_X8Z24_UNORM:
         util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
         is_stencil = true;
         break;
      case PIPE_FORMAT_X24S8_UINT:
         /*
          * X24S8 is implemented as an 8_8_8_8 data format, to
          * fix texture gathers. This affects at least
          * GL45-CTS.texture_cube_map_array.sampling on GFX8.
          */
         util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
         is_stencil = true;
         break;
      default:
         util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
         is_stencil = pipe_format == PIPE_FORMAT_S8_UINT;
      }

      /* Depth that was promoted to 32-bit float needs the CLAMP variant
       * of the format when sampled as depth (not stencil). */
      if (tex->upgraded_depth && !is_stencil) {
         if (screen->info.gfx_level >= GFX11) {
            assert(img_format == V_008F0C_GFX11_FORMAT_32_FLOAT);
            img_format = V_008F0C_GFX11_FORMAT_32_FLOAT_CLAMP;
         } else {
            assert(img_format == V_008F0C_GFX10_FORMAT_32_FLOAT);
            img_format = V_008F0C_GFX10_FORMAT_32_FLOAT_CLAMP;
         }
      }
   } else {
      util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
   }

   if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY)) {
      /* For the purpose of shader images, treat cube maps as 2D
       * arrays.
       */
      type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   } else {
      type = si_tex_dim(screen, tex, target, res->nr_samples);
   }

   /* Fix up the depth field per resource type. */
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = res->array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (sampler || res->target != PIPE_TEXTURE_3D)
         depth = res->array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = res->array_size / 6; /* cube faces -> cubes */

   state[0] = 0;
   state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
   state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
              S_00A008_RESOURCE_LEVEL(screen->info.gfx_level < GFX11);

   /* For MSAA resources, BASE_LEVEL/LAST_LEVEL encode 0..log2(samples)
    * instead of a mip range. */
   state[3] =
      S_00A00C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
      S_00A00C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
      S_00A00C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
      S_00A00C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
      S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) |
      S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) |
      S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type);
   /* Depth is the last accessible layer on gfx9+. The hw doesn't need
    * to know the total number of layers.
    */
   state[4] =
      S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) |
      S_00A010_BASE_ARRAY(first_layer);
   state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) |
              S_00A014_PERF_MOD(4);

   unsigned max_mip = res->nr_samples > 1 ? util_logbase2(res->nr_samples) :
                                            tex->buffer.b.b.last_level;

   /* MAX_MIP lives in DWORD 1 on GFX11+, in DWORD 5 on GFX10. */
   if (screen->info.gfx_level >= GFX11) {
      state[1] |= S_00A004_MAX_MIP(max_mip);
   } else {
      state[5] |= S_00A014_MAX_MIP(max_mip);
   }
   state[6] = 0;
   state[7] = 0;

   /* DCC compression parameters, only when DCC is enabled for first_level. */
   if (vi_dcc_enabled(tex, first_level)) {
      state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) |
                  S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
   }

   /* Initialize the sampler view for FMASK. */
   if (tex->surface.fmask_offset) {
      uint32_t format;

      va = tex->buffer.gpu_address + tex->surface.fmask_offset;

/* Encode (coverage samples, storage samples) into a single switch key. */
#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))
      switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
      case FMASK(2, 1):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F1;
         break;
      case FMASK(2, 2):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
         break;
      case FMASK(4, 1):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F1;
         break;
      case FMASK(4, 2):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F2;
         break;
      case FMASK(4, 4):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
         break;
      case FMASK(8, 1):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S8_F1;
         break;
      case FMASK(8, 2):
         format = V_008F0C_GFX10_FORMAT_FMASK16_S8_F2;
         break;
      case FMASK(8, 4):
         format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F4;
         break;
      case FMASK(8, 8):
         format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
         break;
      case FMASK(16, 1):
         format = V_008F0C_GFX10_FORMAT_FMASK16_S16_F1;
         break;
      case FMASK(16, 2):
         format = V_008F0C_GFX10_FORMAT_FMASK32_S16_F2;
         break;
      case FMASK(16, 4):
         format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F4;
         break;
      case FMASK(16, 8):
         format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F8;
         break;
      default:
         unreachable("invalid nr_samples");
      }
#undef FMASK
      fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle;
      fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
                       S_00A004_WIDTH_LO(width - 1);
      fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
                       S_00A008_RESOURCE_LEVEL(1);
      fmask_state[3] =
         S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
         S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
         S_00A00C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) |
         S_00A00C_TYPE(si_tex_dim(screen, tex, target, 0));
      fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
      fmask_state[5] = 0;
      fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
      fmask_state[7] = 0;
   }
}
4569 
/**
 * Build the 8-dword image (sampler view) descriptor for a texture on
 * SI-GFX9, and the companion FMASK descriptor if the texture has one.
 *
 * \param screen          screen
 * \param tex             texture to describe
 * \param sampler         true for sampler views, false for shader images
 * \param target          texture target to encode (may differ from the resource target)
 * \param pipe_format     format to sample with (may differ from the resource format)
 * \param state_swizzle   swizzle from the sampler view template
 * \param first_level     first mip level (ignored for multisampled textures)
 * \param last_level      last mip level (ignored for multisampled textures)
 * \param first_layer     first array layer
 * \param last_layer      last array layer
 * \param width           base level width
 * \param height          base level height
 * \param depth           base level depth, or array size for array targets
 * \param get_bo_metadata true when only computing BO metadata (skips the CDNA emulation path)
 * \param state           output: 8-dword image descriptor
 * \param fmask_state     output: 8-dword FMASK descriptor (written only when
 *                        tex->surface.fmask_offset != 0)
 */
static void si_make_texture_descriptor(struct si_screen *screen, struct si_texture *tex,
                                       bool sampler, enum pipe_texture_target target,
                                       enum pipe_format pipe_format,
                                       const unsigned char state_swizzle[4], unsigned first_level,
                                       unsigned last_level, unsigned first_layer,
                                       unsigned last_layer, unsigned width, unsigned height,
                                       unsigned depth, bool get_bo_metadata,
                                       uint32_t *state, uint32_t *fmask_state)
{
   /* Chips without image opcodes (CDNA) emulate image descriptors in software. */
   if (!screen->info.has_image_opcodes && !get_bo_metadata) {
      cdna_emu_make_image_descriptor(screen, tex, sampler, target, pipe_format, state_swizzle,
                                     first_level, last_level, first_layer, last_layer, width,
                                     height, depth, state, fmask_state);
      return;
   }

   struct pipe_resource *res = &tex->buffer.b.b;
   const struct util_format_description *desc;
   unsigned char swizzle[4];
   int first_non_void;
   unsigned num_format, data_format, type, num_samples;
   uint64_t va;

   desc = util_format_description(pipe_format);

   /* Depth/stencil uses nr_samples; color uses nr_storage_samples. */
   num_samples = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ? MAX2(1, res->nr_samples)
                                                               : MAX2(1, res->nr_storage_samples);

   /* Compose the view swizzle with the format swizzle. For ZS formats, the
    * hardware channel holding the wanted data depends on the format. */
   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
      const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
      const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
      const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};

      switch (pipe_format) {
      case PIPE_FORMAT_S8_UINT_Z24_UNORM:
      case PIPE_FORMAT_X32_S8X24_UINT:
      case PIPE_FORMAT_X8Z24_UNORM:
         util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
         break;
      case PIPE_FORMAT_X24S8_UINT:
         /*
          * X24S8 is implemented as an 8_8_8_8 data format, to
          * fix texture gathers. This affects at least
          * GL45-CTS.texture_cube_map_array.sampling on GFX8.
          */
         if (screen->info.gfx_level <= GFX8)
            util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
         else
            util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
         break;
      default:
         util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
      }
   } else {
      util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
   }

   first_non_void = util_format_get_first_non_void_channel(pipe_format);

   /* Select NUM_FORMAT (how the hardware interprets stored bits). */
   switch (pipe_format) {
   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
      num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
      break;
   default:
      if (first_non_void < 0) {
         /* All channels are void: compressed or subsampled formats. */
         if (util_format_is_compressed(pipe_format)) {
            switch (pipe_format) {
            case PIPE_FORMAT_DXT1_SRGB:
            case PIPE_FORMAT_DXT1_SRGBA:
            case PIPE_FORMAT_DXT3_SRGBA:
            case PIPE_FORMAT_DXT5_SRGBA:
            case PIPE_FORMAT_BPTC_SRGBA:
            case PIPE_FORMAT_ETC2_SRGB8:
            case PIPE_FORMAT_ETC2_SRGB8A1:
            case PIPE_FORMAT_ETC2_SRGBA8:
               num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
               break;
            case PIPE_FORMAT_RGTC1_SNORM:
            case PIPE_FORMAT_LATC1_SNORM:
            case PIPE_FORMAT_RGTC2_SNORM:
            case PIPE_FORMAT_LATC2_SNORM:
            case PIPE_FORMAT_ETC2_R11_SNORM:
            case PIPE_FORMAT_ETC2_RG11_SNORM:
            /* implies float, so use SNORM/UNORM to determine
               whether data is signed or not */
            case PIPE_FORMAT_BPTC_RGB_FLOAT:
               num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
               break;
            default:
               num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
               break;
            }
         } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
            num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
         } else {
            num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
         }
      } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
         num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
      } else {
         /* Derive NUM_FORMAT from the first non-void channel description. */
         num_format = V_008F14_IMG_NUM_FORMAT_UNORM;

         switch (desc->channel[first_non_void].type) {
         case UTIL_FORMAT_TYPE_FLOAT:
            num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
            break;
         case UTIL_FORMAT_TYPE_SIGNED:
            if (desc->channel[first_non_void].normalized)
               num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
            else if (desc->channel[first_non_void].pure_integer)
               num_format = V_008F14_IMG_NUM_FORMAT_SINT;
            else
               num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
            break;
         case UTIL_FORMAT_TYPE_UNSIGNED:
            if (desc->channel[first_non_void].normalized)
               num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
            else if (desc->channel[first_non_void].pure_integer)
               num_format = V_008F14_IMG_NUM_FORMAT_UINT;
            else
               num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
         }
      }
   }

   /* ~0 means the format is unsupported; 0 disables the descriptor. */
   data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void);
   if (data_format == ~0) {
      data_format = 0;
   }

   /* S8 with Z32 HTILE needs a special format. */
   if (screen->info.gfx_level == GFX9 && pipe_format == PIPE_FORMAT_S8_UINT)
      data_format = V_008F14_IMG_DATA_FORMAT_S8_32;

   if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY ||
                    (screen->info.gfx_level <= GFX8 && res->target == PIPE_TEXTURE_3D))) {
      /* For the purpose of shader images, treat cube maps and 3D
       * textures as 2D arrays. For 3D textures, the address
       * calculations for mipmaps are different, so we rely on the
       * caller to effectively disable mipmaps.
       */
      type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;

      assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
   } else {
      type = si_tex_dim(screen, tex, target, num_samples);
   }

   /* Fix up height/depth according to the resource type. */
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = res->array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (sampler || res->target != PIPE_TEXTURE_3D)
         depth = res->array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = res->array_size / 6;

   /* Pack the 8 descriptor dwords. dword 0 (base address) is filled elsewhere. */
   state[0] = 0;
   state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format));
   state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
   state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
               S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
               S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
               S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
               /* For MSAA, LAST_LEVEL holds log2(samples) instead of a mip level. */
               S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : first_level) |
               S_008F1C_LAST_LEVEL(num_samples > 1 ? util_logbase2(num_samples) : last_level) |
               S_008F1C_TYPE(type));
   state[4] = 0;
   state[5] = S_008F24_BASE_ARRAY(first_layer);
   state[6] = 0;
   state[7] = 0;

   if (screen->info.gfx_level == GFX9) {
      unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);

      /* Depth is the last accessible layer on Gfx9.
       * The hw doesn't need to know the total number of layers.
       */
      if (type == V_008F1C_SQ_RSRC_IMG_3D)
         state[4] |= S_008F20_DEPTH(depth - 1);
      else
         state[4] |= S_008F20_DEPTH(last_layer);

      state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
      state[5] |= S_008F24_MAX_MIP(num_samples > 1 ? util_logbase2(num_samples)
                                                   : tex->buffer.b.b.last_level);
   } else {
      state[3] |= S_008F1C_POW2_PAD(res->last_level > 0);
      state[4] |= S_008F20_DEPTH(depth - 1);
      state[5] |= S_008F24_LAST_ARRAY(last_layer);
   }

   if (vi_dcc_enabled(tex, first_level)) {
      state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
   } else {
      /* The last dword is unused by hw. The shader uses it to clear
       * bits in the first dword of sampler state.
       */
      if (screen->info.gfx_level <= GFX7 && res->nr_samples <= 1) {
         if (first_level == last_level)
            state[7] = C_008F30_MAX_ANISO_RATIO;
         else
            state[7] = 0xffffffff;
      }
   }

   /* Initialize the sampler view for FMASK. */
   if (tex->surface.fmask_offset) {
      uint32_t data_format, num_format;

      va = tex->buffer.gpu_address + tex->surface.fmask_offset;

      /* Encode (samples, fragments) as a single key for the switches below. */
#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))
      if (screen->info.gfx_level == GFX9) {
         data_format = V_008F14_IMG_DATA_FORMAT_FMASK;
         switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
         case FMASK(2, 1):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_1;
            break;
         case FMASK(2, 2):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
            break;
         case FMASK(4, 1):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_1;
            break;
         case FMASK(4, 2):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_2;
            break;
         case FMASK(4, 4):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
            break;
         case FMASK(8, 1):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_8_1;
            break;
         case FMASK(8, 2):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_8_2;
            break;
         case FMASK(8, 4):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_4;
            break;
         case FMASK(8, 8):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
            break;
         case FMASK(16, 1):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_16_1;
            break;
         case FMASK(16, 2):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_16_2;
            break;
         case FMASK(16, 4):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_4;
            break;
         case FMASK(16, 8):
            num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_8;
            break;
         default:
            unreachable("invalid nr_samples");
         }
      } else {
         /* Pre-GFX9: the sample/fragment count is part of DATA_FORMAT. */
         switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
         case FMASK(2, 1):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1;
            break;
         case FMASK(2, 2):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
            break;
         case FMASK(4, 1):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1;
            break;
         case FMASK(4, 2):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2;
            break;
         case FMASK(4, 4):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
            break;
         case FMASK(8, 1):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1;
            break;
         case FMASK(8, 2):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2;
            break;
         case FMASK(8, 4):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4;
            break;
         case FMASK(8, 8):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
            break;
         case FMASK(16, 1):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1;
            break;
         case FMASK(16, 2):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2;
            break;
         case FMASK(16, 4):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4;
            break;
         case FMASK(16, 8):
            data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8;
            break;
         default:
            unreachable("invalid nr_samples");
         }
         num_format = V_008F14_IMG_NUM_FORMAT_UINT;
      }
#undef FMASK

      fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle;
      fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(data_format) |
                       S_008F14_NUM_FORMAT(num_format);
      fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
      fmask_state[3] =
         S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
         S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
         S_008F1C_TYPE(si_tex_dim(screen, tex, target, 0));
      fmask_state[4] = 0;
      fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
      fmask_state[6] = 0;
      fmask_state[7] = 0;

      /* Tiling information is encoded differently before/after GFX9. */
      if (screen->info.gfx_level == GFX9) {
         fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode);
         fmask_state[4] |=
            S_008F20_DEPTH(last_layer) | S_008F20_PITCH(tex->surface.u.gfx9.color.fmask_epitch);
         fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
                           S_008F24_META_RB_ALIGNED(1);
      } else {
         fmask_state[3] |= S_008F1C_TILING_INDEX(tex->surface.u.legacy.color.fmask.tiling_index);
         fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
                           S_008F20_PITCH(tex->surface.u.legacy.color.fmask.pitch_in_pixels - 1);
         fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
      }
   }
}
4906 
/**
 * Create a sampler view.
 *
 * Handles buffer resources, flushed-depth substitution for depth/stencil
 * textures that can't be sampled directly, and DB-compatible format
 * overrides before building the hardware descriptor.
 *
 * @param ctx      context
 * @param texture  texture
 * @param state    sampler view template
 * @return the new sampler view, or NULL on allocation failure
 */
static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx,
                                                        struct pipe_resource *texture,
                                                        const struct pipe_sampler_view *state)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_sampler_view *view = CALLOC_STRUCT_CL(si_sampler_view);
   struct si_texture *tex = (struct si_texture *)texture;
   unsigned char state_swizzle[4];
   unsigned last_layer = state->u.tex.last_layer;
   enum pipe_format pipe_format;
   const struct legacy_surf_level *surflevel;

   if (!view)
      return NULL;

   /* initialize base object */
   view->base = *state;
   view->base.texture = NULL;
   view->base.reference.count = 1;
   view->base.context = ctx;

   assert(texture);
   pipe_resource_reference(&view->base.texture, texture);

   /* Stencil-only view formats sample stencil rather than depth. */
   if (state->format == PIPE_FORMAT_X24S8_UINT || state->format == PIPE_FORMAT_S8X24_UINT ||
       state->format == PIPE_FORMAT_X32_S8X24_UINT || state->format == PIPE_FORMAT_S8_UINT)
      view->is_stencil_sampler = true;

   /* Buffer resource. */
   if (texture->target == PIPE_BUFFER) {
      uint32_t elements = si_clamp_texture_texel_count(sctx->screen->max_texel_buffer_elements,
                                                       state->format, state->u.buf.size);

      si_make_buffer_descriptor(sctx->screen, si_resource(texture), state->format,
                                state->u.buf.offset, elements, view->state);
      return &view->base;
   }

   state_swizzle[0] = state->swizzle_r;
   state_swizzle[1] = state->swizzle_g;
   state_swizzle[2] = state->swizzle_b;
   state_swizzle[3] = state->swizzle_a;

   /* This is not needed if gallium frontends set last_layer correctly. */
   if (state->target == PIPE_TEXTURE_1D || state->target == PIPE_TEXTURE_2D ||
       state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE)
      last_layer = state->u.tex.first_layer;

   /* Texturing with separate depth and stencil. */
   pipe_format = state->format;

   /* Depth/stencil texturing sometimes needs separate texture. */
   if (tex->is_depth && !si_can_sample_zs(tex, view->is_stencil_sampler)) {
      if (!tex->flushed_depth_texture && !si_init_flushed_depth_texture(ctx, texture)) {
         /* Allocation of the flushed-depth texture failed: roll back. */
         pipe_resource_reference(&view->base.texture, NULL);
         FREE(view);
         return NULL;
      }

      assert(tex->flushed_depth_texture);

      /* Override format for the case where the flushed texture
       * contains only Z or only S.
       */
      if (tex->flushed_depth_texture->buffer.b.b.format != tex->buffer.b.b.format)
         pipe_format = tex->flushed_depth_texture->buffer.b.b.format;

      /* Sample from the flushed copy instead of the original. */
      tex = tex->flushed_depth_texture;
   }

   surflevel = tex->surface.u.legacy.level;

   if (tex->db_compatible) {
      if (!view->is_stencil_sampler)
         pipe_format = tex->db_render_format;

      switch (pipe_format) {
      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
         pipe_format = PIPE_FORMAT_Z32_FLOAT;
         break;
      case PIPE_FORMAT_X8Z24_UNORM:
      case PIPE_FORMAT_S8_UINT_Z24_UNORM:
         /* Z24 is always stored like this for DB
          * compatibility.
          */
         pipe_format = PIPE_FORMAT_Z24X8_UNORM;
         break;
      case PIPE_FORMAT_X24S8_UINT:
      case PIPE_FORMAT_S8X24_UINT:
      case PIPE_FORMAT_X32_S8X24_UINT:
         /* Stencil views read from the separate stencil surface. */
         pipe_format = PIPE_FORMAT_S8_UINT;
         surflevel = tex->surface.u.legacy.zs.stencil_level;
         break;
      default:;
      }
   }

   view->dcc_incompatible =
      vi_dcc_formats_are_incompatible(texture, state->u.tex.first_level, state->format);

   sctx->screen->make_texture_descriptor(
      sctx->screen, tex, true, state->target, pipe_format, state_swizzle,
      state->u.tex.first_level, state->u.tex.last_level,
      state->u.tex.first_layer, last_layer, texture->width0, texture->height0, texture->depth0,
      false, view->state, view->fmask_state);

   view->base_level_info = &surflevel[0];
   view->block_width = util_format_get_blockwidth(pipe_format);
   return &view->base;
}
5024 
si_sampler_view_destroy(struct pipe_context * ctx,struct pipe_sampler_view * state)5025 static void si_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state)
5026 {
5027    struct si_sampler_view *view = (struct si_sampler_view *)state;
5028 
5029    pipe_resource_reference(&state->texture, NULL);
5030    FREE_CL(view);
5031 }
5032 
/* Return whether a wrap mode can ever read the border color. The plain
 * CLAMP and MIRROR_CLAMP modes only touch the border with linear filtering. */
static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
{
   switch (wrap) {
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      return true;
   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      return linear_filter;
   default:
      return false;
   }
}
5038 
/**
 * Translate a border color into the BORDER_COLOR_TYPE / BORDER_COLOR_PTR
 * fields of sampler state dword 3.
 *
 * The three hardware "simple" border colors (transparent black, opaque
 * black, opaque white) are encoded directly; any other color is uploaded
 * into the per-context border color table and referenced by index.
 *
 * \param sctx       context (owns the border color table)
 * \param state      sampler state template (wrap modes/filters decide whether
 *                   the border color is reachable at all)
 * \param color      the border color value
 * \param is_integer true to compare the color as integers instead of floats
 */
static uint32_t si_translate_border_color(struct si_context *sctx,
                                          const struct pipe_sampler_state *state,
                                          const union pipe_color_union *color, bool is_integer)
{
   bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
                        state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;

   /* If no wrap mode can sample the border, any color works; use the default. */
   if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) &&
       !wrap_mode_uses_border_color(state->wrap_t, linear_filter) &&
       !wrap_mode_uses_border_color(state->wrap_r, linear_filter))
      return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);

#define simple_border_types(elt)                                                                   \
   do {                                                                                            \
      if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 0)    \
         return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);              \
      if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 1)    \
         return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK);             \
      if (color->elt[0] == 1 && color->elt[1] == 1 && color->elt[2] == 1 && color->elt[3] == 1)    \
         return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE);             \
   } while (false)

   if (is_integer)
      simple_border_types(ui);
   else
      simple_border_types(f);

#undef simple_border_types

   int i;

   /* Check if the border has been uploaded already. */
   for (i = 0; i < sctx->border_color_count; i++)
      if (memcmp(&sctx->border_color_table[i], color, sizeof(*color)) == 0)
         break;

   if (i >= SI_MAX_BORDER_COLORS) {
      /* Getting 4096 unique border colors is very unlikely. */
      static bool printed;
      if (!printed) {
         fprintf(stderr, "radeonsi: The border color table is full. "
                         "Any new border colors will be just black. "
                         "This is a hardware limitation.\n");
         printed = true;
      }
      return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);
   }

   if (i == sctx->border_color_count) {
      /* Upload a new border color. */
      memcpy(&sctx->border_color_table[i], color, sizeof(*color));
      util_memcpy_cpu_to_le32(&sctx->border_color_map[i], color, sizeof(*color));
      sctx->border_color_count++;
   }

   /* The pointer field layout changed on GFX11. */
   return (sctx->screen->info.gfx_level >= GFX11 ? S_008F3C_BORDER_COLOR_PTR_GFX11(i):
                                                    S_008F3C_BORDER_COLOR_PTR_GFX6(i)) |
          S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER);
}
5098 
/* Convert a float to signed fixed point with the given number of
 * fractional bits (truncates toward zero, like a C float-to-int cast). */
static inline int S_FIXED(float value, unsigned frac_bits)
{
   const float scale = (float)(1 << frac_bits);

   return (int)(value * scale);
}
5103 
si_tex_filter(unsigned filter,unsigned max_aniso)5104 static inline unsigned si_tex_filter(unsigned filter, unsigned max_aniso)
5105 {
5106    if (filter == PIPE_TEX_FILTER_LINEAR)
5107       return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
5108                            : V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
5109    else
5110       return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
5111                            : V_008F38_SQ_TEX_XY_FILTER_POINT;
5112 }
5113 
/* Encode the max anisotropy as a ratio level:
 * <2 -> 0, <4 -> 1, <8 -> 2, <16 -> 3, >=16 -> 4. */
static inline unsigned si_tex_aniso_filter(unsigned filter)
{
   unsigned level = 0;

   while (level < 4 && filter >= (2u << level))
      level++;

   return level;
}
5126 
/**
 * Create a sampler state object.
 *
 * Packs the gallium sampler state into the 4-dword hardware sampler
 * descriptor (rstate->val), plus a second variant (upgraded_depth_val)
 * used for depth textures that were upgraded to a float format.
 *
 * \return the new si_sampler_state, or NULL on allocation failure or
 *         invalid input (asserts in debug builds)
 */
static void *si_create_sampler_state(struct pipe_context *ctx,
                                     const struct pipe_sampler_state *state)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_screen *sscreen = sctx->screen;
   struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
   /* force_aniso (debug option) overrides the requested anisotropy. */
   unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy;
   unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso);
   /* Truncate coordinates for nearest filtering without depth compare,
    * or unconditionally if the hardware does it conformantly. */
   bool trunc_coord = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
                       state->mag_img_filter == PIPE_TEX_FILTER_NEAREST &&
                       state->compare_mode == PIPE_TEX_COMPARE_NONE) ||
                      sscreen->info.conformant_trunc_coord;
   union pipe_color_union clamped_border_color;

   if (!rstate) {
      return NULL;
   }

   /* Validate inputs. */
   if (!is_wrap_mode_legal(sscreen, state->wrap_s) ||
       !is_wrap_mode_legal(sscreen, state->wrap_t) ||
       !is_wrap_mode_legal(sscreen, state->wrap_r) ||
       (!sscreen->info.has_3d_cube_border_color_mipmap &&
        (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
         state->max_anisotropy > 0))) {
      assert(0);
      return NULL;
   }

#ifndef NDEBUG
   rstate->magic = SI_SAMPLER_STATE_MAGIC;
#endif
   /* Dword 0: wrap modes, depth compare, anisotropy, cube seam handling. */
   rstate->val[0] =
      (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
       S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
       S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_mode, state->compare_func)) |
       S_008F30_FORCE_UNNORMALIZED(state->unnormalized_coords) |
       S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
       S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
       S_008F30_TRUNC_COORD(trunc_coord));
   /* Dword 1: LOD range in 4.8 fixed point. */
   rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
                     S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
                     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
   /* Dword 2: filters (LOD bias added below, it is gfx-level dependent). */
   rstate->val[2] = (S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) |
                     S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) |
                     S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
   /* Dword 3: border color. */
   rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color,
                                              state->border_color_is_integer);

   if (sscreen->info.gfx_level >= GFX10) {
      rstate->val[2] |= S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -32, 31), 8)) |
                        S_008F38_ANISO_OVERRIDE_GFX10(1);
   } else {
      rstate->val[0] |= S_008F30_COMPAT_MODE(sctx->gfx_level >= GFX8);
      rstate->val[2] |= S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
                        S_008F38_DISABLE_LSB_CEIL(sctx->gfx_level <= GFX8) |
                        S_008F38_FILTER_PREC_FIX(1) |
                        S_008F38_ANISO_OVERRIDE_GFX8(sctx->gfx_level >= GFX8);
   }

   /* Create sampler resource for upgraded depth textures. */
   memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val));

   for (unsigned i = 0; i < 4; ++i) {
      /* Use channel 0 on purpose, so that we can use OPAQUE_WHITE
       * when the border color is 1.0. */
      clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1);
   }

   if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0) {
      if (sscreen->info.gfx_level <= GFX9)
         rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1);
   } else {
      /* The clamped color differs: it needs its own border color entry. */
      rstate->upgraded_depth_val[3] =
         si_translate_border_color(sctx, state, &clamped_border_color, false);
   }

   return rstate;
}
5206 
si_set_sample_mask(struct pipe_context * ctx,unsigned sample_mask)5207 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
5208 {
5209    struct si_context *sctx = (struct si_context *)ctx;
5210 
5211    if (sctx->sample_mask == (uint16_t)sample_mask)
5212       return;
5213 
5214    sctx->sample_mask = sample_mask;
5215    si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_mask);
5216 }
5217 
/* Emit the sample mask into PA_SC_AA_MASK_X0Y0_X1Y0 / X0Y1_X1Y1.
 * Each register covers two pixels of the 2x2 quad, so the 16-bit mask is
 * duplicated into both halves of each dword. */
static void si_emit_sample_mask(struct si_context *sctx, unsigned index)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
   unsigned mask = sctx->sample_mask;

   /* Needed for line and polygon smoothing as well as for the Polaris
    * small primitive filter. We expect the gallium frontend to take care of
    * this for us.
    */
   assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
          (mask & 1 && sctx->blitter_running));

   radeon_begin(cs);
   radeon_set_context_reg_seq(R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
   radeon_emit(mask | (mask << 16));
   radeon_emit(mask | (mask << 16));
   radeon_end();
}
5236 
si_delete_sampler_state(struct pipe_context * ctx,void * state)5237 static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
5238 {
5239 #ifndef NDEBUG
5240    struct si_sampler_state *s = state;
5241 
5242    assert(s->magic == SI_SAMPLER_STATE_MAGIC);
5243    s->magic = 0;
5244 #endif
5245    free(state);
5246 }
5247 
5248 /*
5249  * Vertex elements & buffers
5250  */
5251 
/* Compute the fast unsigned-division constants for divisor D and repack
 * them from util_fast_udiv_info into si_fast_udiv_info32. */
struct si_fast_udiv_info32 si_compute_fast_udiv_info32(uint32_t D, unsigned num_bits)
{
   const struct util_fast_udiv_info info = util_compute_fast_udiv_info(D, num_bits, 32);

   struct si_fast_udiv_info32 packed = {
      .multiplier = info.multiplier,
      .pre_shift = info.pre_shift,
      .post_shift = info.post_shift,
      .increment = info.increment,
   };

   return packed;
}
5264 
/* Create a vertex elements CSO.
 *
 * Besides storing the element layout, this precomputes per-element data used
 * at draw time:
 *  - the buffer resource descriptor word 3 (dst swizzle + format),
 *  - a "fix_fetch" encoding for formats the fetch hardware can't load
 *    directly; the VS prolog then fixes up the fetch in the shader,
 *  - fast-udiv factors for instance divisors > 1, uploaded to a buffer the
 *    shader fetches (bound as SI_VS_CONST_INSTANCE_DIVISORS on bind).
 *
 * Returns NULL on allocation failure or an out-of-range buffer index.
 */
static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count,
                                       const struct pipe_vertex_element *elements)
{
   struct si_screen *sscreen = (struct si_screen *)ctx->screen;

   /* Optional debug dump of the incoming element list. */
   if (sscreen->debug_flags & DBG(VERTEX_ELEMENTS)) {
      for (int i = 0; i < count; ++i) {
         const struct pipe_vertex_element *e = elements + i;
         fprintf(stderr, "elements[%d]: offset %2d, buffer_index %d, dual_slot %d, format %3d, divisor %u\n",
                i, e->src_offset, e->vertex_buffer_index, e->dual_slot, e->src_format, e->instance_divisor);
      }
   }

   struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements);
   struct si_fast_udiv_info32 divisor_factors[SI_MAX_ATTRIBS] = {};
   /* The shader reads the divisor factors as 4 consecutive dwords per
    * element, so the struct layout must match exactly. */
   STATIC_ASSERT(sizeof(struct si_fast_udiv_info32) == 16);
   STATIC_ASSERT(sizeof(divisor_factors[0].multiplier) == 4);
   STATIC_ASSERT(sizeof(divisor_factors[0].pre_shift) == 4);
   STATIC_ASSERT(sizeof(divisor_factors[0].post_shift) == 4);
   STATIC_ASSERT(sizeof(divisor_factors[0].increment) == 4);
   int i;

   assert(count <= SI_MAX_ATTRIBS);
   if (!v)
      return NULL;

   v->count = count;

   /* Elements beyond the user-SGPR VBO slots need a descriptor list
    * uploaded through CP DMA; compute its aligned size (16 bytes per
    * descriptor). */
   unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sscreen);
   unsigned alloc_count =
      count > num_vbos_in_user_sgprs ? count - num_vbos_in_user_sgprs : 0;
   v->vb_desc_list_alloc_size = align(alloc_count * 16, SI_CPDMA_ALIGNMENT);

   for (i = 0; i < count; ++i) {
      const struct util_format_description *desc;
      const struct util_format_channel_description *channel;
      int first_non_void;
      unsigned vbo_index = elements[i].vertex_buffer_index;

      /* Reject out-of-range buffer indices instead of corrupting state. */
      if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
         FREE(v);
         return NULL;
      }

      unsigned instance_divisor = elements[i].instance_divisor;
      if (instance_divisor) {
         if (instance_divisor == 1) {
            /* Trivial divisor: the shader uses the instance ID directly. */
            v->instance_divisor_is_one |= 1u << i;
         } else {
            /* General divisor: record factors for the shader to fetch. */
            v->instance_divisor_is_fetched |= 1u << i;
            divisor_factors[i] = si_compute_fast_udiv_info32(instance_divisor, 32);
         }
      }

      desc = util_format_description(elements[i].src_format);
      first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
      /* channel is NULL for formats with no non-void channel
       * (handled by the special-format switch below). */
      channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;

      v->elem[i].format_size = desc->block.bits / 8;
      v->elem[i].src_offset = elements[i].src_offset;
      v->elem[i].stride = elements[i].src_stride;
      v->vertex_buffer_index[i] = vbo_index;

      bool always_fix = false;
      union si_vs_fix_fetch fix_fetch;
      unsigned log_hw_load_size; /* the load element size as seen by the hardware */

      /* Classify the format for the VS prolog fix-fetch path. */
      fix_fetch.bits = 0;
      log_hw_load_size = MIN2(2, util_logbase2(desc->block.bits) - 3);

      if (channel) {
         switch (channel->type) {
         case UTIL_FORMAT_TYPE_FLOAT:
            fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT;
            break;
         case UTIL_FORMAT_TYPE_FIXED:
            fix_fetch.u.format = AC_FETCH_FORMAT_FIXED;
            break;
         case UTIL_FORMAT_TYPE_SIGNED: {
            if (channel->pure_integer)
               fix_fetch.u.format = AC_FETCH_FORMAT_SINT;
            else if (channel->normalized)
               fix_fetch.u.format = AC_FETCH_FORMAT_SNORM;
            else
               fix_fetch.u.format = AC_FETCH_FORMAT_SSCALED;
            break;
         }
         case UTIL_FORMAT_TYPE_UNSIGNED: {
            if (channel->pure_integer)
               fix_fetch.u.format = AC_FETCH_FORMAT_UINT;
            else if (channel->normalized)
               fix_fetch.u.format = AC_FETCH_FORMAT_UNORM;
            else
               fix_fetch.u.format = AC_FETCH_FORMAT_USCALED;
            break;
         }
         default:
            unreachable("bad format type");
         }
      } else {
         switch (elements[i].src_format) {
         case PIPE_FORMAT_R11G11B10_FLOAT:
            fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT;
            break;
         default:
            unreachable("bad other format");
         }
      }

      if (desc->channel[0].size == 10) {
         fix_fetch.u.log_size = 3; /* special encoding for 2_10_10_10 */
         log_hw_load_size = 2;

         /* The hardware always treats the 2-bit alpha channel as
          * unsigned, so a shader workaround is needed. The affected
          * chips are GFX8 and older except Stoney (GFX8.1).
          */
         always_fix = sscreen->info.gfx_level <= GFX8 && sscreen->info.family != CHIP_STONEY &&
                      channel->type == UTIL_FORMAT_TYPE_SIGNED;
      } else if (elements[i].src_format == PIPE_FORMAT_R11G11B10_FLOAT) {
         fix_fetch.u.log_size = 3; /* special encoding */
         fix_fetch.u.format = AC_FETCH_FORMAT_FIXED;
         log_hw_load_size = 2;
      } else {
         fix_fetch.u.log_size = util_logbase2(channel->size) - 3;
         fix_fetch.u.num_channels_m1 = desc->nr_channels - 1;

         /* Always fix up:
          * - doubles (multiple loads + truncate to float)
          * - 32-bit requiring a conversion
          */
         always_fix = (fix_fetch.u.log_size == 3) ||
                      (fix_fetch.u.log_size == 2 && fix_fetch.u.format != AC_FETCH_FORMAT_FLOAT &&
                       fix_fetch.u.format != AC_FETCH_FORMAT_UINT &&
                       fix_fetch.u.format != AC_FETCH_FORMAT_SINT);

         /* Also fixup 8_8_8 and 16_16_16. */
         if (desc->nr_channels == 3 && fix_fetch.u.log_size <= 1) {
            always_fix = true;
            log_hw_load_size = fix_fetch.u.log_size;
         }
      }

      /* BGR-ordered formats: the prolog swaps X and Z back. */
      if (desc->swizzle[0] != PIPE_SWIZZLE_X) {
         assert(desc->swizzle[0] == PIPE_SWIZZLE_Z &&
                (desc->swizzle[2] == PIPE_SWIZZLE_X || desc->swizzle[2] == PIPE_SWIZZLE_0));
         fix_fetch.u.reverse = 1;
      }

      /* Force the workaround for unaligned access here already if the
       * offset relative to the vertex buffer base is unaligned.
       *
       * There is a theoretical case in which this is too conservative:
       * if the vertex buffer's offset is also unaligned in just the
       * right way, we end up with an aligned address after all.
       * However, this case should be extremely rare in practice (it
       * won't happen in well-behaved applications), and taking it
       * into account would complicate the fast path (where everything
       * is nicely aligned).
       */
      bool check_alignment =
            log_hw_load_size >= 1 &&
            (sscreen->info.gfx_level == GFX6 || sscreen->info.gfx_level >= GFX10);
      bool opencode = sscreen->options.vs_fetch_always_opencode;

      if (check_alignment && ((elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0 ||
                              elements[i].src_stride & 3))
         opencode = true;

      if (always_fix || check_alignment || opencode)
         v->fix_fetch[i] = fix_fetch.bits;

      if (opencode)
         v->fix_fetch_opencode |= 1 << i;
      if (opencode || always_fix)
         v->fix_fetch_always |= 1 << i;

      if (check_alignment && !opencode) {
         assert(log_hw_load_size == 1 || log_hw_load_size == 2);

         /* Alignment is only known once the buffers are bound;
          * record what to check then. */
         v->fix_fetch_unaligned |= 1 << i;
         v->hw_load_is_dword |= (log_hw_load_size - 1) << i;
         v->vb_alignment_check_mask |= 1 << vbo_index;
      }

      /* Destination swizzle part of the buffer descriptor. */
      v->elem[i].rsrc_word3 = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
                              S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
                              S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
                              S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3]));

      if (sscreen->info.gfx_level >= GFX10) {
         /* GFX10+ uses a unified img/buf format field. */
         const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&sscreen->info)[elements[i].src_format];
         ASSERTED unsigned last_vertex_format = sscreen->info.gfx_level >= GFX11 ? 64 : 128;
         assert(fmt->img_format != 0 && fmt->img_format < last_vertex_format);
         v->elem[i].rsrc_word3 |=
            S_008F0C_FORMAT(fmt->img_format) |
            S_008F0C_RESOURCE_LEVEL(sscreen->info.gfx_level < GFX11) |
            /* OOB_SELECT chooses the out-of-bounds check:
             *  - 1: index >= NUM_RECORDS (Structured)
             *  - 3: offset >= NUM_RECORDS (Raw)
             */
            S_008F0C_OOB_SELECT(v->elem[i].stride ? V_008F0C_OOB_SELECT_STRUCTURED
                                                  : V_008F0C_OOB_SELECT_RAW);
      } else {
         /* Older chips use separate data/num format fields. */
         unsigned data_format, num_format;
         data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
         num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
         v->elem[i].rsrc_word3 |= S_008F0C_NUM_FORMAT(num_format) |
                                  S_008F0C_DATA_FORMAT(data_format);
      }
   }

   if (v->instance_divisor_is_fetched) {
      unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched);

      /* Upload the fast-udiv factors for the shader to fetch. */
      v->instance_divisor_factor_buffer = (struct si_resource *)pipe_buffer_create(
         &sscreen->b, 0, PIPE_USAGE_DEFAULT, num_divisors * sizeof(divisor_factors[0]));
      if (!v->instance_divisor_factor_buffer) {
         FREE(v);
         return NULL;
      }
      /* NOTE(review): the map result is not NULL-checked; an OOM here would
       * crash — confirm whether buffer_map can fail for this buffer type. */
      void *map =
         sscreen->ws->buffer_map(sscreen->ws, v->instance_divisor_factor_buffer->buf, NULL, PIPE_MAP_WRITE);
      memcpy(map, divisor_factors, num_divisors * sizeof(divisor_factors[0]));
   }
   return v;
}
5492 
/* Bind a vertex elements CSO; NULL binds the dummy no-velems state.
 *
 * Decides whether the change requires a shader update: the VS prolog key
 * depends on instance divisors, fix_fetch encodings, and the alignment of
 * the currently-bound vertex buffers.
 */
static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_vertex_elements *old = sctx->vertex_elements;
   struct si_vertex_elements *v = (struct si_vertex_elements *)state;

   if (!v)
      v = sctx->no_velems_state;

   sctx->vertex_elements = v;
   sctx->num_vertex_elements = v->count;
   /* Vertex buffer descriptors must be re-uploaded unless there are no
    * elements at all. */
   sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0;

   if (old->instance_divisor_is_one != v->instance_divisor_is_one ||
       old->instance_divisor_is_fetched != v->instance_divisor_is_fetched ||
       /* Alignment checks only matter for buffers that are actually
        * unaligned right now. */
       (old->vb_alignment_check_mask ^ v->vb_alignment_check_mask) &
       sctx->vertex_buffer_unaligned ||
       ((v->vb_alignment_check_mask & sctx->vertex_buffer_unaligned) &&
        memcmp(old->vertex_buffer_index, v->vertex_buffer_index,
               sizeof(v->vertex_buffer_index[0]) * MAX2(old->count, v->count))) ||
       /* fix_fetch_{always,opencode,unaligned} and hw_load_is_dword are
        * functions of fix_fetch and the src_offset alignment.
        * If they change and fix_fetch doesn't, it must be due to different
        * src_offset alignment, which is reflected in fix_fetch_opencode. */
       old->fix_fetch_opencode != v->fix_fetch_opencode ||
       memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) *
              MAX2(old->count, v->count))) {
      si_vs_key_update_inputs(sctx);
      sctx->do_update_shaders = true;
   }

   if (v->instance_divisor_is_fetched) {
      struct pipe_constant_buffer cb;

      /* Point the internal const buffer at the precomputed divisor
       * factors uploaded by si_create_vertex_elements. */
      cb.buffer = &v->instance_divisor_factor_buffer->b.b;
      cb.user_buffer = NULL;
      cb.buffer_offset = 0;
      cb.buffer_size = 0xffffffff;
      si_set_internal_const_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb);
   }
}
5534 
si_delete_vertex_element(struct pipe_context * ctx,void * state)5535 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
5536 {
5537    struct si_context *sctx = (struct si_context *)ctx;
5538    struct si_vertex_elements *v = (struct si_vertex_elements *)state;
5539 
5540    if (sctx->vertex_elements == state)
5541       si_bind_vertex_elements(ctx, sctx->no_velems_state);
5542 
5543    si_resource_reference(&v->instance_divisor_factor_buffer, NULL);
5544    FREE(state);
5545 }
5546 
/* Bind the array of vertex buffers (pipe_context::set_vertex_buffers).
 *
 * Ownership: the old references held in sctx->vertex_buffer are dropped
 * and the incoming references are adopted without taking new ones (the
 * plain pointer assignment below is deliberate). Also tracks which
 * buffers have a non-dword-aligned offset, since that can require a
 * different VS prolog.
 */
static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
                                  const struct pipe_vertex_buffer *buffers)
{
   struct si_context *sctx = (struct si_context *)ctx;
   uint32_t unaligned = 0; /* bitmask of buffers with offset not dword-aligned */
   unsigned i;

   assert(count <= ARRAY_SIZE(sctx->vertex_buffer));
   assert(!count || buffers);

   for (i = 0; i < count; i++) {
      const struct pipe_vertex_buffer *src = buffers + i;
      struct pipe_vertex_buffer *dst = sctx->vertex_buffer + i;
      struct pipe_resource *buf = src->buffer.resource;

      dst->buffer_offset = src->buffer_offset;

      /* Only unreference bound vertex buffers. */
      pipe_resource_reference(&dst->buffer.resource, NULL);
      /* Adopt the caller's reference (no new reference is taken). */
      dst->buffer.resource = src->buffer.resource;

      if (src->buffer_offset & 3)
         unaligned |= BITFIELD_BIT(i);

      if (buf) {
         si_resource(buf)->bind_history |= SI_BIND_VERTEX_BUFFER;
         radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buf),
                                   RADEON_USAGE_READ | RADEON_PRIO_VERTEX_BUFFER);
      }
   }

   /* Release any previously-bound buffers past the new count. */
   unsigned last_count = sctx->num_vertex_buffers;
   for (; i < last_count; i++)
      pipe_resource_reference(&sctx->vertex_buffer[i].buffer.resource, NULL);

   sctx->num_vertex_buffers = count;
   sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0;
   sctx->vertex_buffer_unaligned = unaligned;

   /* Check whether alignment may have changed in a way that requires
    * shader changes. This check is conservative: a vertex buffer can only
    * trigger a shader change if the misalignment amount changes (e.g.
    * from byte-aligned to short-aligned), but we only keep track of
    * whether buffers are at least dword-aligned, since that should always
    * be the case in well-behaved applications anyway.
    */
   if (sctx->vertex_elements->vb_alignment_check_mask & unaligned) {
      si_vs_key_update_inputs(sctx);
      sctx->do_update_shaders = true;
   }
}
5598 
5599 static struct pipe_vertex_state *
si_create_vertex_state(struct pipe_screen * screen,struct pipe_vertex_buffer * buffer,const struct pipe_vertex_element * elements,unsigned num_elements,struct pipe_resource * indexbuf,uint32_t full_velem_mask)5600 si_create_vertex_state(struct pipe_screen *screen,
5601                        struct pipe_vertex_buffer *buffer,
5602                        const struct pipe_vertex_element *elements,
5603                        unsigned num_elements,
5604                        struct pipe_resource *indexbuf,
5605                        uint32_t full_velem_mask)
5606 {
5607    struct si_screen *sscreen = (struct si_screen *)screen;
5608    struct si_vertex_state *state = CALLOC_STRUCT(si_vertex_state);
5609 
5610    util_init_pipe_vertex_state(screen, buffer, elements, num_elements, indexbuf, full_velem_mask,
5611                                &state->b);
5612 
5613    /* Initialize the vertex element state in state->element.
5614     * Do it by creating a vertex element state object and copying it there.
5615     */
5616    struct si_context ctx = {};
5617    ctx.b.screen = screen;
5618    struct si_vertex_elements *velems = si_create_vertex_elements(&ctx.b, num_elements, elements);
5619    state->velems = *velems;
5620    si_delete_vertex_element(&ctx.b, velems);
5621 
5622    assert(!state->velems.instance_divisor_is_one);
5623    assert(!state->velems.instance_divisor_is_fetched);
5624    assert(!state->velems.fix_fetch_always);
5625    assert(buffer->buffer_offset % 4 == 0);
5626    assert(!buffer->is_user_buffer);
5627    for (unsigned i = 0; i < num_elements; i++) {
5628       assert(elements[i].src_offset % 4 == 0);
5629       assert(!elements[i].dual_slot);
5630       assert(elements[i].src_stride % 4 == 0);
5631    }
5632 
5633    for (unsigned i = 0; i < num_elements; i++) {
5634       si_set_vertex_buffer_descriptor(sscreen, &state->velems, &state->b.input.vbuffer, i,
5635                                       &state->descriptors[i * 4]);
5636    }
5637 
5638    return &state->b;
5639 }
5640 
/* Free a vertex state: drop the vertex/index buffer references taken at
 * creation time, then free the object itself. */
static void si_vertex_state_destroy(struct pipe_screen *screen,
                                    struct pipe_vertex_state *state)
{
   pipe_vertex_buffer_unreference(&state->input.vbuffer);
   pipe_resource_reference(&state->input.indexbuf, NULL);
   FREE(state);
}
5648 
/* Screen vtable entry: create (or reuse) a vertex state through the
 * per-screen vertex state cache, so identical states share one object. */
static struct pipe_vertex_state *
si_pipe_create_vertex_state(struct pipe_screen *screen,
                            struct pipe_vertex_buffer *buffer,
                            const struct pipe_vertex_element *elements,
                            unsigned num_elements,
                            struct pipe_resource *indexbuf,
                            uint32_t full_velem_mask)
{
   struct si_screen *sscreen = (struct si_screen *)screen;

   return util_vertex_state_cache_get(screen, buffer, elements, num_elements, indexbuf,
                                      full_velem_mask, &sscreen->vertex_state_cache);
}
5662 
/* Screen vtable entry: release a vertex state through the cache
 * (the cache frees it via si_vertex_state_destroy when unused). */
static void si_pipe_vertex_state_destroy(struct pipe_screen *screen,
                                         struct pipe_vertex_state *state)
{
   struct si_screen *sscreen = (struct si_screen *)screen;

   util_vertex_state_destroy(screen, &sscreen->vertex_state_cache, state);
}
5670 
5671 /*
5672  * Misc
5673  */
5674 
si_set_tess_state(struct pipe_context * ctx,const float default_outer_level[4],const float default_inner_level[2])5675 static void si_set_tess_state(struct pipe_context *ctx, const float default_outer_level[4],
5676                               const float default_inner_level[2])
5677 {
5678    struct si_context *sctx = (struct si_context *)ctx;
5679    struct pipe_constant_buffer cb;
5680    float array[8];
5681 
5682    memcpy(array, default_outer_level, sizeof(float) * 4);
5683    memcpy(array + 4, default_inner_level, sizeof(float) * 2);
5684 
5685    cb.buffer = NULL;
5686    cb.user_buffer = array;
5687    cb.buffer_offset = 0;
5688    cb.buffer_size = sizeof(array);
5689 
5690    si_set_internal_const_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
5691 }
5692 
si_texture_barrier(struct pipe_context * ctx,unsigned flags)5693 static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
5694 {
5695    struct si_context *sctx = (struct si_context *)ctx;
5696 
5697    si_update_fb_dirtiness_after_rendering(sctx);
5698 
5699    /* Multisample surfaces are flushed in si_decompress_textures. */
5700    if (sctx->framebuffer.uncompressed_cb_mask) {
5701       si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
5702                                  sctx->framebuffer.CB_has_shader_readable_metadata,
5703                                  sctx->framebuffer.all_DCC_pipe_aligned);
5704    }
5705 }
5706 
5707 /* This only ensures coherency for shader image/buffer stores. */
static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
   struct si_context *sctx = (struct si_context *)ctx;

   /* Nothing to do if only PIPE_BARRIER_UPDATE_* bits are set. */
   if (!(flags & ~PIPE_BARRIER_UPDATE))
      return;

   /* Subsequent commands must wait for all shader invocations to
    * complete. */
   sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
                  SI_CONTEXT_PFP_SYNC_ME;

   /* Constant buffer reads use both the scalar and vector caches. */
   if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
      sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;

   if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_TEXTURE |
                PIPE_BARRIER_IMAGE | PIPE_BARRIER_STREAMOUT_BUFFER | PIPE_BARRIER_GLOBAL_BUFFER)) {
      /* As far as I can tell, L1 contents are written back to L2
       * automatically at end of shader, but the contents of other
       * L1 caches might still be stale. */
      sctx->flags |= SI_CONTEXT_INV_VCACHE;

      /* When the chip reports tcc_rb_non_coherent, texture/image reads
       * of render-backend output additionally need an L2 invalidate. */
      if (flags & (PIPE_BARRIER_IMAGE | PIPE_BARRIER_TEXTURE) &&
          sctx->screen->info.tcc_rb_non_coherent)
         sctx->flags |= SI_CONTEXT_INV_L2;
   }

   if (flags & PIPE_BARRIER_INDEX_BUFFER) {
      /* Indices are read through TC L2 since GFX8.
       * L1 isn't used.
       */
      if (sctx->screen->info.gfx_level <= GFX7)
         sctx->flags |= SI_CONTEXT_WB_L2;
   }

   /* MSAA color, any depth and any stencil are flushed in
    * si_decompress_textures when needed.
    */
   if (flags & PIPE_BARRIER_FRAMEBUFFER && sctx->framebuffer.uncompressed_cb_mask) {
      sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;

      /* NOTE(review): presumably CB output bypasses L2 on <= GFX8, hence
       * the extra writeback — confirm against the cache docs. */
      if (sctx->gfx_level <= GFX8)
         sctx->flags |= SI_CONTEXT_WB_L2;
   }

   /* Indirect buffers use TC L2 on GFX9, but not older hw. */
   if (sctx->screen->info.gfx_level <= GFX8 && flags & PIPE_BARRIER_INDIRECT_BUFFER)
      sctx->flags |= SI_CONTEXT_WB_L2;

   /* The accumulated flags are emitted by the cache_flush atom. */
   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
}
5759 
si_create_blend_custom(struct si_context * sctx,unsigned mode)5760 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
5761 {
5762    struct pipe_blend_state blend;
5763 
5764    memset(&blend, 0, sizeof(blend));
5765    blend.independent_blend_enable = true;
5766    blend.rt[0].colormask = 0xf;
5767    return si_create_blend_state_mode(&sctx->b, &blend, mode);
5768 }
5769 
/* Atom callback: emit the pending cache-flush packets into the gfx CS.
 * (index is unused; it's part of the common atom emit signature.) */
static void si_emit_cache_flush_state(struct si_context *sctx, unsigned index)
{
   sctx->emit_cache_flush(sctx, &sctx->gfx_cs);
}
5774 
/* Install the subset of state functions needed by compute-only contexts. */
void si_init_state_compute_functions(struct si_context *sctx)
{
   sctx->b.create_sampler_state = si_create_sampler_state;
   sctx->b.delete_sampler_state = si_delete_sampler_state;
   sctx->b.create_sampler_view = si_create_sampler_view;
   sctx->b.sampler_view_destroy = si_sampler_view_destroy;
   sctx->b.memory_barrier = si_memory_barrier;
}
5783 
/* Install all graphics state functions, state-atom emit callbacks, and
 * the internal custom blend/DSA states on a graphics context. */
void si_init_state_functions(struct si_context *sctx)
{
   /* PM4 state atoms (CSOs and shaders emitted as prebuilt PM4). */
   sctx->atoms.s.pm4_states[SI_STATE_IDX(blend)].emit = si_pm4_emit_state;
   sctx->atoms.s.pm4_states[SI_STATE_IDX(rasterizer)].emit = si_pm4_emit_rasterizer;
   sctx->atoms.s.pm4_states[SI_STATE_IDX(dsa)].emit = si_pm4_emit_dsa;
   sctx->atoms.s.pm4_states[SI_STATE_IDX(sqtt_pipeline)].emit = si_pm4_emit_state;
   sctx->atoms.s.pm4_states[SI_STATE_IDX(ls)].emit = si_pm4_emit_shader;
   sctx->atoms.s.pm4_states[SI_STATE_IDX(hs)].emit = si_pm4_emit_shader;
   sctx->atoms.s.pm4_states[SI_STATE_IDX(es)].emit = si_pm4_emit_shader;
   sctx->atoms.s.pm4_states[SI_STATE_IDX(gs)].emit = si_pm4_emit_shader;
   sctx->atoms.s.pm4_states[SI_STATE_IDX(vs)].emit = si_pm4_emit_shader;
   sctx->atoms.s.pm4_states[SI_STATE_IDX(ps)].emit = si_pm4_emit_shader;

   /* Chips with packed SET_CONTEXT_REG_PAIRS use the GFX11 fb path. */
   if (sctx->screen->info.has_set_context_pairs_packed)
      sctx->atoms.s.framebuffer.emit = gfx11_dgpu_emit_framebuffer_state;
   else
      sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;

   /* Derived-state atoms. */
   sctx->atoms.s.db_render_state.emit = si_emit_db_render_state;
   sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state;
   sctx->atoms.s.msaa_config.emit = si_emit_msaa_config;
   sctx->atoms.s.sample_mask.emit = si_emit_sample_mask;
   sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state;
   sctx->atoms.s.blend_color.emit = si_emit_blend_color;
   sctx->atoms.s.clip_regs.emit = si_emit_clip_regs;
   sctx->atoms.s.clip_state.emit = si_emit_clip_state;
   sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref;
   sctx->atoms.s.cache_flush.emit = si_emit_cache_flush_state;

   /* Gallium CSO entry points. */
   sctx->b.create_blend_state = si_create_blend_state;
   sctx->b.bind_blend_state = si_bind_blend_state;
   sctx->b.delete_blend_state = si_delete_blend_state;
   sctx->b.set_blend_color = si_set_blend_color;

   sctx->b.create_rasterizer_state = si_create_rs_state;
   sctx->b.bind_rasterizer_state = si_bind_rs_state;
   sctx->b.delete_rasterizer_state = si_delete_rs_state;

   sctx->b.create_depth_stencil_alpha_state = si_create_dsa_state;
   sctx->b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
   sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state;

   /* Internal states used for decompress/flush blits. */
   sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);

   /* GFX11 removed FMASK/CMASK, so these CB modes only exist before it. */
   if (sctx->gfx_level < GFX11) {
      sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
      sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
      sctx->custom_blend_eliminate_fastclear =
         si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
   }

   sctx->custom_blend_dcc_decompress =
      si_create_blend_custom(sctx, sctx->gfx_level >= GFX11 ?
                                V_028808_CB_DCC_DECOMPRESS_GFX11 :
                                V_028808_CB_DCC_DECOMPRESS_GFX8);

   sctx->b.set_clip_state = si_set_clip_state;
   sctx->b.set_stencil_ref = si_set_stencil_ref;

   sctx->b.set_framebuffer_state = si_set_framebuffer_state;

   sctx->b.set_sample_mask = si_set_sample_mask;

   sctx->b.create_vertex_elements_state = si_create_vertex_elements;
   sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
   sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
   sctx->b.set_vertex_buffers = si_set_vertex_buffers;

   sctx->b.texture_barrier = si_texture_barrier;
   sctx->b.set_min_samples = si_set_min_samples;
   sctx->b.set_tess_state = si_set_tess_state;

   sctx->b.set_active_query_state = si_set_active_query_state;
}
5858 
/* Install per-screen state entry points and the vertex state cache. */
void si_init_screen_state_functions(struct si_screen *sscreen)
{
   sscreen->b.is_format_supported = si_is_format_supported;
   sscreen->b.create_vertex_state = si_pipe_create_vertex_state;
   sscreen->b.vertex_state_destroy = si_pipe_vertex_state_destroy;

   /* Texture descriptor layout changed with GFX10. */
   if (sscreen->info.gfx_level >= GFX10)
      sscreen->make_texture_descriptor = gfx10_make_texture_descriptor;
   else
      sscreen->make_texture_descriptor = si_make_texture_descriptor;

   util_vertex_state_cache_init(&sscreen->vertex_state_cache,
                                si_create_vertex_state, si_vertex_state_destroy);
}
5873 
si_set_grbm_gfx_index(struct si_context * sctx,struct si_pm4_state * pm4,unsigned value)5874 static void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value)
5875 {
5876    unsigned reg = sctx->gfx_level >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX;
5877    si_pm4_set_reg(pm4, reg, value);
5878 }
5879 
si_set_grbm_gfx_index_se(struct si_context * sctx,struct si_pm4_state * pm4,unsigned se)5880 static void si_set_grbm_gfx_index_se(struct si_context *sctx, struct si_pm4_state *pm4, unsigned se)
5881 {
5882    assert(se == ~0 || se < sctx->screen->info.max_se);
5883    si_set_grbm_gfx_index(sctx, pm4,
5884                          (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) : S_030800_SE_INDEX(se)) |
5885                             S_030800_SH_BROADCAST_WRITES(1) |
5886                             S_030800_INSTANCE_BROADCAST_WRITES(1));
5887 }
5888 
si_write_harvested_raster_configs(struct si_context * sctx,struct si_pm4_state * pm4,unsigned raster_config,unsigned raster_config_1)5889 static void si_write_harvested_raster_configs(struct si_context *sctx, struct si_pm4_state *pm4,
5890                                               unsigned raster_config, unsigned raster_config_1)
5891 {
5892    unsigned num_se = MAX2(sctx->screen->info.max_se, 1);
5893    unsigned raster_config_se[4];
5894    unsigned se;
5895 
5896    ac_get_harvested_configs(&sctx->screen->info, raster_config, &raster_config_1, raster_config_se);
5897 
5898    for (se = 0; se < num_se; se++) {
5899       si_set_grbm_gfx_index_se(sctx, pm4, se);
5900       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
5901    }
5902    si_set_grbm_gfx_index(sctx, pm4, ~0);
5903 
5904    if (sctx->gfx_level >= GFX7) {
5905       si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
5906    }
5907 }
5908 
si_set_raster_config(struct si_context * sctx,struct si_pm4_state * pm4)5909 static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4)
5910 {
5911    struct si_screen *sscreen = sctx->screen;
5912    unsigned num_rb = MIN2(sscreen->info.max_render_backends, 16);
5913    uint64_t rb_mask = sscreen->info.enabled_rb_mask;
5914    unsigned raster_config = sscreen->pa_sc_raster_config;
5915    unsigned raster_config_1 = sscreen->pa_sc_raster_config_1;
5916 
5917    if (!rb_mask || util_bitcount64(rb_mask) >= num_rb) {
5918       /* Always use the default config when all backends are enabled
5919        * (or when we failed to determine the enabled backends).
5920        */
5921       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config);
5922       if (sctx->gfx_level >= GFX7)
5923          si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
5924    } else {
5925       si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
5926    }
5927 }
5928 
/* Return the CU enable mask used for PS on gfx10.3: only as many CUs as
 * the smallest shader array provides. */
unsigned gfx103_get_cu_mask_ps(struct si_screen *sscreen)
{
   /* It's wasteful to enable all CUs for PS if shader arrays have a different
    * number of CUs. The reason is that the hardware sends the same number of PS
    * waves to each shader array, so the slowest shader array limits the performance.
    * Disable the extra CUs for PS in other shader arrays to save power and thus
    * increase clocks for busy CUs. In the future, we might disable or enable this
    * tweak only for certain apps.
    */
   return u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa);
}
5940 
/* Build the GFX6-9 command-stream preamble: a PM4 state object containing
 * register writes that bring compute and (when present) graphics registers
 * to known values at IB start. The result is published in
 * sctx->cs_preamble_state, and a clone is kept for TMZ (protected) submissions.
 */
static void gfx6_init_gfx_preamble_state(struct si_context *sctx)
{
   struct si_screen *sscreen = sctx->screen;
   /* 0 when there is no border color buffer (the register still gets written). */
   uint64_t border_color_va =
      sctx->border_color_buffer ? sctx->border_color_buffer->gpu_address : 0;
   /* Same CU enable mask for both shader halves (SH0/SH1) of each SE. */
   uint32_t compute_cu_en = S_00B858_SH0_CU_EN(sscreen->info.spi_cu_en) |
                            S_00B858_SH1_CU_EN(sscreen->info.spi_cu_en);
   bool has_clear_state = sscreen->info.has_clear_state;

   /* We need more space because the preamble is large. */
   struct si_pm4_state *pm4 = si_pm4_create_sized(sscreen, 214, sctx->has_graphics);
   if (!pm4)
      return;

   /* With register shadowing, the kernel/firmware restores state, so the
    * CONTEXT_CONTROL / CLEAR_STATE sequence is only emitted without it. */
   if (sctx->has_graphics && !sctx->shadowing.registers) {
      si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
      si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1));
      si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1));

      if (sscreen->dpbb_allowed) {
         /* Flush the binner before resetting context state. */
         si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
         si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
      }

      if (has_clear_state) {
         /* Reset context registers to their hardware default values. */
         si_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0));
         si_pm4_cmd_add(pm4, 0);
      }
   }

   /* Compute registers. */
   si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(sctx->screen->info.address32_hi >> 8));
   si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, compute_cu_en);
   si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, compute_cu_en);

   if (sctx->gfx_level >= GFX7) {
      /* SE2/SE3 only exist on GFX7+. */
      si_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, compute_cu_en);
      si_pm4_set_reg(pm4, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, compute_cu_en);
   }

   if (sctx->gfx_level >= GFX9)
      si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 0);

   /* Set the pointer to border colors. MI200 doesn't support border colors. */
   if (sctx->gfx_level >= GFX7 && sctx->border_color_buffer) {
      si_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, border_color_va >> 8);
      si_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI,
                     S_030E04_ADDRESS(border_color_va >> 40));
   } else if (sctx->gfx_level == GFX6) {
      si_pm4_set_reg(pm4, R_00950C_TA_CS_BC_BASE_ADDR, border_color_va >> 8);
   }

   /* Compute-only contexts skip all graphics register programming. */
   if (!sctx->has_graphics)
      goto done;

   /* Graphics registers. */
   /* CLEAR_STATE doesn't restore these correctly. */
   si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
   si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
                  S_028244_BR_X(16384) | S_028244_BR_Y(16384));

   si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
   if (!has_clear_state)
      si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));

   if (!has_clear_state) {
      /* Without CLEAR_STATE, these must be set to their defaults manually. */
      si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
      si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
      si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
      si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
      si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
      si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);

      si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
      si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
      si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
   }

   /* Graphics (TA) border color base; the HI half only exists on GFX7+. */
   si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
   if (sctx->gfx_level >= GFX7)
      si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40));

   if (sctx->gfx_level == GFX6) {
      si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE,
                     S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));
   }

   /* Line stipple registers moved to a different offset on GFX7. */
   if (sctx->gfx_level >= GFX7) {
      si_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0);
      si_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0);
   } else {
      si_pm4_set_reg(pm4, R_008A60_PA_SU_LINE_STIPPLE_VALUE, 0);
      si_pm4_set_reg(pm4, R_008B10_PA_SC_LINE_STIPPLE_STATE, 0);
   }

   /* If any sample location uses the -8 coordinate, the EXCLUSION fields should be set to 0. */
   si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL,
                  S_02882C_XMAX_RIGHT_EXCLUSION(sctx->gfx_level >= GFX7) |
                  S_02882C_YMAX_BOTTOM_EXCLUSION(sctx->gfx_level >= GFX7));

   if (sctx->family >= CHIP_POLARIS10 && !sctx->screen->info.has_small_prim_filter_sample_loc_bug) {
      /* Polaris10-12 should disable small line culling, but those also have the sample loc bug,
       * so they never enter this branch.
       */
      assert(sctx->family > CHIP_POLARIS12);
      si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
                     S_028830_SMALL_PRIM_FILTER_ENABLE(1));
   }

   if (sctx->gfx_level <= GFX7 || !has_clear_state) {
      si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
      si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);

      /* CLEAR_STATE doesn't clear these correctly on certain generations.
       * I don't know why. Deduced by trial and error.
       */
      si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
      si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
      si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
      si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
                     S_028034_BR_X(16384) | S_028034_BR_Y(16384));
   }

   if (sctx->gfx_level >= GFX7) {
      /* Enable all CUs for PS, with the maximum wave limit. */
      si_pm4_set_reg_idx3(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
                          ac_apply_cu_en(S_00B01C_CU_EN(0xffffffff) |
                                         S_00B01C_WAVE_LIMIT(0x3F),
                                         C_00B01C_CU_EN, 0, &sscreen->info));
   }

   if (sctx->gfx_level <= GFX8) {
      si_set_raster_config(sctx, pm4);

      /* FIXME calculate these values somehow ??? */
      si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
      si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);

      /* These registers, when written, also overwrite the CLEAR_STATE
       * context, so we can't rely on CLEAR_STATE setting them.
       * It would be an issue if there was another UMD changing them.
       */
      si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
      si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
      si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
   }

   /* Upper 32 bits of shader program addresses; register offsets differ on GFX9. */
   if (sctx->gfx_level == GFX9) {
      si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS,
                     S_00B414_MEM_BASE(sscreen->info.address32_hi >> 8));
      si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES,
                     S_00B214_MEM_BASE(sscreen->info.address32_hi >> 8));
   } else {
      si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS,
                     S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8));
   }

   if (sctx->gfx_level >= GFX7 && sctx->gfx_level <= GFX8) {
      si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
                     ac_apply_cu_en(S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
                                    C_00B51C_CU_EN, 0, &sscreen->info));
      si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
      si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
                     ac_apply_cu_en(S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
                                    C_00B31C_CU_EN, 0, &sscreen->info));

      /* If this is 0, Bonaire can hang even if GS isn't being used.
       * Other chips are unaffected. These are suboptimal values,
       * but we don't use on-chip GS.
       */
      si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
                     S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));
   }

   if (sctx->gfx_level >= GFX8) {
      unsigned vgt_tess_distribution;

      if (sctx->gfx_level == GFX9) {
         vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(12) |
                                 S_028B50_ACCUM_TRI(30) |
                                 S_028B50_ACCUM_QUAD(24) |
                                 S_028B50_DONUT_SPLIT_GFX9(24) |
                                 S_028B50_TRAP_SPLIT(6);
      } else {
         vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) |
                                 S_028B50_ACCUM_TRI(11) |
                                 S_028B50_ACCUM_QUAD(11) |
                                 S_028B50_DONUT_SPLIT_GFX81(16);

         /* Testing with Unigine Heaven extreme tessellation yielded best results
          * with TRAP_SPLIT = 3.
          */
         if (sctx->family == CHIP_FIJI || sctx->family >= CHIP_POLARIS10)
            vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
      }

      si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
   }

   si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);

   if (sctx->gfx_level == GFX9) {
      si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
      si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
      si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);

      si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));

      si_pm4_set_reg_idx3(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
                          ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F),
                                         C_00B41C_CU_EN, 0, &sscreen->info));

      /* Primitive binning (DPBB) limits. */
      si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
                     S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) |
                     S_028C48_MAX_PRIM_PER_BATCH(1023));
      si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
                     S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));

      si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1);
      si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
   }

done:
   si_pm4_finalize(pm4);
   sctx->cs_preamble_state = pm4;
   sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */
}
6167 
/* Build the command-stream preamble for compute-only CDNA chips (MI100/MI200/
 * GFX940+): only compute registers are programmed. The result is stored in
 * sctx->cs_preamble_state with a clone kept for TMZ submissions.
 */
static void cdna_init_compute_preamble_state(struct si_context *sctx)
{
   struct si_screen *sscreen = sctx->screen;
   /* 0 when no border color buffer exists (only MI100 has one — see below). */
   uint64_t border_color_va =
      sctx->border_color_buffer ? sctx->border_color_buffer->gpu_address : 0;
   /* Same CU enable mask for both shader halves (SH0/SH1) of each SE. */
   uint32_t compute_cu_en = S_00B858_SH0_CU_EN(sscreen->info.spi_cu_en) |
                            S_00B858_SH1_CU_EN(sscreen->info.spi_cu_en);

   struct si_pm4_state *pm4 = si_pm4_create_sized(sscreen, 48, true);
   if (!pm4)
      return;

   /* Compute registers. */
   /* Disable profiling on compute chips. */
   si_pm4_set_reg(pm4, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, 0);
   si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(sctx->screen->info.address32_hi >> 8));
   si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, compute_cu_en);
   si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, compute_cu_en);
   si_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, compute_cu_en);
   si_pm4_set_reg(pm4, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, compute_cu_en);
   si_pm4_set_reg(pm4, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, 0);

   /* The register layout differs on GFX940+: SE4-SE7 thread management
    * registers are replaced by TG_CHUNK_SIZE / PGM_RSRC3. */
   if (sscreen->info.family >= CHIP_GFX940) {
      si_pm4_set_reg(pm4, R_00B89C_COMPUTE_TG_CHUNK_SIZE, 0);
      si_pm4_set_reg(pm4, R_00B8B4_COMPUTE_PGM_RSRC3, 0);
   } else {
      si_pm4_set_reg(pm4, R_00B894_COMPUTE_STATIC_THREAD_MGMT_SE4, compute_cu_en);
      si_pm4_set_reg(pm4, R_00B898_COMPUTE_STATIC_THREAD_MGMT_SE5, compute_cu_en);
      si_pm4_set_reg(pm4, R_00B89C_COMPUTE_STATIC_THREAD_MGMT_SE6, compute_cu_en);
      si_pm4_set_reg(pm4, R_00B8A0_COMPUTE_STATIC_THREAD_MGMT_SE7, compute_cu_en);
   }

   si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 0);

   /* Set the pointer to border colors. Only MI100 supports border colors. */
   if (sscreen->info.family == CHIP_MI100) {
      si_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, border_color_va >> 8);
      si_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI,
                     S_030E04_ADDRESS(border_color_va >> 40));
   }

   si_pm4_finalize(pm4);
   sctx->cs_preamble_state = pm4;
   sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */
}
6213 
/* Build the GFX10+ command-stream preamble: compute registers, then (for
 * graphics contexts) shader, context, and uconfig registers, plus the GFX11
 * attribute-ring setup. The result is stored in sctx->cs_preamble_state with
 * a clone kept for TMZ (protected) submissions.
 */
static void gfx10_init_gfx_preamble_state(struct si_context *sctx)
{
   struct si_screen *sscreen = sctx->screen;
   /* 0 when there is no border color buffer (the registers still get written). */
   uint64_t border_color_va =
      sctx->border_color_buffer ? sctx->border_color_buffer->gpu_address : 0;
   /* Same CU enable mask for both shader halves (SH0/SH1) of each SE. */
   uint32_t compute_cu_en = S_00B858_SH0_CU_EN(sscreen->info.spi_cu_en) |
                            S_00B858_SH1_CU_EN(sscreen->info.spi_cu_en);
   unsigned meta_write_policy, meta_read_policy;
   /* "No allocation" L2 cache policy; encoding differs on GFX11. */
   unsigned no_alloc = sctx->gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11:
                                                  V_02807C_CACHE_NOA_GFX10;
   /* Enable CMASK/HTILE/DCC caching in L2 for small chips. */
   if (sscreen->info.max_render_backends <= 4) {
      meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
      meta_read_policy = V_02807C_CACHE_LRU_RD;  /* cache reads */
   } else {
      meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */
      meta_read_policy = no_alloc; /* don't cache reads that miss */
   }

   /* We need more space because the preamble is large. */
   struct si_pm4_state *pm4 = si_pm4_create_sized(sscreen, 214, sctx->has_graphics);
   if (!pm4)
      return;

   /* With register shadowing, the kernel/firmware restores state, so the
    * CONTEXT_CONTROL / CLEAR_STATE sequence is only emitted without it. */
   if (sctx->has_graphics && !sctx->shadowing.registers) {
      si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
      si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1));
      si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1));

      if (sscreen->dpbb_allowed) {
         /* Flush the binner before resetting context state. */
         si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
         si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
      }

      /* Reset context registers to their hardware default values. */
      si_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0));
      si_pm4_cmd_add(pm4, 0);
   }

   /* Non-graphics uconfig registers. */
   if (sctx->gfx_level < GFX11)
      si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 0x20);
   si_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, border_color_va >> 8);
   si_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI, S_030E04_ADDRESS(border_color_va >> 40));

   /* Compute registers. */
   si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(sscreen->info.address32_hi >> 8));
   si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, compute_cu_en);
   si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, compute_cu_en);

   si_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, compute_cu_en);
   si_pm4_set_reg(pm4, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, compute_cu_en);

   si_pm4_set_reg(pm4, R_00B890_COMPUTE_USER_ACCUM_0, 0);
   si_pm4_set_reg(pm4, R_00B894_COMPUTE_USER_ACCUM_1, 0);
   si_pm4_set_reg(pm4, R_00B898_COMPUTE_USER_ACCUM_2, 0);
   si_pm4_set_reg(pm4, R_00B89C_COMPUTE_USER_ACCUM_3, 0);

   if (sctx->gfx_level >= GFX11) {
      si_pm4_set_reg(pm4, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, compute_cu_en);
      si_pm4_set_reg(pm4, R_00B8B0_COMPUTE_STATIC_THREAD_MGMT_SE5, compute_cu_en);
      si_pm4_set_reg(pm4, R_00B8B4_COMPUTE_STATIC_THREAD_MGMT_SE6, compute_cu_en);
      si_pm4_set_reg(pm4, R_00B8B8_COMPUTE_STATIC_THREAD_MGMT_SE7, compute_cu_en);

      /* How many threads should go to 1 SE before moving onto the next. Think of GL1 cache hits.
       * Only these values are valid: 0 (disabled), 64, 128, 256, 512
       * Recommendation: 64 = RT, 256 = non-RT (run benchmarks to be sure)
       */
      si_pm4_set_reg(pm4, R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE, S_00B8BC_INTERLEAVE(256));
   } else {
      si_pm4_set_reg(pm4, R_00B8A0_COMPUTE_PGM_RSRC3, 0);
   }

   si_pm4_set_reg(pm4, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);

   /* Compute-only contexts skip all graphics register programming. */
   if (!sctx->has_graphics)
      goto done;

   /* Shader registers - PS. */
   unsigned cu_mask_ps = sctx->gfx_level >= GFX10_3 ? gfx103_get_cu_mask_ps(sscreen) : ~0u;
   if (sctx->gfx_level < GFX11) {
      si_pm4_set_reg_idx3(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
                          ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16), /* CUs 16-31 */
                                         C_00B004_CU_EN, 16, &sscreen->info));
   }
   si_pm4_set_reg_idx3(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
                       ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) |
                                      S_00B01C_WAVE_LIMIT(0x3F) |
                                      S_00B01C_LDS_GROUP_SIZE(sctx->gfx_level >= GFX11),
                                      C_00B01C_CU_EN, 0, &sscreen->info));
   si_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
                  S_00B0C0_SOFT_GROUPING_EN(1) |
                  S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
   si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
   si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
   si_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
   si_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);

   /* Shader registers - VS. (The dedicated HW VS stage was removed on GFX11.) */
   if (sctx->gfx_level < GFX11) {
      si_pm4_set_reg_idx3(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS,
                          ac_apply_cu_en(S_00B104_CU_EN(0xffff), /* CUs 16-31 */
                                         C_00B104_CU_EN, 16, &sscreen->info));
      si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
      si_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
      si_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);
      si_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);
      si_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);
   }

   /* Shader registers - GS. */
   si_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
   si_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
   si_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
   si_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
   si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
                  S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8));

   /* Shader registers - HS. */
   if (sctx->gfx_level < GFX11) {
      si_pm4_set_reg_idx3(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
                          ac_apply_cu_en(S_00B404_CU_EN(0xffff), /* CUs 16-31 */
                                         C_00B404_CU_EN, 16, &sscreen->info));
   }
   si_pm4_set_reg_idx3(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
                       ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F),
                                      C_00B41C_CU_EN, 0, &sscreen->info));
   si_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
   si_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
   si_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
   si_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
   si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS,
                  S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8));

   /* Context registers. */
   if (sctx->gfx_level < GFX11) {
      si_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL, S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF));
   }
   /* Depth/stencil L2 cache policies; meta surfaces use the policies picked above. */
   si_pm4_set_reg(pm4, R_02807C_DB_RMI_L2_CACHE_CONTROL,
                  S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) |
                  S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) |
                  S_02807C_HTILE_WR_POLICY(meta_write_policy) |
                  S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) |
                  S_02807C_Z_RD_POLICY(no_alloc) |
                  S_02807C_S_RD_POLICY(no_alloc) |
                  S_02807C_HTILE_RD_POLICY(meta_read_policy));
   si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
   si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40));

   /* Color buffer L2 cache policies; field layout differs between GFX10 and GFX11. */
   si_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL,
                  (sctx->gfx_level >= GFX11 ?
                      S_028410_DCC_WR_POLICY_GFX11(meta_write_policy) |
                      S_028410_COLOR_WR_POLICY_GFX11(V_028410_CACHE_STREAM) |
                      S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX11)
                    :
                      S_028410_CMASK_WR_POLICY(meta_write_policy) |
                      S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) |
                      S_028410_DCC_WR_POLICY_GFX10(meta_write_policy) |
                      S_028410_COLOR_WR_POLICY_GFX10(V_028410_CACHE_STREAM) |
                      S_028410_CMASK_RD_POLICY(meta_read_policy) |
                      S_028410_FMASK_RD_POLICY(V_028410_CACHE_NOA_GFX10) |
                      S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX10)) |
                  S_028410_DCC_RD_POLICY(meta_read_policy));
   si_pm4_set_reg(pm4, R_028708_SPI_SHADER_IDX_FORMAT,
                  S_028708_IDX0_EXPORT_FORMAT(V_028708_SPI_SHADER_1COMP));

   if (sctx->gfx_level >= GFX10_3)
      si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);

   /* If any sample location uses the -8 coordinate, the EXCLUSION fields should be set to 0. */
   si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL,
                  S_02882C_XMAX_RIGHT_EXCLUSION(1) |
                  S_02882C_YMAX_BOTTOM_EXCLUSION(1));
   si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
                  S_028830_SMALL_PRIM_FILTER_ENABLE(1));
   if (sctx->gfx_level >= GFX10_3) {
      /* The rate combiners have no effect if they are disabled like this:
       *   VERTEX_RATE:    BYPASS_VTX_RATE_COMBINER = 1
       *   PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 1
       *   HTILE_RATE:     VRS_HTILE_ENCODING = 0
       *   SAMPLE_ITER:    PS_ITER_SAMPLE = 0
       *
       * Use OVERRIDE, which will ignore results from previous combiners.
       * (e.g. enabled sample shading overrides the vertex rate)
       */
      si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL,
                     S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE) |
                     S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE));
   }

   si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
   si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1);
   si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
                  sctx->gfx_level >= GFX11 ?
                     S_028B50_ACCUM_ISOLINE(128) |
                     S_028B50_ACCUM_TRI(128) |
                     S_028B50_ACCUM_QUAD(128) |
                     S_028B50_DONUT_SPLIT_GFX9(24) |
                     S_028B50_TRAP_SPLIT(6)
                   :
                     S_028B50_ACCUM_ISOLINE(12) |
                     S_028B50_ACCUM_TRI(30) |
                     S_028B50_ACCUM_QUAD(24) |
                     S_028B50_DONUT_SPLIT_GFX9(24) |
                     S_028B50_TRAP_SPLIT(6));

   /* Primitive binning (DPBB) limits. */
   si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
                  S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) |
                  S_028C48_MAX_PRIM_PER_BATCH(1023));

   if (sctx->gfx_level >= GFX11_5)
      si_pm4_set_reg(pm4, R_028C54_PA_SC_BINNER_CNTL_2,
                     S_028C54_ENABLE_PING_PONG_BIN_ORDER(1));

   /* Break up a pixel wave if it contains deallocs for more than
    * half the parameter cache.
    *
    * To avoid a deadlock where pixel waves aren't launched
    * because they're waiting for more pixels while the frontend
    * is stuck waiting for PC space, the maximum allowed value is
    * the size of the PC minus the largest possible allocation for
    * a single primitive shader subgroup.
    */
   si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL,
                  S_028C50_MAX_DEALLOCS_IN_WAVE(sctx->gfx_level >= GFX11 ? 16 : 512));
   if (sctx->gfx_level < GFX11)
      si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); /* Reuse for legacy (non-NGG) only. */

   /* Uconfig registers. */
   si_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0);
   si_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0);
   if (sctx->gfx_level >= GFX11) {
      /* This is changed by draws for indexed draws, but we need to set DISABLE_FOR_AUTO_INDEX
       * here, which disables primitive restart for all non-indexed draws, so that those draws
       * won't have to set this state.
       */
      si_pm4_set_reg(pm4, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, S_03092C_DISABLE_FOR_AUTO_INDEX(1));
   }
   si_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0);
   si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
   si_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0);
   si_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0);

   si_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0);
   si_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0);

   if (sctx->gfx_level >= GFX11) {
      uint64_t rb_mask = BITFIELD64_MASK(sscreen->info.max_render_backends);

      /* Enable pixel-pipe statistics for all render backends. */
      si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 2, 0));
      si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1));
      si_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) |
                          PIXEL_PIPE_STATE_CNTL_STRIDE(2) |
                          PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask));
      si_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask));

      /* We must wait for idle using an EOP event before changing the attribute ring registers.
       * Use the bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory.
       */
      si_pm4_cmd_add(pm4, PKT3(PKT3_RELEASE_MEM, 6, 0));
      si_pm4_cmd_add(pm4, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) |
                          S_490_EVENT_INDEX(5) |
                          S_490_PWS_ENABLE(1));
      si_pm4_cmd_add(pm4, 0); /* DST_SEL, INT_SEL, DATA_SEL */
      si_pm4_cmd_add(pm4, 0); /* ADDRESS_LO */
      si_pm4_cmd_add(pm4, 0); /* ADDRESS_HI */
      si_pm4_cmd_add(pm4, 0); /* DATA_LO */
      si_pm4_cmd_add(pm4, 0); /* DATA_HI */
      si_pm4_cmd_add(pm4, 0); /* INT_CTXID */

      /* Wait for the PWS counter. */
      si_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
      si_pm4_cmd_add(pm4, S_580_PWS_STAGE_SEL(V_580_CP_ME) |
                          S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) |
                          S_580_PWS_ENA2(1) |
                          S_580_PWS_COUNT(0));
      si_pm4_cmd_add(pm4, 0xffffffff); /* GCR_SIZE */
      si_pm4_cmd_add(pm4, 0x01ffffff); /* GCR_SIZE_HI */
      si_pm4_cmd_add(pm4, 0); /* GCR_BASE_LO */
      si_pm4_cmd_add(pm4, 0); /* GCR_BASE_HI */
      si_pm4_cmd_add(pm4, S_585_PWS_ENA(1));
      si_pm4_cmd_add(pm4, 0); /* GCR_CNTL */

      /* NOTE(review): magic throttle values — presumably hardware-recommended
       * defaults; confirm against the register spec before changing. */
      si_pm4_set_reg(pm4, R_031110_SPI_GS_THROTTLE_CNTL1, 0x12355123);
      si_pm4_set_reg(pm4, R_031114_SPI_GS_THROTTLE_CNTL2, 0x1544D);

      /* The ring base register only stores bits [47:16], so the upper 32 bits
       * of the address must match the 32-bit address space base. */
      assert((sscreen->attribute_ring->gpu_address >> 32) == sscreen->info.address32_hi);

      /* The PS will read inputs from this address. */
      si_pm4_set_reg(pm4, R_031118_SPI_ATTRIBUTE_RING_BASE,
                     sscreen->attribute_ring->gpu_address >> 16);
      si_pm4_set_reg(pm4, R_03111C_SPI_ATTRIBUTE_RING_SIZE,
                     S_03111C_MEM_SIZE((sscreen->info.attribute_ring_size_per_se >> 16) - 1) |
                     S_03111C_BIG_PAGE(sscreen->info.discardable_allows_big_page) |
                     S_03111C_L1_POLICY(1));
   }

done:
   si_pm4_finalize(pm4);
   sctx->cs_preamble_state = pm4;
   sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */
}
6515 
si_init_gfx_preamble_state(struct si_context * sctx)6516 void si_init_gfx_preamble_state(struct si_context *sctx)
6517 {
6518    if (!sctx->screen->info.has_graphics)
6519       cdna_init_compute_preamble_state(sctx);
6520    else if (sctx->gfx_level >= GFX10)
6521       gfx10_init_gfx_preamble_state(sctx);
6522    else
6523       gfx6_init_gfx_preamble_state(sctx);
6524 }
6525