• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  * Copyright 2024 Valve Corporation
4  *
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #include "ac_cmdbuf.h"
9 #include "ac_pm4.h"
10 #include "ac_shader_util.h"
11 
12 #include "sid.h"
13 
14 #include "util/u_math.h"
15 
16 #define SI_GS_PER_ES 128
17 
18 static void
gfx6_init_compute_preamble_state(const struct ac_preamble_state * state,struct ac_pm4_state * pm4)19 gfx6_init_compute_preamble_state(const struct ac_preamble_state *state,
20                                  struct ac_pm4_state *pm4)
21 {
22    const struct radeon_info *info = pm4->info;
23    const uint32_t compute_cu_en = S_00B858_SH0_CU_EN(info->spi_cu_en) |
24                                   S_00B858_SH1_CU_EN(info->spi_cu_en);
25 
26    ac_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(info->address32_hi >> 8));
27 
28    for (unsigned i = 0; i < 2; ++i)
29       ac_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 + i * 4,
30                      i < info->max_se ? compute_cu_en : 0x0);
31 
32    if (info->gfx_level >= GFX7) {
33       for (unsigned i = 2; i < 4; ++i)
34          ac_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2 + (i - 2) * 4,
35                         i < info->max_se ? compute_cu_en : 0x0);
36    }
37 
38    if (info->gfx_level >= GFX9)
39       ac_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 0);
40 
41    /* Set the pointer to border colors. */
42    if (info->gfx_level >= GFX7) {
43       ac_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, state->border_color_va >> 8);
44       ac_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI,
45                      S_030E04_ADDRESS(state->border_color_va >> 40));
46    } else if (info->gfx_level == GFX6) {
47       ac_pm4_set_reg(pm4, R_00950C_TA_CS_BC_BASE_ADDR, state->border_color_va >> 8);
48    }
49 }
50 
51 static void
gfx10_init_compute_preamble_state(const struct ac_preamble_state * state,struct ac_pm4_state * pm4)52 gfx10_init_compute_preamble_state(const struct ac_preamble_state *state,
53                                   struct ac_pm4_state *pm4)
54 {
55    const struct radeon_info *info = pm4->info;
56    const uint32_t compute_cu_en = S_00B858_SH0_CU_EN(info->spi_cu_en) |
57                                   S_00B858_SH1_CU_EN(info->spi_cu_en);
58 
59    if (info->gfx_level < GFX11)
60       ac_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 0x20);
61    ac_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, state->border_color_va >> 8);
62    ac_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI, S_030E04_ADDRESS(state->border_color_va >> 40));
63 
64    ac_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(info->address32_hi >> 8));
65 
66    for (unsigned i = 0; i < 2; ++i)
67       ac_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 + i * 4,
68                      i < info->max_se ? compute_cu_en : 0x0);
69 
70    for (unsigned i = 2; i < 4; ++i)
71       ac_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2 + (i - 2) * 4,
72                      i < info->max_se ? compute_cu_en : 0x0);
73 
74    ac_pm4_set_reg(pm4, R_00B890_COMPUTE_USER_ACCUM_0, 0);
75    ac_pm4_set_reg(pm4, R_00B894_COMPUTE_USER_ACCUM_1, 0);
76    ac_pm4_set_reg(pm4, R_00B898_COMPUTE_USER_ACCUM_2, 0);
77    ac_pm4_set_reg(pm4, R_00B89C_COMPUTE_USER_ACCUM_3, 0);
78 
79    if (info->gfx_level >= GFX11) {
80       for (unsigned i = 4; i < 8; ++i)
81          ac_pm4_set_reg(pm4, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4 + (i - 4) * 4,
82                         i < info->max_se ? compute_cu_en : 0x0);
83 
84       /* How many threads should go to 1 SE before moving onto the next. Think of GL1 cache hits.
85        * Only these values are valid: 0 (disabled), 64, 128, 256, 512
86        * Recommendation: 64 = RT, 256 = non-RT (run benchmarks to be sure)
87        */
88       ac_pm4_set_reg(pm4, R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE,
89                      S_00B8BC_INTERLEAVE(state->gfx11.compute_dispatch_interleave));
90    }
91 
92    ac_pm4_set_reg(pm4, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
93 }
94 
95 static void
gfx12_init_compute_preamble_state(const struct ac_preamble_state * state,struct ac_pm4_state * pm4)96 gfx12_init_compute_preamble_state(const struct ac_preamble_state *state,
97                                   struct ac_pm4_state *pm4)
98 {
99    const struct radeon_info *info = pm4->info;
100    const uint32_t compute_cu_en = S_00B858_SH0_CU_EN(info->spi_cu_en) |
101                                   S_00B858_SH1_CU_EN(info->spi_cu_en);
102    const uint32_t num_se = info->max_se;
103 
104    ac_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, state->border_color_va >> 8);
105    ac_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI, S_030E04_ADDRESS(state->border_color_va >> 40));
106 
107    ac_pm4_set_reg(pm4, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, 0);
108    ac_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(info->address32_hi >> 8));
109    ac_pm4_set_reg(pm4, R_00B838_COMPUTE_DISPATCH_PKT_ADDR_LO, 0);
110    ac_pm4_set_reg(pm4, R_00B83C_COMPUTE_DISPATCH_PKT_ADDR_HI, 0);
111    ac_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, compute_cu_en);
112    ac_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, num_se > 1 ? compute_cu_en : 0);
113    ac_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, num_se > 2 ? compute_cu_en : 0);
114    ac_pm4_set_reg(pm4, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, num_se > 3 ? compute_cu_en : 0);
115    ac_pm4_set_reg(pm4, R_00B88C_COMPUTE_STATIC_THREAD_MGMT_SE8, num_se > 8 ? compute_cu_en : 0);
116    ac_pm4_set_reg(pm4, R_00B890_COMPUTE_USER_ACCUM_0, 0);
117    ac_pm4_set_reg(pm4, R_00B894_COMPUTE_USER_ACCUM_1, 0);
118    ac_pm4_set_reg(pm4, R_00B898_COMPUTE_USER_ACCUM_2, 0);
119    ac_pm4_set_reg(pm4, R_00B89C_COMPUTE_USER_ACCUM_3, 0);
120    ac_pm4_set_reg(pm4, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, num_se > 4 ? compute_cu_en : 0);
121    ac_pm4_set_reg(pm4, R_00B8B0_COMPUTE_STATIC_THREAD_MGMT_SE5, num_se > 5 ? compute_cu_en : 0);
122    ac_pm4_set_reg(pm4, R_00B8B4_COMPUTE_STATIC_THREAD_MGMT_SE6, num_se > 6 ? compute_cu_en : 0);
123    ac_pm4_set_reg(pm4, R_00B8B8_COMPUTE_STATIC_THREAD_MGMT_SE7, num_se > 7 ? compute_cu_en : 0);
124    ac_pm4_set_reg(pm4, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
125 }
126 
127 void
ac_init_compute_preamble_state(const struct ac_preamble_state * state,struct ac_pm4_state * pm4)128 ac_init_compute_preamble_state(const struct ac_preamble_state *state,
129                                struct ac_pm4_state *pm4)
130 {
131    const struct radeon_info *info = pm4->info;
132 
133    if (info->gfx_level >= GFX12) {
134       gfx12_init_compute_preamble_state(state, pm4);
135    } else if (info->gfx_level >= GFX10) {
136       gfx10_init_compute_preamble_state(state, pm4);
137    } else {
138       gfx6_init_compute_preamble_state(state, pm4);
139    }
140 }
141 
142 static void
ac_set_grbm_gfx_index(const struct radeon_info * info,struct ac_pm4_state * pm4,unsigned value)143 ac_set_grbm_gfx_index(const struct radeon_info *info, struct ac_pm4_state *pm4, unsigned value)
144 {
145    const unsigned reg = info->gfx_level >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX;
146    ac_pm4_set_reg(pm4, reg, value);
147 }
148 
149 static void
ac_set_grbm_gfx_index_se(const struct radeon_info * info,struct ac_pm4_state * pm4,unsigned se)150 ac_set_grbm_gfx_index_se(const struct radeon_info *info, struct ac_pm4_state *pm4, unsigned se)
151 {
152    assert(se == ~0 || se < info->max_se);
153    ac_set_grbm_gfx_index(info, pm4,
154                          (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) : S_030800_SE_INDEX(se)) |
155                             S_030800_SH_BROADCAST_WRITES(1) |
156                             S_030800_INSTANCE_BROADCAST_WRITES(1));
157 }
158 
159 static void
ac_write_harvested_raster_configs(const struct radeon_info * info,struct ac_pm4_state * pm4,unsigned raster_config,unsigned raster_config_1)160 ac_write_harvested_raster_configs(const struct radeon_info *info, struct ac_pm4_state *pm4,
161                                   unsigned raster_config, unsigned raster_config_1)
162 {
163    const unsigned num_se = MAX2(info->max_se, 1);
164    unsigned raster_config_se[4];
165    unsigned se;
166 
167    ac_get_harvested_configs(info, raster_config, &raster_config_1, raster_config_se);
168 
169    for (se = 0; se < num_se; se++) {
170       ac_set_grbm_gfx_index_se(info, pm4, se);
171       ac_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
172    }
173    ac_set_grbm_gfx_index(info, pm4, ~0);
174 
175    if (info->gfx_level >= GFX7) {
176       ac_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
177    }
178 }
179 
180 static void
ac_set_raster_config(const struct radeon_info * info,struct ac_pm4_state * pm4)181 ac_set_raster_config(const struct radeon_info *info, struct ac_pm4_state *pm4)
182 {
183    const unsigned num_rb = MIN2(info->max_render_backends, 16);
184    const uint64_t rb_mask = info->enabled_rb_mask;
185    unsigned raster_config, raster_config_1;
186 
187    ac_get_raster_config(info, &raster_config, &raster_config_1, NULL);
188 
189    if (!rb_mask || util_bitcount64(rb_mask) >= num_rb) {
190       /* Always use the default config when all backends are enabled
191        * (or when we failed to determine the enabled backends).
192        */
193       ac_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config);
194       if (info->gfx_level >= GFX7)
195          ac_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
196    } else {
197       ac_write_harvested_raster_configs(info, pm4, raster_config, raster_config_1);
198    }
199 }
200 
201 static void
gfx6_init_graphics_preamble_state(const struct ac_preamble_state * state,struct ac_pm4_state * pm4)202 gfx6_init_graphics_preamble_state(const struct ac_preamble_state *state,
203                                   struct ac_pm4_state *pm4)
204 {
205    const struct radeon_info *info = pm4->info;
206 
207    /* Graphics registers. */
208    /* CLEAR_STATE doesn't restore these correctly. */
209    ac_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
210    ac_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
211                   S_028244_BR_X(16384) | S_028244_BR_Y(16384));
212 
213    ac_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
214    if (!info->has_clear_state)
215       ac_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
216 
217    if (!info->has_clear_state) {
218       ac_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
219       ac_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
220       ac_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
221       ac_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
222       ac_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
223       ac_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
224       ac_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
225       ac_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
226    }
227 
228    ac_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, state->border_color_va >> 8);
229    if (info->gfx_level >= GFX7)
230       ac_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(state->border_color_va >> 40));
231 
232    if (info->gfx_level == GFX6) {
233       ac_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE,
234                      S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));
235    }
236 
237    if (info->gfx_level >= GFX7) {
238       ac_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0);
239       ac_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0);
240    } else {
241       ac_pm4_set_reg(pm4, R_008A60_PA_SU_LINE_STIPPLE_VALUE, 0);
242       ac_pm4_set_reg(pm4, R_008B10_PA_SC_LINE_STIPPLE_STATE, 0);
243    }
244 
245    if (info->gfx_level <= GFX7 || !info->has_clear_state) {
246       ac_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
247       ac_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
248 
249       /* CLEAR_STATE doesn't clear these correctly on certain generations.
250        * I don't know why. Deduced by trial and error.
251        */
252       ac_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
253       ac_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
254       ac_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
255    }
256 
257    if (info->gfx_level >= GFX7) {
258       ac_pm4_set_reg_idx3(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
259                           ac_apply_cu_en(S_00B01C_CU_EN(0xffffffff) |
260                                          S_00B01C_WAVE_LIMIT_GFX7(0x3F),
261                                          C_00B01C_CU_EN, 0, info));
262    }
263 
264    if (info->gfx_level <= GFX8) {
265       ac_set_raster_config(info, pm4);
266 
267       /* FIXME calculate these values somehow ??? */
268       ac_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
269       ac_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
270 
271       /* These registers, when written, also overwrite the CLEAR_STATE
272        * context, so we can't rely on CLEAR_STATE setting them.
273        * It would be an issue if there was another UMD changing them.
274        */
275       ac_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
276       ac_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
277       ac_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
278    }
279 
280    if (info->gfx_level == GFX9) {
281       ac_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS,
282                      S_00B414_MEM_BASE(info->address32_hi >> 8));
283       ac_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES,
284                      S_00B214_MEM_BASE(info->address32_hi >> 8));
285    } else {
286       ac_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS,
287                      S_00B524_MEM_BASE(info->address32_hi >> 8));
288    }
289 
290    if (info->gfx_level >= GFX7 && info->gfx_level <= GFX8) {
291       ac_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
292                      ac_apply_cu_en(S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
293                                     C_00B51C_CU_EN, 0, info));
294       ac_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
295       ac_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
296                      ac_apply_cu_en(S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
297                                     C_00B31C_CU_EN, 0, info));
298 
299       /* If this is 0, Bonaire can hang even if GS isn't being used.
300        * Other chips are unaffected. These are suboptimal values,
301        * but we don't use on-chip GS.
302        */
303       ac_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
304                      S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));
305    }
306 
307    if (info->gfx_level >= GFX8) {
308       unsigned vgt_tess_distribution;
309 
310       if (info->gfx_level == GFX9) {
311          vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(12) |
312                                  S_028B50_ACCUM_TRI(30) |
313                                  S_028B50_ACCUM_QUAD(24) |
314                                  S_028B50_DONUT_SPLIT_GFX9(24) |
315                                  S_028B50_TRAP_SPLIT(6);
316       } else {
317          vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) |
318                                  S_028B50_ACCUM_TRI(11) |
319                                  S_028B50_ACCUM_QUAD(11) |
320                                  S_028B50_DONUT_SPLIT_GFX81(16);
321 
322          /* Testing with Unigine Heaven extreme tessellation yielded best results
323           * with TRAP_SPLIT = 3.
324           */
325          if (info->family == CHIP_FIJI || info->family >= CHIP_POLARIS10)
326             vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
327       }
328 
329       ac_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
330    }
331 
332    ac_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
333 
334    if (info->gfx_level == GFX9) {
335       ac_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
336       ac_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
337       ac_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
338 
339       ac_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
340 
341       ac_pm4_set_reg_idx3(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
342                           ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F),
343                                          C_00B41C_CU_EN, 0, info));
344 
345       ac_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
346                      S_028C48_MAX_ALLOC_COUNT(info->pbb_max_alloc_count - 1) |
347                      S_028C48_MAX_PRIM_PER_BATCH(1023));
348 
349       ac_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1);
350       ac_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
351    }
352 }
353 
354 static void
gfx10_init_graphics_preamble_state(const struct ac_preamble_state * state,struct ac_pm4_state * pm4)355 gfx10_init_graphics_preamble_state(const struct ac_preamble_state *state,
356                                   struct ac_pm4_state *pm4)
357 {
358    const struct radeon_info *info = pm4->info;
359    unsigned meta_write_policy, meta_read_policy, color_write_policy, color_read_policy;
360    unsigned zs_write_policy, zs_read_policy;
361    unsigned cache_no_alloc = info->gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11:
362                                                         V_02807C_CACHE_NOA_GFX10;
363 
364    if (state->gfx10.cache_rb_gl2) {
365       color_write_policy = V_028410_CACHE_LRU_WR;
366       color_read_policy = V_028410_CACHE_LRU_RD;
367       zs_write_policy = V_02807C_CACHE_LRU_WR;
368       zs_read_policy = V_02807C_CACHE_LRU_RD;
369       meta_write_policy = V_02807C_CACHE_LRU_WR;
370       meta_read_policy = V_02807C_CACHE_LRU_RD;
371    } else {
372       color_write_policy = V_028410_CACHE_STREAM;
373       color_read_policy = cache_no_alloc;
374       zs_write_policy = V_02807C_CACHE_STREAM;
375       zs_read_policy = cache_no_alloc;
376 
377       /* Enable CMASK/HTILE/DCC caching in L2 for small chips. */
378       if (info->max_render_backends <= 4) {
379          meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
380          meta_read_policy = V_02807C_CACHE_LRU_RD;  /* cache reads */
381       } else {
382          meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */
383          meta_read_policy = cache_no_alloc; /* don't cache reads that miss */
384       }
385    }
386 
387    const unsigned cu_mask_ps = info->gfx_level >= GFX10_3 ? ac_gfx103_get_cu_mask_ps(info) : ~0u;
388    ac_pm4_set_reg_idx3(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
389                        ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) |
390                                       S_00B01C_WAVE_LIMIT_GFX7(0x3F) |
391                                       S_00B01C_LDS_GROUP_SIZE_GFX11(info->gfx_level >= GFX11),
392                                       C_00B01C_CU_EN, 0, info));
393    ac_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
394                   S_00B0C0_SOFT_GROUPING_EN(1) |
395                   S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
396    ac_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
397    ac_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
398    ac_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
399    ac_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);
400 
401    if (info->gfx_level < GFX11) {
402       /* Shader registers - VS. */
403       ac_pm4_set_reg_idx3(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS,
404                           ac_apply_cu_en(S_00B104_CU_EN(0xffff), /* CUs 16-31 */
405                                          C_00B104_CU_EN, 16, info));
406       ac_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
407       ac_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
408       ac_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);
409       ac_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);
410       ac_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);
411 
412       /* Shader registers - PS. */
413       unsigned cu_mask_ps = info->gfx_level >= GFX10_3 ? ac_gfx103_get_cu_mask_ps(info) : ~0u;
414       ac_pm4_set_reg_idx3(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
415                           ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16), /* CUs 16-31 */
416                                             C_00B004_CU_EN, 16, info));
417 
418       /* Shader registers - HS. */
419       ac_pm4_set_reg_idx3(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
420                           ac_apply_cu_en(S_00B404_CU_EN(0xffff), /* CUs 16-31 */
421                                          C_00B404_CU_EN, 16, info));
422    }
423 
424    /* Shader registers - GS. */
425    ac_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
426    ac_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
427    ac_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
428    ac_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
429    ac_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
430                   S_00B324_MEM_BASE(info->address32_hi >> 8));
431 
432    ac_pm4_set_reg_idx3(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
433                        ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F),
434                                       C_00B41C_CU_EN, 0, info));
435    ac_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
436    ac_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
437    ac_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
438    ac_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
439    ac_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS,
440                   S_00B524_MEM_BASE(info->address32_hi >> 8));
441 
442    /* Context registers. */
443    if (info->gfx_level < GFX11) {
444       ac_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL, S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF));
445    }
446 
447    ac_pm4_set_reg(pm4, R_02807C_DB_RMI_L2_CACHE_CONTROL,
448                   S_02807C_Z_WR_POLICY(zs_write_policy) |
449                   S_02807C_S_WR_POLICY(zs_write_policy) |
450                   S_02807C_HTILE_WR_POLICY(meta_write_policy) |
451                   S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) | /* occlusion query writes */
452                   S_02807C_Z_RD_POLICY(zs_read_policy) |
453                   S_02807C_S_RD_POLICY(zs_read_policy) |
454                   S_02807C_HTILE_RD_POLICY(meta_read_policy));
455    ac_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, state->border_color_va >> 8);
456    ac_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(state->border_color_va >> 40));
457 
458    ac_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL,
459                   (info->gfx_level >= GFX11 ?
460                       S_028410_COLOR_WR_POLICY_GFX11(color_write_policy) |
461                       S_028410_COLOR_RD_POLICY(color_read_policy) |
462                       S_028410_DCC_WR_POLICY_GFX11(meta_write_policy) |
463                       S_028410_DCC_RD_POLICY(meta_read_policy)
464                     :
465                       S_028410_COLOR_WR_POLICY_GFX10(color_write_policy) |
466                       S_028410_COLOR_RD_POLICY(color_read_policy)) |
467                       S_028410_FMASK_WR_POLICY(color_write_policy) |
468                       S_028410_FMASK_RD_POLICY(color_read_policy) |
469                       S_028410_CMASK_WR_POLICY(meta_write_policy) |
470                       S_028410_CMASK_RD_POLICY(meta_read_policy) |
471                       S_028410_DCC_WR_POLICY_GFX10(meta_write_policy) |
472                       S_028410_DCC_RD_POLICY(meta_read_policy));
473 
474    if (info->gfx_level >= GFX10_3)
475       ac_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
476 
477    ac_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
478                   S_028830_SMALL_PRIM_FILTER_ENABLE(1));
479 
480    ac_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
481    ac_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1);
482    ac_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
483                   info->gfx_level >= GFX11 ?
484                      S_028B50_ACCUM_ISOLINE(128) |
485                      S_028B50_ACCUM_TRI(128) |
486                      S_028B50_ACCUM_QUAD(128) |
487                      S_028B50_DONUT_SPLIT_GFX9(24) |
488                      S_028B50_TRAP_SPLIT(6)
489                    :
490                      S_028B50_ACCUM_ISOLINE(12) |
491                      S_028B50_ACCUM_TRI(30) |
492                      S_028B50_ACCUM_QUAD(24) |
493                      S_028B50_DONUT_SPLIT_GFX9(24) |
494                      S_028B50_TRAP_SPLIT(6));
495 
496    /* GFX11+ shouldn't subtract 1 from pbb_max_alloc_count.  */
497    unsigned gfx10_one = info->gfx_level < GFX11;
498    ac_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
499                   S_028C48_MAX_ALLOC_COUNT(info->pbb_max_alloc_count - gfx10_one) |
500                   S_028C48_MAX_PRIM_PER_BATCH(1023));
501 
502    if (info->gfx_level >= GFX11_5)
503       ac_pm4_set_reg(pm4, R_028C54_PA_SC_BINNER_CNTL_2,
504                      S_028C54_ENABLE_PING_PONG_BIN_ORDER(1));
505 
506    /* Break up a pixel wave if it contains deallocs for more than
507     * half the parameter cache.
508     *
509     * To avoid a deadlock where pixel waves aren't launched
510     * because they're waiting for more pixels while the frontend
511     * is stuck waiting for PC space, the maximum allowed value is
512     * the size of the PC minus the largest possible allocation for
513     * a single primitive shader subgroup.
514     */
515    ac_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL,
516                   S_028C50_MAX_DEALLOCS_IN_WAVE(info->gfx_level >= GFX11 ? 16 : 512));
517    if (info->gfx_level < GFX11)
518       ac_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); /* Reuse for legacy (non-NGG) only. */
519 
520    /* Uconfig registers. */
521    ac_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0);
522    ac_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0);
523    if (info->gfx_level >= GFX11) {
524       /* This is changed by draws for indexed draws, but we need to set DISABLE_FOR_AUTO_INDEX
525        * here, which disables primitive restart for all non-indexed draws, so that those draws
526        * won't have to set this state.
527        */
528       ac_pm4_set_reg(pm4, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, S_03092C_DISABLE_FOR_AUTO_INDEX(1));
529    }
530    ac_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0);
531    ac_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
532    ac_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0);
533    ac_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0);
534 
535    ac_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0);
536    ac_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0);
537 
538    if (info->gfx_level >= GFX11) {
539       uint64_t rb_mask = BITFIELD64_MASK(info->max_render_backends);
540 
541       ac_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 2, 0));
542       ac_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1));
543       ac_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) |
544                           PIXEL_PIPE_STATE_CNTL_STRIDE(2) |
545                           PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask));
546       ac_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask));
547    }
548 }
549 
550 static void
gfx12_init_graphics_preamble_state(const struct ac_preamble_state * state,struct ac_pm4_state * pm4)551 gfx12_init_graphics_preamble_state(const struct ac_preamble_state *state,
552                                   struct ac_pm4_state *pm4)
553 {
554    const struct radeon_info *info = pm4->info;
555    unsigned color_write_policy, color_read_policy;
556    enum gfx12_store_temporal_hint color_write_temporal_hint, zs_write_temporal_hint;
557    enum gfx12_load_temporal_hint color_read_temporal_hint, zs_read_temporal_hint;
558 
559    if (state->gfx10.cache_rb_gl2) {
560       color_write_policy = V_028410_CACHE_LRU_WR;
561       color_read_policy = V_028410_CACHE_LRU_RD;
562       color_write_temporal_hint = gfx12_store_regular_temporal;
563       color_read_temporal_hint = gfx12_load_regular_temporal;
564       zs_write_temporal_hint = gfx12_store_regular_temporal;
565       zs_read_temporal_hint = gfx12_load_regular_temporal;
566    } else {
567       color_write_policy = V_028410_CACHE_STREAM;
568       color_read_policy = V_02807C_CACHE_NOA_GFX11;
569       color_write_temporal_hint = gfx12_store_near_non_temporal_far_regular_temporal;
570       color_read_temporal_hint = gfx12_load_near_non_temporal_far_regular_temporal;
571       zs_write_temporal_hint = gfx12_store_near_non_temporal_far_regular_temporal;
572       zs_read_temporal_hint = gfx12_load_near_non_temporal_far_regular_temporal;
573    }
574 
575    /* Shader registers - PS */
576    ac_pm4_set_reg_idx3(pm4, R_00B018_SPI_SHADER_PGM_RSRC3_PS,
577                        ac_apply_cu_en(S_00B018_CU_EN(0xffff),
578                                       C_00B018_CU_EN, 0, info));
579    ac_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
580                   S_00B0C0_SOFT_GROUPING_EN(1) |
581                   S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
582    ac_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
583    ac_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
584    ac_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
585    ac_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);
586 
587    /* Shader registers - GS */
588    ac_pm4_set_reg(pm4, R_00B218_SPI_SHADER_PGM_HI_ES,
589                   S_00B324_MEM_BASE(info->address32_hi >> 8));
590    ac_pm4_set_reg_idx3(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
591                        ac_apply_cu_en(0xfffffdfd, 0, 0, info));
592    ac_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
593    ac_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
594    ac_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
595    ac_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
596 
597    /* Shader registers - HS */
598    ac_pm4_set_reg(pm4, R_00B418_SPI_SHADER_PGM_HI_LS,
599                   S_00B524_MEM_BASE(info->address32_hi >> 8));
600    ac_pm4_set_reg_idx3(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
601                        ac_apply_cu_en(0xffffffff, 0, 0, info));
602    ac_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
603    ac_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
604    ac_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
605    ac_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
606 
607    /* Context registers */
608    ac_pm4_set_reg(pm4, R_028040_DB_GL1_INTERFACE_CONTROL, 0);
609    ac_pm4_set_reg(pm4, R_028048_DB_MEM_TEMPORAL,
610                   S_028048_Z_TEMPORAL_READ(zs_read_temporal_hint) |
611                   S_028048_Z_TEMPORAL_WRITE(zs_write_temporal_hint) |
612                   S_028048_STENCIL_TEMPORAL_READ(zs_read_temporal_hint) |
613                   S_028048_STENCIL_TEMPORAL_WRITE(zs_write_temporal_hint) |
614                   S_028048_OCCLUSION_TEMPORAL_WRITE(gfx12_store_regular_temporal));
615    ac_pm4_set_reg(pm4, R_028064_DB_VIEWPORT_CONTROL, 0);
616    ac_pm4_set_reg(pm4, R_028068_DB_SPI_VRS_CENTER_LOCATION, 0);
617    ac_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, state->border_color_va >> 8);
618    ac_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(state->border_color_va >> 40));
619    ac_pm4_set_reg(pm4, R_02808C_DB_STENCIL_OPVAL, S_02808C_OPVAL(1) | S_02808C_OPVAL_BF(1));
620    ac_pm4_set_reg(pm4, R_0280F8_SC_MEM_TEMPORAL,
621                   S_0280F8_VRS_TEMPORAL_READ(gfx12_load_regular_temporal) |
622                   S_0280F8_VRS_TEMPORAL_WRITE(gfx12_store_regular_temporal) |
623                   S_0280F8_HIZ_TEMPORAL_READ(gfx12_load_regular_temporal) |
624                   S_0280F8_HIZ_TEMPORAL_WRITE(gfx12_store_regular_temporal) |
625                   S_0280F8_HIS_TEMPORAL_READ(gfx12_load_regular_temporal) |
626                   S_0280F8_HIS_TEMPORAL_WRITE(gfx12_store_regular_temporal));
627    ac_pm4_set_reg(pm4, R_0280FC_SC_MEM_SPEC_READ,
628                   S_0280FC_VRS_SPECULATIVE_READ(gfx12_spec_read_force_on) |
629                   S_0280FC_HIZ_SPECULATIVE_READ(gfx12_spec_read_force_on) |
630                   S_0280FC_HIS_SPECULATIVE_READ(gfx12_spec_read_force_on));
631 
632    /* We don't need to initialize PA_SC_VPORT_* because we don't enable
633     * IMPLICIT_VPORT_SCISSOR_ENABLE, but it might be useful for Vulkan.
634     *
635     * If you set IMPLICIT_VPORT_SCISSOR_ENABLE, PA_SC_VPORT_* will take effect and allows
636     * setting a scissor that covers the whole viewport. If you set VPORT_SCISSOR_ENABLE,
637     * PA_SC_VPORT_SCISSOR_* will take effect and allows setting a user scissor. If you set
638     * both enable bits, the hw will use the intersection of both. It allows separating implicit
639     * viewport scissors from user scissors.
640     */
641    ac_pm4_set_reg(pm4, R_028180_PA_SC_SCREEN_SCISSOR_TL, 0);
642    ac_pm4_set_reg(pm4, R_028184_PA_SC_SCREEN_SCISSOR_BR,
643                   S_028184_BR_X(65535) | S_028184_BR_Y(65535)); /* inclusive bounds */
644    ac_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, 0);
645    ac_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, 0);
646    ac_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
647                   S_028244_BR_X(65535) | S_028244_BR_Y(65535)); /* inclusive bounds */
648    ac_pm4_set_reg(pm4, R_028358_PA_SC_SCREEN_EXTENT_CONTROL, 0);
649    ac_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
650                   info->pa_sc_tile_steering_override);
651    ac_pm4_set_reg(pm4, R_0283E0_PA_SC_VRS_INFO, 0);
652 
653    ac_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL,
654                   S_028410_COLOR_WR_POLICY_GFX11(color_write_policy) |
655                   S_028410_COLOR_RD_POLICY(color_read_policy));
656    ac_pm4_set_reg(pm4, R_0286E4_SPI_BARYC_SSAA_CNTL, S_0286E4_COVERED_CENTROID_IS_CENTER(1));
657    ac_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
658    ac_pm4_set_reg(pm4, R_0287D4_PA_CL_POINT_X_RAD, 0);
659    ac_pm4_set_reg(pm4, R_0287D8_PA_CL_POINT_Y_RAD, 0);
660    ac_pm4_set_reg(pm4, R_0287DC_PA_CL_POINT_SIZE, 0);
661    ac_pm4_set_reg(pm4, R_0287E0_PA_CL_POINT_CULL_RAD, 0);
662    ac_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
663    ac_pm4_set_reg(pm4, R_028824_PA_SU_LINE_STIPPLE_CNTL, 0);
664    ac_pm4_set_reg(pm4, R_028828_PA_SU_LINE_STIPPLE_SCALE, 0);
665    ac_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
666                   S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
667                   S_028830_SC_1XMSAA_COMPATIBLE_DISABLE(1) /* use sample locations even for MSAA 1x */);
668    ac_pm4_set_reg(pm4, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0);
669    ac_pm4_set_reg(pm4, R_028840_PA_STEREO_CNTL, S_028840_STEREO_MODE(1));
670 
671    ac_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
672    ac_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
673    ac_pm4_set_reg(pm4, R_028A50_GE_SE_ENHANCE, 0);
674    ac_pm4_set_reg(pm4, R_028A70_GE_IA_ENHANCE, 0);
675    ac_pm4_set_reg(pm4, R_028A80_GE_WD_ENHANCE, 0);
676    ac_pm4_set_reg(pm4, R_028A9C_VGT_REUSE_OFF, 0);
677    ac_pm4_set_reg(pm4, R_028AA0_VGT_DRAW_PAYLOAD_CNTL, 0);
678    ac_pm4_set_reg(pm4, R_028ABC_DB_HTILE_SURFACE, 0);
679 
680    ac_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
681    ac_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
682                   S_028B50_ACCUM_ISOLINE(128) |
683                   S_028B50_ACCUM_TRI(128) |
684                   S_028B50_ACCUM_QUAD(128) |
685                   S_028B50_DONUT_SPLIT_GFX9(24) |
686                   S_028B50_TRAP_SPLIT(6));
687    ac_pm4_set_reg(pm4, R_028BC0_PA_SC_HISZ_RENDER_OVERRIDE, 0);
688 
689    ac_pm4_set_reg(pm4, R_028C40_PA_SC_BINNER_OUTPUT_TIMEOUT_COUNTER, 0x800);
690    ac_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
691                   S_028C48_MAX_ALLOC_COUNT(254) |
692                   S_028C48_MAX_PRIM_PER_BATCH(511));
693    ac_pm4_set_reg(pm4, R_028C4C_PA_SC_BINNER_CNTL_2, S_028C4C_ENABLE_PING_PONG_BIN_ORDER(1));
694    ac_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(64));
695    ac_pm4_set_reg(pm4, R_028C58_PA_SC_SHADER_CONTROL,
696                   S_028C58_REALIGN_DQUADS_AFTER_N_WAVES(1));
697 
698    for (unsigned i = 0; i < 8; i++) {
699       ac_pm4_set_reg(pm4, R_028F00_CB_MEM0_INFO + i * 4,
700                      S_028F00_TEMPORAL_READ(color_read_temporal_hint) |
701                      S_028F00_TEMPORAL_WRITE(color_write_temporal_hint));
702    }
703 
704    /* Uconfig registers. */
705    ac_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0);
706    ac_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0);
707    /* This is changed by draws for indexed draws, but we need to set DISABLE_FOR_AUTO_INDEX
708     * here, which disables primitive restart for all non-indexed draws, so that those draws
709     * won't have to set this state.
710     */
711    ac_pm4_set_reg(pm4, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, S_03092C_DISABLE_FOR_AUTO_INDEX(1));
712    ac_pm4_set_reg(pm4, R_030950_GE_GS_THROTTLE,
713                   S_030950_T0(0x1) |
714                   S_030950_T1(0x4) |
715                   S_030950_T2(0x3) |
716                   S_030950_STALL_CYCLES(0x40) |
717                   S_030950_FACTOR1(0x2) |
718                   S_030950_FACTOR2(0x3) |
719                   S_030950_ENABLE_THROTTLE(0) |
720                   S_030950_NUM_INIT_GRPS(0xff));
721    ac_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0);
722    ac_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
723    ac_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0);
724    ac_pm4_set_reg(pm4, R_030980_GE_USER_VGPR_EN, 0);
725    ac_pm4_set_reg(pm4, R_0309B4_VGT_PRIMITIVEID_RESET, 0);
726    ac_pm4_set_reg(pm4, R_03098C_GE_VRS_RATE, 0);
727    ac_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0);
728    ac_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0);
729 
730    ac_pm4_set_reg(pm4, R_031128_SPI_GRP_LAUNCH_GUARANTEE_ENABLE,
731                   S_031128_ENABLE(1) |
732                   S_031128_GS_ASSIST_EN(1) |
733                   S_031128_MRT_ASSIST_EN(1) |
734                   S_031128_GFX_NUM_LOCK_WGP(2) |
735                   S_031128_CS_NUM_LOCK_WGP(2) |
736                   S_031128_LOCK_PERIOD(1) |
737                   S_031128_LOCK_MAINT_COUNT(1));
738    ac_pm4_set_reg(pm4, R_03112C_SPI_GRP_LAUNCH_GUARANTEE_CTRL,
739                   S_03112C_NUM_MRT_THRESHOLD(3) |
740                   S_03112C_GFX_PENDING_THRESHOLD(4) |
741                   S_03112C_PRIORITY_LOST_THRESHOLD(4) |
742                   S_03112C_ALLOC_SUCCESS_THRESHOLD(4) |
743                   S_03112C_CS_WAVE_THRESHOLD_HIGH(8));
744 
745    uint64_t rb_mask = BITFIELD64_MASK(info->max_render_backends);
746 
747    ac_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 2, 0));
748    ac_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1));
749    ac_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) |
750                        PIXEL_PIPE_STATE_CNTL_STRIDE(2) |
751                        PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask));
752    ac_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask));
753 }
754 
755 void
ac_init_graphics_preamble_state(const struct ac_preamble_state * state,struct ac_pm4_state * pm4)756 ac_init_graphics_preamble_state(const struct ac_preamble_state *state,
757                                struct ac_pm4_state *pm4)
758 {
759    const struct radeon_info *info = pm4->info;
760 
761    if (info->gfx_level >= GFX12) {
762       gfx12_init_graphics_preamble_state(state, pm4);
763    } else if (info->gfx_level >= GFX10) {
764       gfx10_init_graphics_preamble_state(state, pm4);
765    } else {
766       gfx6_init_graphics_preamble_state(state, pm4);
767    }
768 }
769