1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 * Copyright 2024 Valve Corporation
4 *
5 * SPDX-License-Identifier: MIT
6 */
7
8 #include "ac_descriptors.h"
9 #include "ac_gpu_info.h"
10 #include "ac_formats.h"
11 #include "ac_surface.h"
12
13 #include "gfx10_format_table.h"
14 #include "sid.h"
15
16 #include "util/u_math.h"
17 #include "util/format/u_format.h"
18
19 unsigned
ac_map_swizzle(unsigned swizzle)20 ac_map_swizzle(unsigned swizzle)
21 {
22 switch (swizzle) {
23 case PIPE_SWIZZLE_Y:
24 return V_008F0C_SQ_SEL_Y;
25 case PIPE_SWIZZLE_Z:
26 return V_008F0C_SQ_SEL_Z;
27 case PIPE_SWIZZLE_W:
28 return V_008F0C_SQ_SEL_W;
29 case PIPE_SWIZZLE_0:
30 return V_008F0C_SQ_SEL_0;
31 case PIPE_SWIZZLE_1:
32 return V_008F0C_SQ_SEL_1;
33 default: /* PIPE_SWIZZLE_X */
34 return V_008F0C_SQ_SEL_X;
35 }
36 }
37
38 void
ac_build_sampler_descriptor(const enum amd_gfx_level gfx_level,const struct ac_sampler_state * state,uint32_t desc[4])39 ac_build_sampler_descriptor(const enum amd_gfx_level gfx_level, const struct ac_sampler_state *state, uint32_t desc[4])
40 {
41 const unsigned perf_mip = state->max_aniso_ratio ? state->max_aniso_ratio + 6 : 0;
42 const bool compat_mode = gfx_level == GFX8 || gfx_level == GFX9;
43
44 desc[0] = S_008F30_CLAMP_X(state->address_mode_u) |
45 S_008F30_CLAMP_Y(state->address_mode_v) |
46 S_008F30_CLAMP_Z(state->address_mode_w) |
47 S_008F30_MAX_ANISO_RATIO(state->max_aniso_ratio) |
48 S_008F30_DEPTH_COMPARE_FUNC(state->depth_compare_func) |
49 S_008F30_FORCE_UNNORMALIZED(state->unnormalized_coords) |
50 S_008F30_ANISO_THRESHOLD(state->max_aniso_ratio >> 1) |
51 S_008F30_ANISO_BIAS(state->max_aniso_ratio) |
52 S_008F30_DISABLE_CUBE_WRAP(!state->cube_wrap) |
53 S_008F30_COMPAT_MODE(compat_mode) |
54 S_008F30_TRUNC_COORD(state->trunc_coord) |
55 S_008F30_FILTER_MODE(state->filter_mode);
56 desc[1] = 0;
57 desc[2] = S_008F38_XY_MAG_FILTER(state->mag_filter) |
58 S_008F38_XY_MIN_FILTER(state->min_filter) |
59 S_008F38_MIP_FILTER(state->mip_filter);
60 desc[3] = S_008F3C_BORDER_COLOR_TYPE(state->border_color_type);
61
62 if (gfx_level >= GFX12) {
63 desc[1] |= S_008F34_MIN_LOD_GFX12(util_unsigned_fixed(CLAMP(state->min_lod, 0, 17), 8)) |
64 S_008F34_MAX_LOD_GFX12(util_unsigned_fixed(CLAMP(state->max_lod, 0, 17), 8));
65 desc[2] |= S_008F38_PERF_MIP_LO(perf_mip);
66 desc[3] |= S_008F3C_PERF_MIP_HI(perf_mip >> 2);
67 } else {
68 desc[1] |= S_008F34_MIN_LOD_GFX6(util_unsigned_fixed(CLAMP(state->min_lod, 0, 15), 8)) |
69 S_008F34_MAX_LOD_GFX6(util_unsigned_fixed(CLAMP(state->max_lod, 0, 15), 8)) |
70 S_008F34_PERF_MIP(perf_mip);
71 }
72
73 if (gfx_level >= GFX10) {
74 desc[2] |= S_008F38_LOD_BIAS(util_signed_fixed(CLAMP(state->lod_bias, -32, 31), 8)) |
75 S_008F38_ANISO_OVERRIDE_GFX10(!state->aniso_single_level);
76 } else {
77 desc[2] |= S_008F38_LOD_BIAS(util_signed_fixed(CLAMP(state->lod_bias, -16, 16), 8)) |
78 S_008F38_DISABLE_LSB_CEIL(gfx_level <= GFX8) |
79 S_008F38_FILTER_PREC_FIX(1) |
80 S_008F38_ANISO_OVERRIDE_GFX8(gfx_level >= GFX8 && !state->aniso_single_level);
81 }
82
83 if (gfx_level >= GFX11) {
84 desc[3] |= S_008F3C_BORDER_COLOR_PTR_GFX11(state->border_color_ptr);
85 } else {
86 desc[3] |= S_008F3C_BORDER_COLOR_PTR_GFX6(state->border_color_ptr);
87 }
88 }
89
90 static void
ac_build_gfx6_fmask_descriptor(const enum amd_gfx_level gfx_level,const struct ac_fmask_state * state,uint32_t desc[8])91 ac_build_gfx6_fmask_descriptor(const enum amd_gfx_level gfx_level, const struct ac_fmask_state *state, uint32_t desc[8])
92 {
93 const struct radeon_surf *surf = state->surf;
94 const uint64_t va = state->va + surf->fmask_offset;
95 uint32_t data_format, num_format;
96
97 #define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))
98 if (gfx_level == GFX9) {
99 data_format = V_008F14_IMG_DATA_FORMAT_FMASK;
100 switch (FMASK(state->num_samples, state->num_storage_samples)) {
101 case FMASK(2, 1):
102 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_1;
103 break;
104 case FMASK(2, 2):
105 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
106 break;
107 case FMASK(4, 1):
108 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_1;
109 break;
110 case FMASK(4, 2):
111 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_2;
112 break;
113 case FMASK(4, 4):
114 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
115 break;
116 case FMASK(8, 1):
117 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_8_1;
118 break;
119 case FMASK(8, 2):
120 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_8_2;
121 break;
122 case FMASK(8, 4):
123 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_4;
124 break;
125 case FMASK(8, 8):
126 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
127 break;
128 case FMASK(16, 1):
129 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_16_1;
130 break;
131 case FMASK(16, 2):
132 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_16_2;
133 break;
134 case FMASK(16, 4):
135 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_4;
136 break;
137 case FMASK(16, 8):
138 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_8;
139 break;
140 default:
141 unreachable("invalid nr_samples");
142 }
143 } else {
144 switch (FMASK(state->num_samples, state->num_storage_samples)) {
145 case FMASK(2, 1):
146 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1;
147 break;
148 case FMASK(2, 2):
149 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
150 break;
151 case FMASK(4, 1):
152 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1;
153 break;
154 case FMASK(4, 2):
155 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2;
156 break;
157 case FMASK(4, 4):
158 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
159 break;
160 case FMASK(8, 1):
161 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1;
162 break;
163 case FMASK(8, 2):
164 data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2;
165 break;
166 case FMASK(8, 4):
167 data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4;
168 break;
169 case FMASK(8, 8):
170 data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
171 break;
172 case FMASK(16, 1):
173 data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1;
174 break;
175 case FMASK(16, 2):
176 data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2;
177 break;
178 case FMASK(16, 4):
179 data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4;
180 break;
181 case FMASK(16, 8):
182 data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8;
183 break;
184 default:
185 unreachable("invalid nr_samples");
186 }
187 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
188 }
189 #undef FMASK
190
191 desc[0] = (va >> 8) | surf->fmask_tile_swizzle;
192 desc[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
193 S_008F14_DATA_FORMAT(data_format) |
194 S_008F14_NUM_FORMAT(num_format);
195 desc[2] = S_008F18_WIDTH(state->width - 1) |
196 S_008F18_HEIGHT(state->height - 1);
197 desc[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
198 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
199 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
200 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
201 S_008F1C_TYPE(state->type);
202 desc[4] = 0;
203 desc[5] = S_008F24_BASE_ARRAY(state->first_layer);
204 desc[6] = 0;
205 desc[7] = 0;
206
207 if (gfx_level == GFX9) {
208 desc[3] |= S_008F1C_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode);
209 desc[4] |= S_008F20_DEPTH(state->last_layer) |
210 S_008F20_PITCH(surf->u.gfx9.color.fmask_epitch);
211 desc[5] |= S_008F24_META_PIPE_ALIGNED(1) |
212 S_008F24_META_RB_ALIGNED(1);
213
214 if (state->tc_compat_cmask) {
215 const uint64_t cmask_va = state->va + surf->cmask_offset;
216
217 desc[5] |= S_008F24_META_DATA_ADDRESS(cmask_va >> 40);
218 desc[6] |= S_008F28_COMPRESSION_EN(1);
219 desc[7] |= cmask_va >> 8;
220 }
221 } else {
222 desc[3] |= S_008F1C_TILING_INDEX(surf->u.legacy.color.fmask.tiling_index);
223 desc[4] |= S_008F20_DEPTH(state->depth - 1) |
224 S_008F20_PITCH(surf->u.legacy.color.fmask.pitch_in_pixels - 1);
225 desc[5] |= S_008F24_LAST_ARRAY(state->last_layer);
226
227 if (state->tc_compat_cmask) {
228 const uint64_t cmask_va = state->va + surf->cmask_offset;
229
230 desc[6] |= S_008F28_COMPRESSION_EN(1);
231 desc[7] |= (cmask_va >> 8) | surf->fmask_tile_swizzle;
232 }
233 }
234 }
235
236 static void
ac_build_gfx10_fmask_descriptor(const enum amd_gfx_level gfx_level,const struct ac_fmask_state * state,uint32_t desc[8])237 ac_build_gfx10_fmask_descriptor(const enum amd_gfx_level gfx_level, const struct ac_fmask_state *state, uint32_t desc[8])
238 {
239 const struct radeon_surf *surf = state->surf;
240 const uint64_t va = state->va + surf->fmask_offset;
241 uint32_t format;
242
243 #define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))
244 switch (FMASK(state->num_samples, state->num_storage_samples)) {
245 case FMASK(2, 1):
246 format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F1;
247 break;
248 case FMASK(2, 2):
249 format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
250 break;
251 case FMASK(4, 1):
252 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F1;
253 break;
254 case FMASK(4, 2):
255 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F2;
256 break;
257 case FMASK(4, 4):
258 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
259 break;
260 case FMASK(8, 1):
261 format = V_008F0C_GFX10_FORMAT_FMASK8_S8_F1;
262 break;
263 case FMASK(8, 2):
264 format = V_008F0C_GFX10_FORMAT_FMASK16_S8_F2;
265 break;
266 case FMASK(8, 4):
267 format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F4;
268 break;
269 case FMASK(8, 8):
270 format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
271 break;
272 case FMASK(16, 1):
273 format = V_008F0C_GFX10_FORMAT_FMASK16_S16_F1;
274 break;
275 case FMASK(16, 2):
276 format = V_008F0C_GFX10_FORMAT_FMASK32_S16_F2;
277 break;
278 case FMASK(16, 4):
279 format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F4;
280 break;
281 case FMASK(16, 8):
282 format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F8;
283 break;
284 default:
285 unreachable("invalid nr_samples");
286 }
287 #undef FMASK
288
289 desc[0] = (va >> 8) | surf->fmask_tile_swizzle;
290 desc[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
291 S_00A004_FORMAT_GFX10(format) |
292 S_00A004_WIDTH_LO(state->width - 1);
293 desc[2] = S_00A008_WIDTH_HI((state->width - 1) >> 2) |
294 S_00A008_HEIGHT(state->height - 1) |
295 S_00A008_RESOURCE_LEVEL(1);
296 desc[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
297 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
298 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
299 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
300 S_00A00C_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
301 S_00A00C_TYPE(state->type);
302 desc[4] = S_00A010_DEPTH_GFX10(state->last_layer) | S_00A010_BASE_ARRAY(state->first_layer);
303 desc[5] = 0;
304 desc[6] = S_00A018_META_PIPE_ALIGNED(1);
305 desc[7] = 0;
306
307 if (state->tc_compat_cmask) {
308 uint64_t cmask_va = state->va + surf->cmask_offset;
309
310 desc[6] |= S_00A018_COMPRESSION_EN(1);
311 desc[6] |= S_00A018_META_DATA_ADDRESS_LO(cmask_va >> 8);
312 desc[7] |= cmask_va >> 16;
313 }
314 }
315
316 void
ac_build_fmask_descriptor(const enum amd_gfx_level gfx_level,const struct ac_fmask_state * state,uint32_t desc[8])317 ac_build_fmask_descriptor(const enum amd_gfx_level gfx_level, const struct ac_fmask_state *state, uint32_t desc[8])
318 {
319 assert(gfx_level < GFX11);
320
321 if (gfx_level >= GFX10) {
322 ac_build_gfx10_fmask_descriptor(gfx_level, state, desc);
323 } else {
324 ac_build_gfx6_fmask_descriptor(gfx_level, state, desc);
325 }
326 }
327
328 static void
ac_build_gfx6_texture_descriptor(const struct radeon_info * info,const struct ac_texture_state * state,uint32_t desc[8])329 ac_build_gfx6_texture_descriptor(const struct radeon_info *info, const struct ac_texture_state *state, uint32_t desc[8])
330 {
331 const struct util_format_description *fmt_desc = util_format_description(state->format);
332 uint32_t num_format, data_format, num_samples;
333 int first_non_void;
334
335 num_samples = fmt_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ? MAX2(1, state->num_samples)
336 : MAX2(1, state->num_storage_samples);
337
338 first_non_void = util_format_get_first_non_void_channel(state->format);
339
340 num_format = ac_translate_tex_numformat(fmt_desc, first_non_void);
341
342 data_format = ac_translate_tex_dataformat(info, fmt_desc, first_non_void);
343 if (data_format == ~0) {
344 data_format = 0;
345 }
346
347 /* S8 with either Z16 or Z32 HTILE need a special format. */
348 if (info->gfx_level == GFX9 && state->format == PIPE_FORMAT_S8_UINT && state->tc_compat_htile_enabled) {
349 if (state->img_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
350 state->img_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
351 state->img_format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
352 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
353 } else if (state->img_format == PIPE_FORMAT_Z16_UNORM_S8_UINT) {
354 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
355 }
356 }
357
358 desc[0] = 0;
359 desc[1] = S_008F14_MIN_LOD(util_unsigned_fixed(CLAMP(state->min_lod, 0, 15), 8)) |
360 S_008F14_DATA_FORMAT(data_format) |
361 S_008F14_NUM_FORMAT(num_format);
362 desc[2] = S_008F18_WIDTH(state->width - 1) |
363 S_008F18_HEIGHT(state->height - 1) |
364 S_008F18_PERF_MOD(4);
365 desc[3] = S_008F1C_DST_SEL_X(ac_map_swizzle(state->swizzle[0])) |
366 S_008F1C_DST_SEL_Y(ac_map_swizzle(state->swizzle[1])) |
367 S_008F1C_DST_SEL_Z(ac_map_swizzle(state->swizzle[2])) |
368 S_008F1C_DST_SEL_W(ac_map_swizzle(state->swizzle[3])) |
369 S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : state->first_level) |
370 S_008F1C_LAST_LEVEL(num_samples > 1 ? util_logbase2(num_samples) : state->last_level) |
371 S_008F1C_TYPE(state->type);
372 desc[4] = 0;
373 desc[5] = S_008F24_BASE_ARRAY(state->first_layer);
374 desc[6] = 0;
375 desc[7] = 0;
376
377 if (info->gfx_level == GFX9) {
378 const uint32_t bc_swizzle = ac_border_color_swizzle(fmt_desc);
379
380 /* Depth is the last accessible layer on Gfx9.
381 * The hw doesn't need to know the total number of layers.
382 */
383 if (state->type == V_008F1C_SQ_RSRC_IMG_3D)
384 desc[4] |= S_008F20_DEPTH(state->depth - 1);
385 else
386 desc[4] |= S_008F20_DEPTH(state->last_layer);
387
388 desc[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
389 desc[5] |= S_008F24_MAX_MIP(num_samples > 1 ? util_logbase2(num_samples) : state->num_levels - 1);
390 } else {
391 desc[3] |= S_008F1C_POW2_PAD(state->num_levels > 1);
392 desc[4] |= S_008F20_DEPTH(state->depth - 1);
393 desc[5] |= S_008F24_LAST_ARRAY(state->last_layer);
394 }
395
396 if (state->dcc_enabled) {
397 desc[6] = S_008F28_ALPHA_IS_ON_MSB(ac_alpha_is_on_msb(info, state->format));
398 } else {
399 if (!state->aniso_single_level) {
400 /* The last dword is unused by hw. The shader uses it to clear
401 * bits in the first dword of sampler state.
402 */
403 if (info->gfx_level <= GFX7 && state->num_samples <= 1) {
404 if (state->first_level == state->last_level)
405 desc[7] = C_008F30_MAX_ANISO_RATIO;
406 else
407 desc[7] = 0xffffffff;
408 }
409 }
410 }
411 }
412
413 static uint32_t
ac_get_gfx10_img_format(const enum amd_gfx_level gfx_level,const struct ac_texture_state * state)414 ac_get_gfx10_img_format(const enum amd_gfx_level gfx_level, const struct ac_texture_state *state)
415 {
416 const struct gfx10_format *fmt = &ac_get_gfx10_format_table(gfx_level)[state->format];
417 const struct util_format_description *desc = util_format_description(state->format);
418 uint32_t img_format = fmt->img_format;
419
420 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
421 state->gfx10.upgraded_depth && !util_format_has_stencil(desc)) {
422 if (gfx_level >= GFX11) {
423 assert(img_format == V_008F0C_GFX11_FORMAT_32_FLOAT);
424 img_format = V_008F0C_GFX11_FORMAT_32_FLOAT_CLAMP;
425 } else {
426 assert(img_format == V_008F0C_GFX10_FORMAT_32_FLOAT);
427 img_format = V_008F0C_GFX10_FORMAT_32_FLOAT_CLAMP;
428 }
429 }
430
431 return img_format;
432 }
433
434 static void
ac_build_gfx10_texture_descriptor(const struct radeon_info * info,const struct ac_texture_state * state,uint32_t desc[8])435 ac_build_gfx10_texture_descriptor(const struct radeon_info *info, const struct ac_texture_state *state, uint32_t desc[8])
436 {
437 const struct radeon_surf *surf = state->surf;
438 const struct util_format_description *fmt_desc = util_format_description(state->format);
439 const uint32_t img_format = ac_get_gfx10_img_format(info->gfx_level, state);
440 const struct ac_surf_nbc_view *nbc_view = state->gfx9.nbc_view;
441 const uint32_t field_last_level = state->num_samples > 1 ? util_logbase2(state->num_samples) : state->last_level;
442
443 desc[0] = 0;
444 desc[1] = S_00A004_FORMAT_GFX10(img_format) |
445 S_00A004_WIDTH_LO(state->width - 1);
446 desc[2] = S_00A008_WIDTH_HI((state->width - 1) >> 2) |
447 S_00A008_HEIGHT(state->height - 1) |
448 S_00A008_RESOURCE_LEVEL(info->gfx_level < GFX11);
449 desc[3] = S_00A00C_DST_SEL_X(ac_map_swizzle(state->swizzle[0])) |
450 S_00A00C_DST_SEL_Y(ac_map_swizzle(state->swizzle[1])) |
451 S_00A00C_DST_SEL_Z(ac_map_swizzle(state->swizzle[2])) |
452 S_00A00C_DST_SEL_W(ac_map_swizzle(state->swizzle[3])) |
453 S_00A00C_BASE_LEVEL(state->num_samples > 1 ? 0 : state->first_level) |
454 S_00A00C_LAST_LEVEL_GFX10(field_last_level) |
455 S_00A00C_BC_SWIZZLE(ac_border_color_swizzle(fmt_desc)) |
456 S_00A00C_TYPE(state->type);
457
458 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
459 * to know the total number of layers.
460 */
461 desc[4] = S_00A010_DEPTH_GFX10(state->depth) |
462 S_00A010_BASE_ARRAY(state->first_layer);
463
464 /* ARRAY_PITCH is only meaningful for 3D images, 0 means SRV, 1 means UAV.
465 * In SRV mode, BASE_ARRAY is ignored and DEPTH is the last slice of mipmap level 0.
466 * In UAV mode, BASE_ARRAY is the first slice and DEPTH is the last slice of the bound level.
467 */
468 desc[5] = S_00A014_ARRAY_PITCH(state->gfx10.uav3d) | S_00A014_PERF_MOD(4);
469 desc[6] = 0;
470 desc[7] = 0;
471
472 uint32_t max_mip = state->num_samples > 1 ? util_logbase2(state->num_samples) : state->num_levels - 1;
473 if (nbc_view && nbc_view->valid)
474 max_mip = nbc_view->num_levels - 1;
475
476 const uint32_t min_lod_clamped = util_unsigned_fixed(CLAMP(state->min_lod, 0, 15), 8);
477 if (info->gfx_level >= GFX11) {
478 desc[1] |= S_00A004_MAX_MIP_GFX11(max_mip);
479 desc[5] |= S_00A014_MIN_LOD_LO_GFX11(min_lod_clamped);
480 desc[6] |= S_00A018_MIN_LOD_HI(min_lod_clamped >> 5);
481 } else {
482 desc[1] |= S_00A004_MIN_LOD(min_lod_clamped);
483 desc[5] |= S_00A014_MAX_MIP(max_mip);
484 }
485
486 if (state->dcc_enabled) {
487 desc[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
488 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(surf->u.gfx9.color.dcc.max_compressed_block_size) |
489 S_00A018_ALPHA_IS_ON_MSB(ac_alpha_is_on_msb(info, state->format));
490 }
491 }
492
493 static void
ac_build_gfx12_texture_descriptor(const struct radeon_info * info,const struct ac_texture_state * state,uint32_t desc[8])494 ac_build_gfx12_texture_descriptor(const struct radeon_info *info, const struct ac_texture_state *state, uint32_t desc[8])
495 {
496 const struct radeon_surf *surf = state->surf;
497 const struct util_format_description *fmt_desc = util_format_description(state->format);
498 const uint32_t img_format = ac_get_gfx10_img_format(info->gfx_level, state);
499 const uint32_t field_last_level = state->num_samples > 1 ? util_logbase2(state->num_samples) : state->last_level;
500 const bool no_edge_clamp = state->num_levels > 1 && util_format_is_compressed(state->img_format) &&
501 !util_format_is_compressed(state->format);
502 const uint32_t min_lod_clamped = util_unsigned_fixed(CLAMP(state->min_lod, 0, 15), 8);
503 const struct ac_surf_nbc_view *nbc_view = state->gfx9.nbc_view;
504
505 uint32_t max_mip = state->num_samples > 1 ? util_logbase2(state->num_samples) : state->num_levels - 1;
506 if (nbc_view && nbc_view->valid)
507 max_mip = nbc_view->num_levels - 1;
508
509 desc[0] = 0;
510 desc[1] = S_00A004_MAX_MIP_GFX12(max_mip) |
511 S_00A004_FORMAT_GFX12(img_format) |
512 S_00A004_BASE_LEVEL(state->num_samples > 1 ? 0 : state->first_level) |
513 S_00A004_WIDTH_LO(state->width - 1);
514 desc[2] = S_00A008_WIDTH_HI((state->width - 1) >> 2) |
515 S_00A008_HEIGHT(state->height - 1);
516 desc[3] = S_00A00C_DST_SEL_X(ac_map_swizzle(state->swizzle[0])) |
517 S_00A00C_DST_SEL_Y(ac_map_swizzle(state->swizzle[1])) |
518 S_00A00C_DST_SEL_Z(ac_map_swizzle(state->swizzle[2])) |
519 S_00A00C_DST_SEL_W(ac_map_swizzle(state->swizzle[3])) |
520 S_00A00C_NO_EDGE_CLAMP(no_edge_clamp) |
521 S_00A00C_LAST_LEVEL_GFX12(field_last_level) |
522 S_00A00C_BC_SWIZZLE(ac_border_color_swizzle(fmt_desc)) |
523 S_00A00C_TYPE(state->type);
524
525 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
526 * to know the total number of layers.
527 */
528 desc[4] = S_00A010_DEPTH_GFX12(state->depth) |
529 S_00A010_BASE_ARRAY(state->first_layer);
530 desc[5] = S_00A014_UAV3D(state->gfx10.uav3d) |
531 S_00A014_PERF_MOD(4) |
532 S_00A014_MIN_LOD_LO_GFX12(min_lod_clamped);
533 desc[6] = S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(1 /*256B*/) |
534 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(surf->u.gfx9.color.dcc.max_compressed_block_size) |
535 S_00A018_MIN_LOD_HI(min_lod_clamped >> 6);
536 desc[7] = 0;
537 }
538
539 void
ac_build_texture_descriptor(const struct radeon_info * info,const struct ac_texture_state * state,uint32_t desc[8])540 ac_build_texture_descriptor(const struct radeon_info *info, const struct ac_texture_state *state, uint32_t desc[8])
541 {
542 if (info->gfx_level >= GFX12) {
543 ac_build_gfx12_texture_descriptor(info, state, desc);
544 } else if (info->gfx_level >= GFX10) {
545 ac_build_gfx10_texture_descriptor(info, state, desc);
546 } else {
547 ac_build_gfx6_texture_descriptor(info, state, desc);
548 }
549 }
550
551 uint32_t
ac_tile_mode_index(const struct radeon_surf * surf,unsigned level,bool stencil)552 ac_tile_mode_index(const struct radeon_surf *surf, unsigned level, bool stencil)
553 {
554 if (stencil)
555 return surf->u.legacy.zs.stencil_tiling_index[level];
556 else
557 return surf->u.legacy.tiling_index[level];
558 }
559
560 void
ac_set_mutable_tex_desc_fields(const struct radeon_info * info,const struct ac_mutable_tex_state * state,uint32_t desc[8])561 ac_set_mutable_tex_desc_fields(const struct radeon_info *info, const struct ac_mutable_tex_state *state, uint32_t desc[8])
562 {
563 const struct radeon_surf *surf = state->surf;
564 const struct legacy_surf_level *base_level_info = state->gfx6.base_level_info;
565 const struct ac_surf_nbc_view *nbc_view = state->gfx9.nbc_view;
566 uint8_t swizzle = surf->tile_swizzle;
567 uint64_t va = state->va, meta_va = 0;
568
569 if (info->gfx_level >= GFX9) {
570 if (state->is_stencil) {
571 va += surf->u.gfx9.zs.stencil_offset;
572 } else {
573 va += surf->u.gfx9.surf_offset;
574 }
575
576 if (nbc_view && nbc_view->valid) {
577 va += nbc_view->base_address_offset;
578 swizzle = nbc_view->tile_swizzle;
579 }
580 } else {
581 va += (uint64_t)base_level_info->offset_256B * 256;
582 }
583
584 if (!info->has_image_opcodes) {
585 /* Set it as a buffer descriptor. */
586 desc[0] = va;
587 desc[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
588 return;
589 }
590
591 desc[0] = va >> 8;
592 desc[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
593
594 if (info->gfx_level >= GFX8 && info->gfx_level < GFX12) {
595 if (state->dcc_enabled) {
596 meta_va = state->va + surf->meta_offset;
597 if (info->gfx_level == GFX8) {
598 meta_va += surf->u.legacy.color.dcc_level[state->gfx6.base_level].dcc_offset;
599 assert(base_level_info->mode == RADEON_SURF_MODE_2D);
600 }
601
602 unsigned dcc_tile_swizzle = swizzle << 8;
603 dcc_tile_swizzle &= (1 << surf->meta_alignment_log2) - 1;
604 meta_va |= dcc_tile_swizzle;
605 } else if (state->tc_compat_htile_enabled) {
606 meta_va = state->va + surf->meta_offset;
607 }
608 }
609
610 if (info->gfx_level >= GFX10) {
611 desc[0] |= swizzle;
612
613 if (state->is_stencil) {
614 desc[3] |= S_00A00C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode);
615 } else {
616 desc[3] |= S_00A00C_SW_MODE(surf->u.gfx9.swizzle_mode);
617 }
618
619 /* GFX10.3+ can set a custom pitch for 1D and 2D non-array, but it must be a multiple
620 * of 256B.
621 */
622 if (info->gfx_level >= GFX10_3 && surf->u.gfx9.uses_custom_pitch) {
623 ASSERTED unsigned min_alignment = info->gfx_level >= GFX12 ? 128 : 256;
624 assert((surf->u.gfx9.surf_pitch * surf->bpe) % min_alignment == 0);
625 assert(surf->is_linear);
626 unsigned pitch = surf->u.gfx9.surf_pitch;
627
628 /* Subsampled images have the pitch in the units of blocks. */
629 if (surf->blk_w == 2)
630 pitch *= 2;
631
632 if (info->gfx_level >= GFX12) {
633 desc[4] |= S_00A010_DEPTH_GFX12(pitch - 1) | /* DEPTH contains low bits of PITCH. */
634 S_00A010_PITCH_MSB_GFX12((pitch - 1) >> 14);
635 } else {
636 desc[4] |= S_00A010_DEPTH_GFX10(pitch - 1) | /* DEPTH contains low bits of PITCH. */
637 S_00A010_PITCH_MSB_GFX103((pitch - 1) >> 13);
638 }
639 }
640
641 if (info->gfx_level >= GFX12) {
642 /* Color and Z/S always support compressed image stores on Gfx12. Enablement is
643 * mostly controlled by PTE.D (page table bit). The rule is:
644 *
645 * Shader Engines (shaders, CB, DB, SC):
646 * COMPRESSION_ENABLED = PTE.D && COMPRESSION_EN;
647 *
648 * Central Hub (CP, SDMA, indices, tess factor loads):
649 * PTE.D is ignored. Packets and states fully determine enablement.
650 *
651 * If !PTE.D, the states enabling compression in shaders, CB, DB, and SC have no effect.
652 * PTE.D is set per buffer allocation in Linux, not per VM page, so that it's
653 * automatically propagated between processes. We could optionally allow setting it
654 * per VM page too.
655 *
656 * The DCC/HTILE buffer isn't allocated separately on Gfx12 anymore. The DCC/HTILE
657 * metadata storage is mostly hidden from userspace, and any buffer can be compressed.
658 */
659 if (state->dcc_enabled) {
660 desc[6] |= S_00A018_COMPRESSION_EN(1) |
661 S_00A018_WRITE_COMPRESS_ENABLE(state->gfx10.write_compress_enable);
662 }
663 } else if (meta_va) {
664 /* Gfx10-11. */
665 struct gfx9_surf_meta_flags meta = {
666 .rb_aligned = 1,
667 .pipe_aligned = 1,
668 };
669
670 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
671 meta = surf->u.gfx9.color.dcc;
672
673 desc[6] |= S_00A018_COMPRESSION_EN(1) |
674 S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
675 S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8) |
676 /* DCC image stores require the following settings:
677 * - INDEPENDENT_64B_BLOCKS = 0
678 * - INDEPENDENT_128B_BLOCKS = 1
679 * - MAX_COMPRESSED_BLOCK_SIZE = 128B
680 * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
681 *
682 * The same limitations apply to SDMA compressed stores because
683 * SDMA uses the same DCC codec.
684 */
685 S_00A018_WRITE_COMPRESS_ENABLE(state->gfx10.write_compress_enable) |
686 /* TC-compatible MSAA HTILE requires ITERATE_256. */
687 S_00A018_ITERATE_256(state->gfx10.iterate_256);
688
689 desc[7] = meta_va >> 16;
690 }
691 } else if (info->gfx_level == GFX9) {
692 desc[0] |= surf->tile_swizzle;
693
694 if (state->is_stencil) {
695 desc[3] |= S_008F1C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode);
696 desc[4] |= S_008F20_PITCH(surf->u.gfx9.zs.stencil_epitch);
697 } else {
698 desc[3] |= S_008F1C_SW_MODE(surf->u.gfx9.swizzle_mode);
699 desc[4] |= S_008F20_PITCH(surf->u.gfx9.epitch);
700 }
701
702 if (meta_va) {
703 struct gfx9_surf_meta_flags meta = {
704 .rb_aligned = 1,
705 .pipe_aligned = 1,
706 };
707
708 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
709 meta = surf->u.gfx9.color.dcc;
710
711 desc[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
712 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
713 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
714 desc[6] |= S_008F28_COMPRESSION_EN(1);
715 desc[7] = meta_va >> 8;
716 }
717 } else {
718 /* GFX6-GFX8 */
719 unsigned pitch = base_level_info->nblk_x * state->gfx6.block_width;
720 unsigned index = ac_tile_mode_index(surf, state->gfx6.base_level, state->is_stencil);
721
722 /* Only macrotiled modes can set tile swizzle. */
723 if (base_level_info->mode == RADEON_SURF_MODE_2D)
724 desc[0] |= surf->tile_swizzle;
725
726 desc[3] |= S_008F1C_TILING_INDEX(index);
727 desc[4] |= S_008F20_PITCH(pitch - 1);
728
729 if (info->gfx_level == GFX8 && meta_va) {
730 desc[6] |= S_008F28_COMPRESSION_EN(1);
731 desc[7] = meta_va >> 8;
732 }
733 }
734 }
735
736 void
ac_set_buf_desc_word3(const enum amd_gfx_level gfx_level,const struct ac_buffer_state * state,uint32_t * rsrc_word3)737 ac_set_buf_desc_word3(const enum amd_gfx_level gfx_level, const struct ac_buffer_state *state, uint32_t *rsrc_word3)
738 {
739 *rsrc_word3 = S_008F0C_DST_SEL_X(ac_map_swizzle(state->swizzle[0])) |
740 S_008F0C_DST_SEL_Y(ac_map_swizzle(state->swizzle[1])) |
741 S_008F0C_DST_SEL_Z(ac_map_swizzle(state->swizzle[2])) |
742 S_008F0C_DST_SEL_W(ac_map_swizzle(state->swizzle[3])) |
743 S_008F0C_INDEX_STRIDE(state->index_stride) |
744 S_008F0C_ADD_TID_ENABLE(state->add_tid);
745
746 if (gfx_level >= GFX10) {
747 const struct gfx10_format *fmt = &ac_get_gfx10_format_table(gfx_level)[state->format];
748
749 /* OOB_SELECT chooses the out-of-bounds check.
750 *
751 * GFX10:
752 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
753 * - 1: index >= NUM_RECORDS
754 * - 2: NUM_RECORDS == 0
755 * - 3: if SWIZZLE_ENABLE:
756 * swizzle_address >= NUM_RECORDS
757 * else:
758 * offset >= NUM_RECORDS
759 *
760 * GFX11+:
761 * - 0: (index >= NUM_RECORDS) || (offset+payload > STRIDE)
762 * - 1: index >= NUM_RECORDS
763 * - 2: NUM_RECORDS == 0
764 * - 3: if SWIZZLE_ENABLE && STRIDE:
765 * (index >= NUM_RECORDS) || ( offset+payload > STRIDE)
766 * else:
767 * offset+payload > NUM_RECORDS
768 */
769 *rsrc_word3 |= (gfx_level >= GFX12 ? S_008F0C_FORMAT_GFX12(fmt->img_format) :
770 S_008F0C_FORMAT_GFX10(fmt->img_format)) |
771 S_008F0C_OOB_SELECT(state->gfx10_oob_select) |
772 S_008F0C_RESOURCE_LEVEL(gfx_level < GFX11);
773 } else {
774 const struct util_format_description * desc = util_format_description(state->format);
775 const int first_non_void = util_format_get_first_non_void_channel(state->format);
776 const uint32_t num_format = ac_translate_buffer_numformat(desc, first_non_void);
777
778 /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
779 const uint32_t data_format =
780 gfx_level >= GFX8 && state->add_tid ? 0 : ac_translate_buffer_dataformat(desc, first_non_void);
781
782 *rsrc_word3 |= S_008F0C_NUM_FORMAT(num_format) |
783 S_008F0C_DATA_FORMAT(data_format) |
784 S_008F0C_ELEMENT_SIZE(state->element_size);
785 }
786 }
787
788 void
ac_build_buffer_descriptor(const enum amd_gfx_level gfx_level,const struct ac_buffer_state * state,uint32_t desc[4])789 ac_build_buffer_descriptor(const enum amd_gfx_level gfx_level, const struct ac_buffer_state *state, uint32_t desc[4])
790 {
791 uint32_t rsrc_word1 = S_008F04_BASE_ADDRESS_HI(state->va >> 32) | S_008F04_STRIDE(state->stride);
792 uint32_t rsrc_word3;
793
794 if (gfx_level >= GFX11) {
795 rsrc_word1 |= S_008F04_SWIZZLE_ENABLE_GFX11(state->swizzle_enable);
796 } else {
797 rsrc_word1 |= S_008F04_SWIZZLE_ENABLE_GFX6(state->swizzle_enable);
798 }
799
800 ac_set_buf_desc_word3(gfx_level, state, &rsrc_word3);
801
802 desc[0] = state->va;
803 desc[1] = rsrc_word1;
804 desc[2] = state->size;
805 desc[3] = rsrc_word3;
806 }
807
808 void
ac_build_raw_buffer_descriptor(const enum amd_gfx_level gfx_level,uint64_t va,uint32_t size,uint32_t desc[4])809 ac_build_raw_buffer_descriptor(const enum amd_gfx_level gfx_level, uint64_t va, uint32_t size, uint32_t desc[4])
810 {
811 const struct ac_buffer_state ac_state = {
812 .va = va,
813 .size = size,
814 .format = PIPE_FORMAT_R32_FLOAT,
815 .swizzle = {
816 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
817 },
818 .gfx10_oob_select = V_008F0C_OOB_SELECT_RAW,
819 };
820
821 ac_build_buffer_descriptor(gfx_level, &ac_state, desc);
822 }
823
824 void
ac_build_attr_ring_descriptor(const enum amd_gfx_level gfx_level,uint64_t va,uint32_t size,uint32_t stride,uint32_t desc[4])825 ac_build_attr_ring_descriptor(const enum amd_gfx_level gfx_level, uint64_t va, uint32_t size, uint32_t stride, uint32_t desc[4])
826 {
827 assert(gfx_level >= GFX11);
828
829 const struct ac_buffer_state ac_state = {
830 .va = va,
831 .size = size,
832 .format = PIPE_FORMAT_R32G32B32A32_FLOAT,
833 .swizzle = {
834 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
835 },
836 .stride = stride,
837 .gfx10_oob_select = V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET,
838 .swizzle_enable = 3, /* 16B */
839 .index_stride = 2, /* 32 elements */
840 };
841
842 ac_build_buffer_descriptor(gfx_level, &ac_state, desc);
843 }
844
845 static void
ac_init_gfx6_ds_surface(const struct radeon_info * info,const struct ac_ds_state * state,uint32_t db_format,uint32_t stencil_format,struct ac_ds_surface * ds)846 ac_init_gfx6_ds_surface(const struct radeon_info *info, const struct ac_ds_state *state,
847 uint32_t db_format, uint32_t stencil_format, struct ac_ds_surface *ds)
848 {
849 const struct radeon_surf *surf = state->surf;
850 const struct legacy_surf_level *level_info = &surf->u.legacy.level[state->level];
851
852 assert(level_info->nblk_x % 8 == 0 && level_info->nblk_y % 8 == 0);
853
854 if (state->stencil_only)
855 level_info = &surf->u.legacy.zs.stencil_level[state->level];
856
857 ds->u.gfx6.db_htile_data_base = 0;
858 ds->u.gfx6.db_htile_surface = 0;
859 ds->db_depth_base = (state->va >> 8) + surf->u.legacy.level[state->level].offset_256B;
860 ds->db_stencil_base = (state->va >> 8) + surf->u.legacy.zs.stencil_level[state->level].offset_256B;
861 ds->db_depth_view = S_028008_SLICE_START(state->first_layer) |
862 S_028008_SLICE_MAX(state->last_layer) |
863 S_028008_Z_READ_ONLY(state->z_read_only) |
864 S_028008_STENCIL_READ_ONLY(state->stencil_read_only);
865 ds->db_z_info = S_028040_FORMAT(db_format) |
866 S_028040_NUM_SAMPLES(util_logbase2(state->num_samples));
867 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
868
869 if (info->gfx_level >= GFX7) {
870 const uint32_t index = surf->u.legacy.tiling_index[state->level];
871 const uint32_t stencil_index = surf->u.legacy.zs.stencil_tiling_index[state->level];
872 const uint32_t macro_index = surf->u.legacy.macro_tile_index;
873 const uint32_t stencil_tile_mode = info->si_tile_mode_array[stencil_index];
874 const uint32_t macro_mode = info->cik_macrotile_mode_array[macro_index];
875 uint32_t tile_mode = info->si_tile_mode_array[index];
876
877 if (state->stencil_only)
878 tile_mode = stencil_tile_mode;
879
880 ds->u.gfx6.db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
881 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
882 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
883 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
884 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
885 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
886 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
887 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
888 } else {
889 uint32_t tile_mode_index = ac_tile_mode_index(surf, state->level, false);
890 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
891
892 tile_mode_index = ac_tile_mode_index(surf, state->level, true);
893 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
894 if (state->stencil_only)
895 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
896 }
897
898 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
899 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
900 ds->u.gfx6.db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
901
902 if (state->htile_enabled) {
903 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
904 S_028040_ALLOW_EXPCLEAR(state->allow_expclear);
905 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(state->htile_stencil_disabled);
906
907 if (surf->has_stencil) {
908 /* Workaround: For a not yet understood reason, the
909 * combination of MSAA, fast stencil clear and stencil
910 * decompress messes with subsequent stencil buffer
911 * uses. Problem was reproduced on Verde, Bonaire,
912 * Tonga, and Carrizo.
913 *
914 * Disabling EXPCLEAR works around the problem.
915 *
916 * Check piglit's arb_texture_multisample-stencil-clear
917 * test if you want to try changing this.
918 */
919 if (state->num_samples <= 1)
920 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(state->allow_expclear);
921 }
922
923 ds->u.gfx6.db_htile_data_base = (state->va + surf->meta_offset) >> 8;
924 ds->u.gfx6.db_htile_surface = S_028ABC_FULL_CACHE(1);
925 }
926 }
927
928 static void
ac_init_gfx9_ds_surface(const struct radeon_info * info,const struct ac_ds_state * state,uint32_t db_format,uint32_t stencil_format,struct ac_ds_surface * ds)929 ac_init_gfx9_ds_surface(const struct radeon_info *info, const struct ac_ds_state *state,
930 uint32_t db_format, uint32_t stencil_format, struct ac_ds_surface *ds)
931 {
932 const struct radeon_surf *surf = state->surf;
933
934 assert(surf->u.gfx9.surf_offset == 0);
935
936 ds->u.gfx6.db_htile_data_base = 0;
937 ds->u.gfx6.db_htile_surface = 0;
938 ds->db_depth_base = state->va >> 8;
939 ds->db_stencil_base = (state->va + surf->u.gfx9.zs.stencil_offset) >> 8;
940 ds->db_depth_view = S_028008_SLICE_START(state->first_layer) |
941 S_028008_SLICE_MAX(state->last_layer) |
942 S_028008_Z_READ_ONLY(state->z_read_only) |
943 S_028008_STENCIL_READ_ONLY(state->stencil_read_only) |
944 S_028008_MIPID_GFX9(state->level);
945
946 if (info->gfx_level >= GFX10) {
947 ds->db_depth_view |= S_028008_SLICE_START_HI(state->first_layer >> 11) |
948 S_028008_SLICE_MAX_HI(state->last_layer >> 11);
949 }
950
951 ds->db_z_info = S_028038_FORMAT(db_format) |
952 S_028038_NUM_SAMPLES(util_logbase2(state->num_samples)) |
953 S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
954 S_028038_MAXMIP(state->num_levels - 1) |
955 S_028040_ITERATE_256(info->gfx_level >= GFX11);
956 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
957 S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
958 S_028044_ITERATE_256(info->gfx_level >= GFX11);
959
960 if (info->gfx_level == GFX9) {
961 ds->u.gfx6.db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
962 ds->u.gfx6.db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
963 }
964
965 ds->db_depth_size = S_02801C_X_MAX(state->width - 1) |
966 S_02801C_Y_MAX(state->height - 1);
967
968 if (state->htile_enabled) {
969 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1) |
970 S_028038_ALLOW_EXPCLEAR(state->allow_expclear);
971 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(state->htile_stencil_disabled);
972
973 if (surf->has_stencil && !state->htile_stencil_disabled && state->num_samples <= 1) {
974 /* Stencil buffer workaround ported from the GFX6-GFX8 code.
975 * See that for explanation.
976 */
977 ds->db_stencil_info |= S_02803C_ALLOW_EXPCLEAR(state->allow_expclear);
978 }
979
980 ds->u.gfx6.db_htile_data_base = (state->va + surf->meta_offset) >> 8;
981 ds->u.gfx6.db_htile_surface = S_028ABC_FULL_CACHE(1) |
982 S_028ABC_PIPE_ALIGNED(1);
983
984 if (state->vrs_enabled) {
985 assert(info->gfx_level == GFX10_3);
986 ds->u.gfx6.db_htile_surface |= S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
987 } else if (info->gfx_level == GFX9) {
988 ds->u.gfx6.db_htile_surface |= S_028ABC_RB_ALIGNED(1);
989 }
990 }
991 }
992
993 static void
ac_init_gfx12_ds_surface(const struct radeon_info * info,const struct ac_ds_state * state,uint32_t db_format,uint32_t stencil_format,struct ac_ds_surface * ds)994 ac_init_gfx12_ds_surface(const struct radeon_info *info, const struct ac_ds_state *state,
995 uint32_t db_format, uint32_t stencil_format, struct ac_ds_surface *ds)
996 {
997 const struct radeon_surf *surf = state->surf;
998
999 assert(db_format != V_028040_Z_24);
1000
1001 ds->db_depth_view = S_028004_SLICE_START(state->first_layer) |
1002 S_028004_SLICE_MAX(state->last_layer);
1003 ds->u.gfx12.db_depth_view1 = S_028008_MIPID_GFX12(state->level);
1004 ds->db_depth_size = S_028014_X_MAX(state->width - 1) |
1005 S_028014_Y_MAX(state->height - 1);
1006 ds->db_z_info = S_028018_FORMAT(db_format) |
1007 S_028018_NUM_SAMPLES(util_logbase2(state->num_samples)) |
1008 S_028018_SW_MODE(surf->u.gfx9.swizzle_mode) |
1009 S_028018_MAXMIP(state->num_levels - 1);
1010 ds->db_stencil_info = S_02801C_FORMAT(stencil_format) |
1011 S_02801C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
1012 S_02801C_TILE_STENCIL_DISABLE(1);
1013 ds->db_depth_base = state->va >> 8;
1014 ds->db_stencil_base = (state->va + surf->u.gfx9.zs.stencil_offset) >> 8;
1015 ds->u.gfx12.hiz_info = 0;
1016 ds->u.gfx12.his_info = 0;
1017
1018 /* HiZ. */
1019 if (surf->u.gfx9.zs.hiz.offset) {
1020 ds->u.gfx12.hiz_info = S_028B94_SURFACE_ENABLE(1) |
1021 S_028B94_FORMAT(0) | /* unorm16 */
1022 S_028B94_SW_MODE(surf->u.gfx9.zs.hiz.swizzle_mode);
1023 ds->u.gfx12.hiz_size_xy = S_028BA4_X_MAX(surf->u.gfx9.zs.hiz.width_in_tiles - 1) |
1024 S_028BA4_Y_MAX(surf->u.gfx9.zs.hiz.height_in_tiles - 1);
1025 ds->u.gfx12.hiz_base = (state->va + surf->u.gfx9.zs.hiz.offset) >> 8;
1026 }
1027
1028 /* HiS. */
1029 if (surf->u.gfx9.zs.his.offset) {
1030 ds->u.gfx12.his_info = S_028B98_SURFACE_ENABLE(1) |
1031 S_028B98_SW_MODE(surf->u.gfx9.zs.his.swizzle_mode);
1032 ds->u.gfx12.his_size_xy = S_028BB0_X_MAX(surf->u.gfx9.zs.his.width_in_tiles - 1) |
1033 S_028BB0_Y_MAX(surf->u.gfx9.zs.his.height_in_tiles - 1);
1034 ds->u.gfx12.his_base = (state->va + surf->u.gfx9.zs.his.offset) >> 8;
1035 }
1036 }
1037
1038 void
ac_init_ds_surface(const struct radeon_info * info,const struct ac_ds_state * state,struct ac_ds_surface * ds)1039 ac_init_ds_surface(const struct radeon_info *info, const struct ac_ds_state *state, struct ac_ds_surface *ds)
1040 {
1041 const struct radeon_surf *surf = state->surf;
1042 const uint32_t db_format = ac_translate_dbformat(state->format);
1043 const uint32_t stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
1044
1045 if (info->gfx_level >= GFX12) {
1046 ac_init_gfx12_ds_surface(info, state, db_format, stencil_format, ds);
1047 } else if (info->gfx_level >= GFX9) {
1048 ac_init_gfx9_ds_surface(info, state, db_format, stencil_format, ds);
1049 } else {
1050 ac_init_gfx6_ds_surface(info, state, db_format, stencil_format, ds);
1051 }
1052 }
1053
1054 static unsigned
ac_get_decompress_on_z_planes(const struct radeon_info * info,enum pipe_format format,uint8_t log_num_samples,bool htile_stencil_disabled,bool no_d16_compression)1055 ac_get_decompress_on_z_planes(const struct radeon_info *info, enum pipe_format format, uint8_t log_num_samples,
1056 bool htile_stencil_disabled, bool no_d16_compression)
1057 {
1058 uint32_t max_zplanes = 0;
1059
1060 if (info->gfx_level >= GFX9) {
1061 const bool iterate256 = info->gfx_level >= GFX10 && log_num_samples >= 1;
1062
1063 /* Default value for 32-bit depth surfaces. */
1064 max_zplanes = 4;
1065
1066 if (format == PIPE_FORMAT_Z16_UNORM && log_num_samples > 0)
1067 max_zplanes = 2;
1068
1069 /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
1070 if (info->has_two_planes_iterate256_bug && iterate256 && !htile_stencil_disabled && log_num_samples == 2)
1071 max_zplanes = 1;
1072
1073 max_zplanes++;
1074 } else {
1075 if (format == PIPE_FORMAT_Z16_UNORM && no_d16_compression) {
1076 /* Do not enable Z plane compression for 16-bit depth
1077 * surfaces because isn't supported on GFX8. Only
1078 * 32-bit depth surfaces are supported by the hardware.
1079 * This allows to maintain shader compatibility and to
1080 * reduce the number of depth decompressions.
1081 */
1082 max_zplanes = 1;
1083 } else {
1084 /* 0 = full compression. N = only compress up to N-1 Z planes. */
1085 if (log_num_samples == 0)
1086 max_zplanes = 5;
1087 else if (log_num_samples <= 2)
1088 max_zplanes = 3;
1089 else
1090 max_zplanes = 2;
1091 }
1092 }
1093
1094 return max_zplanes;
1095 }
1096
1097 void
ac_set_mutable_ds_surface_fields(const struct radeon_info * info,const struct ac_mutable_ds_state * state,struct ac_ds_surface * ds)1098 ac_set_mutable_ds_surface_fields(const struct radeon_info *info, const struct ac_mutable_ds_state *state,
1099 struct ac_ds_surface *ds)
1100 {
1101 bool tile_stencil_disable = false;
1102 uint32_t log_num_samples;
1103
1104 memcpy(ds, state->ds, sizeof(*ds));
1105
1106 if (info->gfx_level >= GFX12)
1107 return;
1108
1109 if (info->gfx_level >= GFX9) {
1110 log_num_samples = G_028038_NUM_SAMPLES(ds->db_z_info);
1111 tile_stencil_disable = G_02803C_TILE_STENCIL_DISABLE(ds->db_stencil_info);
1112 } else {
1113 log_num_samples = G_028040_NUM_SAMPLES(ds->db_z_info);
1114 }
1115
1116 const uint32_t max_zplanes =
1117 ac_get_decompress_on_z_planes(info, state->format, log_num_samples,
1118 tile_stencil_disable, state->no_d16_compression);
1119
1120 if (info->gfx_level >= GFX9) {
1121 if (state->tc_compat_htile_enabled) {
1122 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
1123
1124 if (info->gfx_level >= GFX10) {
1125 const bool iterate256 = log_num_samples >= 1;
1126
1127 ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
1128 ds->db_stencil_info |= S_028044_ITERATE_FLUSH(!tile_stencil_disable);
1129 ds->db_z_info |= S_028040_ITERATE_256(iterate256);
1130 ds->db_stencil_info |= S_028044_ITERATE_256(iterate256);
1131 } else {
1132 ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
1133 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
1134 }
1135 }
1136
1137 ds->db_z_info |= S_028038_ZRANGE_PRECISION(state->zrange_precision);
1138 } else {
1139 if (state->tc_compat_htile_enabled) {
1140 ds->u.gfx6.db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
1141 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
1142 } else {
1143 ds->u.gfx6.db_depth_info |= S_02803C_ADDR5_SWIZZLE_MASK(1);
1144 }
1145
1146 ds->db_z_info |= S_028040_ZRANGE_PRECISION(state->zrange_precision);
1147 }
1148 }
1149
1150 static uint32_t
ac_get_dcc_min_compressed_block_size(const struct radeon_info * info)1151 ac_get_dcc_min_compressed_block_size(const struct radeon_info *info)
1152 {
1153 /* This should typically match the request size of the memory type. DIMMs have 64B minimum
1154 * request size, which means compressing 64B to 32B has no benefit, while GDDR and HBM have
1155 * 32B minimum request size. Sometimes a different size is used depending on the data fabric,
1156 * etc.
1157 */
1158 return info->has_dedicated_vram || info->family == CHIP_GFX1151 ?
1159 V_028C78_MIN_BLOCK_SIZE_32B : V_028C78_MIN_BLOCK_SIZE_64B;
1160 }
1161
1162 static void
ac_init_gfx6_cb_surface(const struct radeon_info * info,const struct ac_cb_state * state,uint32_t cb_format,bool force_dst_alpha_1,struct ac_cb_surface * cb)1163 ac_init_gfx6_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state,
1164 uint32_t cb_format, bool force_dst_alpha_1, struct ac_cb_surface *cb)
1165 {
1166 const struct radeon_surf *surf = state->surf;
1167 const uint32_t endian = ac_colorformat_endian_swap(cb_format);
1168
1169 cb->cb_color_info |= S_028C70_ENDIAN(endian) |
1170 S_028C70_FORMAT_GFX6(cb_format) |
1171 S_028C70_COMPRESSION(!!surf->fmask_offset);
1172 cb->cb_color_view = S_028C6C_SLICE_START(state->first_layer) |
1173 S_028C6C_SLICE_MAX_GFX6(state->last_layer);
1174 cb->cb_color_attrib = S_028C74_NUM_SAMPLES(util_logbase2(state->num_samples)) |
1175 S_028C74_NUM_FRAGMENTS_GFX6(util_logbase2(state->num_storage_samples)) |
1176 S_028C74_FORCE_DST_ALPHA_1_GFX6(force_dst_alpha_1);
1177 cb->cb_color_attrib2 = 0;
1178 cb->cb_dcc_control = 0;
1179
1180 if (info->gfx_level == GFX9) {
1181 cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(state->base_level);
1182 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(state->num_layers) |
1183 S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
1184 cb->cb_color_attrib2 |= S_028C68_MIP0_WIDTH(state->width - 1) |
1185 S_028C68_MIP0_HEIGHT(state->height - 1) |
1186 S_028C68_MAX_MIP(state->num_levels - 1);
1187 }
1188
1189 if (info->gfx_level >= GFX8) {
1190 uint32_t max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
1191
1192 if (state->num_storage_samples > 1) {
1193 if (surf->bpe == 1)
1194 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
1195 else if (surf->bpe == 2)
1196 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
1197 }
1198
1199 cb->cb_dcc_control |= S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
1200 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(ac_get_dcc_min_compressed_block_size(info)) |
1201 S_028C78_INDEPENDENT_64B_BLOCKS(1);
1202 }
1203
1204 if (info->gfx_level == GFX6) {
1205 /* Due to a hw bug, FMASK_BANK_HEIGHT must still be set on GFX6. (inherited from GFX5) */
1206 /* This must also be set for fast clear to work without FMASK. */
1207 const uint32_t fmask_bankh = surf->fmask_offset ? surf->u.legacy.color.fmask.bankh
1208 : surf->u.legacy.bankh;
1209 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(util_logbase2(fmask_bankh));
1210 }
1211 }
1212
1213 static void
ac_init_gfx10_cb_surface(const struct radeon_info * info,const struct ac_cb_state * state,uint32_t cb_format,bool force_dst_alpha_1,uint32_t width,struct ac_cb_surface * cb)1214 ac_init_gfx10_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state,
1215 uint32_t cb_format, bool force_dst_alpha_1, uint32_t width,
1216 struct ac_cb_surface *cb)
1217 {
1218 const struct radeon_surf *surf = state->surf;
1219 uint32_t first_layer = state->first_layer;
1220 uint32_t base_level = state->base_level;
1221 uint32_t num_levels = state->num_levels;
1222
1223 if (state->gfx10.nbc_view) {
1224 assert(state->gfx10.nbc_view->valid);
1225 first_layer = 0;
1226 base_level = state->gfx10.nbc_view->level;
1227 num_levels = state->gfx10.nbc_view->num_levels;
1228 }
1229
1230 cb->cb_color_view = S_028C6C_SLICE_START(first_layer) |
1231 S_028C6C_SLICE_MAX_GFX10(state->last_layer) |
1232 S_028C6C_MIP_LEVEL_GFX10(base_level);
1233 cb->cb_color_attrib = 0;
1234 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
1235 S_028C68_MIP0_HEIGHT(state->height - 1) |
1236 S_028C68_MAX_MIP(num_levels - 1);
1237 cb->cb_color_attrib3 = S_028EE0_MIP0_DEPTH(state->num_layers) |
1238 S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
1239 S_028EE0_RESOURCE_LEVEL(info->gfx_level >= GFX11 ? 0 : 1);
1240 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
1241 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(surf->u.gfx9.color.dcc.max_compressed_block_size) |
1242 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(ac_get_dcc_min_compressed_block_size(info)) |
1243 S_028C78_INDEPENDENT_64B_BLOCKS(surf->u.gfx9.color.dcc.independent_64B_blocks);
1244
1245 if (info->gfx_level >= GFX11) {
1246 assert(!UTIL_ARCH_BIG_ENDIAN);
1247 cb->cb_color_info |= S_028C70_FORMAT_GFX11(cb_format);
1248 cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(util_logbase2(state->num_storage_samples)) |
1249 S_028C74_FORCE_DST_ALPHA_1_GFX11(force_dst_alpha_1);
1250 cb->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(surf->u.gfx9.color.dcc.independent_128B_blocks);
1251 } else {
1252 const uint32_t endian = ac_colorformat_endian_swap(cb_format);
1253
1254 cb->cb_color_info |= S_028C70_ENDIAN(endian) |
1255 S_028C70_FORMAT_GFX6(cb_format) |
1256 S_028C70_COMPRESSION(!!surf->fmask_offset);
1257 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(util_logbase2(state->num_samples)) |
1258 S_028C74_NUM_FRAGMENTS_GFX6(util_logbase2(state->num_storage_samples)) |
1259 S_028C74_FORCE_DST_ALPHA_1_GFX6(force_dst_alpha_1);
1260 cb->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX10(surf->u.gfx9.color.dcc.independent_128B_blocks);
1261 }
1262 }
1263
1264 static void
ac_init_gfx12_cb_surface(const struct radeon_info * info,const struct ac_cb_state * state,uint32_t cb_format,bool force_dst_alpha_1,uint32_t width,struct ac_cb_surface * cb)1265 ac_init_gfx12_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state,
1266 uint32_t cb_format, bool force_dst_alpha_1, uint32_t width,
1267 struct ac_cb_surface *cb)
1268 {
1269 const struct radeon_surf *surf = state->surf;
1270 uint32_t first_layer = state->first_layer;
1271 uint32_t base_level = state->base_level;
1272 uint32_t num_levels = state->num_levels;
1273
1274 if (state->gfx10.nbc_view) {
1275 assert(state->gfx10.nbc_view->valid);
1276 first_layer = 0;
1277 base_level = state->gfx10.nbc_view->level;
1278 num_levels = state->gfx10.nbc_view->num_levels;
1279 }
1280
1281 assert(!UTIL_ARCH_BIG_ENDIAN);
1282 cb->cb_color_info |= S_028EC0_FORMAT(cb_format);
1283 cb->cb_color_view = S_028C64_SLICE_START(first_layer) |
1284 S_028C64_SLICE_MAX(state->last_layer);
1285 cb->cb_color_view2 = S_028C68_MIP_LEVEL(base_level);
1286 cb->cb_color_attrib = S_028C6C_NUM_FRAGMENTS(util_logbase2(state->num_storage_samples)) |
1287 S_028C6C_FORCE_DST_ALPHA_1(force_dst_alpha_1);
1288 cb->cb_color_attrib2 = S_028C78_MIP0_HEIGHT(state->height - 1) |
1289 S_028C78_MIP0_WIDTH(width - 1);
1290 cb->cb_color_attrib3 = S_028C7C_MIP0_DEPTH(state->num_layers) |
1291 S_028C7C_MAX_MIP(num_levels - 1) |
1292 S_028C7C_RESOURCE_TYPE(surf->u.gfx9.resource_type);
1293 cb->cb_dcc_control = S_028C70_MAX_UNCOMPRESSED_BLOCK_SIZE(1) | /* 256B */
1294 S_028C70_MAX_COMPRESSED_BLOCK_SIZE(surf->u.gfx9.color.dcc.max_compressed_block_size) |
1295 S_028C70_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) |
1296 S_028C70_MAX_COMP_FRAGS(state->num_samples >= 8 ? 3 :
1297 state->num_samples >= 4 ? 2 : 0);
1298 }
1299
1300 void
ac_init_cb_surface(const struct radeon_info * info,const struct ac_cb_state * state,struct ac_cb_surface * cb)1301 ac_init_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state, struct ac_cb_surface *cb)
1302 {
1303 const struct util_format_description *desc = util_format_description(state->format);
1304 const uint32_t cb_format = ac_get_cb_format(info->gfx_level, state->format);
1305 const struct radeon_surf *surf = state->surf;
1306 uint32_t width = state->width;
1307
1308 assert(cb_format != V_028C70_COLOR_INVALID);
1309
1310 /* Intensity is implemented as Red, so treat it that way. */
1311 const bool force_dst_alpha_1 =
1312 desc->swizzle[3] == PIPE_SWIZZLE_1 || util_format_is_intensity(state->format);
1313
1314 /* GFX10.3+ can set a custom pitch for 1D and 2D non-array, but it must be a multiple of
1315 * 256B for GFX10.3-11 and 128B for GFX12.
1316 *
1317 * We set the pitch in MIP0_WIDTH.
1318 */
1319 if (info->gfx_level >= GFX10_3 && surf->u.gfx9.uses_custom_pitch) {
1320 ASSERTED unsigned min_alignment = info->gfx_level >= GFX12 ? 128 : 256;
1321 assert((surf->u.gfx9.surf_pitch * surf->bpe) % min_alignment == 0);
1322 assert(surf->is_linear);
1323
1324 width = surf->u.gfx9.surf_pitch;
1325
1326 /* Subsampled images have the pitch in the units of blocks. */
1327 if (surf->blk_w == 2)
1328 width *= 2;
1329 }
1330
1331 const uint32_t swap = ac_translate_colorswap(info->gfx_level, state->format, false);
1332 const uint32_t ntype = ac_get_cb_number_type(state->format);
1333 uint32_t blend_clamp = 0, blend_bypass = 0;
1334
1335 /* blend clamp should be set for all NORM/SRGB types */
1336 if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
1337 ntype == V_028C70_NUMBER_SRGB)
1338 blend_clamp = 1;
1339
1340 /* set blend bypass according to docs if SINT/UINT or 8/24 COLOR variants */
1341 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
1342 cb_format == V_028C70_COLOR_8_24 || cb_format == V_028C70_COLOR_24_8 ||
1343 cb_format == V_028C70_COLOR_X24_8_32_FLOAT) {
1344 blend_clamp = 0;
1345 blend_bypass = 1;
1346 }
1347
1348 const bool round_mode = ntype != V_028C70_NUMBER_UNORM &&
1349 ntype != V_028C70_NUMBER_SNORM &&
1350 ntype != V_028C70_NUMBER_SRGB &&
1351 cb_format != V_028C70_COLOR_8_24 &&
1352 cb_format != V_028C70_COLOR_24_8;
1353
1354 cb->cb_color_info = S_028C70_COMP_SWAP(swap) |
1355 S_028C70_BLEND_CLAMP(blend_clamp) |
1356 S_028C70_BLEND_BYPASS(blend_bypass) |
1357 S_028C70_SIMPLE_FLOAT(1) |
1358 S_028C70_ROUND_MODE(round_mode) |
1359 S_028C70_NUMBER_TYPE(ntype);
1360
1361 if (info->gfx_level >= GFX12) {
1362 ac_init_gfx12_cb_surface(info, state, cb_format, force_dst_alpha_1, width, cb);
1363 } else if (info->gfx_level >= GFX10) {
1364 ac_init_gfx10_cb_surface(info, state, cb_format, force_dst_alpha_1, width, cb);
1365 } else {
1366 ac_init_gfx6_cb_surface(info, state, cb_format, force_dst_alpha_1, cb);
1367 }
1368 }
1369
1370 void
ac_set_mutable_cb_surface_fields(const struct radeon_info * info,const struct ac_mutable_cb_state * state,struct ac_cb_surface * cb)1371 ac_set_mutable_cb_surface_fields(const struct radeon_info *info, const struct ac_mutable_cb_state *state,
1372 struct ac_cb_surface *cb)
1373 {
1374 const struct radeon_surf *surf = state->surf;
1375 uint8_t tile_swizzle = surf->tile_swizzle;
1376 uint64_t va = state->va;
1377
1378 memcpy(cb, state->cb, sizeof(*cb));
1379
1380 if (state->gfx10.nbc_view) {
1381 assert(state->gfx10.nbc_view->valid);
1382 va += state->gfx10.nbc_view->base_address_offset;
1383 tile_swizzle = state->gfx10.nbc_view->tile_swizzle;
1384 }
1385
1386 cb->cb_color_base = va >> 8;
1387
1388 if (info->gfx_level >= GFX9) {
1389 cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
1390 cb->cb_color_base |= tile_swizzle;
1391 } else {
1392 const struct legacy_surf_level *level_info = &surf->u.legacy.level[state->base_level];
1393
1394 cb->cb_color_base += level_info->offset_256B;
1395
1396 /* Only macrotiled modes can set tile swizzle. */
1397 if (level_info->mode == RADEON_SURF_MODE_2D)
1398 cb->cb_color_base |= tile_swizzle;
1399 }
1400
1401 if (info->gfx_level >= GFX12) {
1402 cb->cb_color_attrib3 |= S_028C7C_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode);
1403 return;
1404 }
1405
1406 /* Set up DCC. */
1407 if (state->dcc_enabled) {
1408 cb->cb_dcc_base = (va + surf->meta_offset) >> 8;
1409
1410 if (info->gfx_level == GFX8)
1411 cb->cb_dcc_base += surf->u.legacy.color.dcc_level[state->base_level].dcc_offset >> 8;
1412
1413 uint32_t dcc_tile_swizzle = tile_swizzle;
1414 dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8;
1415 cb->cb_dcc_base |= dcc_tile_swizzle;
1416 }
1417
1418 if (info->gfx_level >= GFX11) {
1419 cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1420 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
1421
1422 if (state->dcc_enabled) {
1423 cb->cb_dcc_control |= S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
1424 S_028C78_FDCC_ENABLE(1);
1425
1426 if (info->family >= CHIP_PHOENIX2) {
1427 cb->cb_dcc_control |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) |
1428 S_028C78_MAX_COMP_FRAGS(state->num_samples >= 4);
1429 }
1430 }
1431 } else if (info->gfx_level >= GFX10) {
1432 cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1433 S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
1434 S_028EE0_CMASK_PIPE_ALIGNED(1) |
1435 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
1436 } else if (info->gfx_level == GFX9) {
1437 struct gfx9_surf_meta_flags meta = {
1438 .rb_aligned = 1,
1439 .pipe_aligned = 1,
1440 };
1441
1442 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
1443 meta = surf->u.gfx9.color.dcc;
1444
1445 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1446 S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
1447 S_028C74_RB_ALIGNED(meta.rb_aligned) |
1448 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
1449 cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);
1450 } else {
1451 /* GFX6-8 */
1452 const struct legacy_surf_level *level_info = &surf->u.legacy.level[state->base_level];
1453 uint32_t pitch_tile_max, slice_tile_max, tile_mode_index;
1454
1455 pitch_tile_max = level_info->nblk_x / 8 - 1;
1456 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
1457 tile_mode_index = ac_tile_mode_index(surf, state->base_level, false);
1458
1459 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
1460 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
1461 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
1462
1463 cb->cb_color_cmask_slice = surf->u.legacy.color.cmask_slice_tile_max;
1464
1465 if (state->fmask_enabled) {
1466 if (info->gfx_level >= GFX7)
1467 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
1468 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
1469 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
1470 } else {
1471 /* This must be set for fast clear to work without FMASK. */
1472 if (info->gfx_level >= GFX7)
1473 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
1474 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
1475 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
1476 }
1477 }
1478
1479 if (state->cmask_enabled) {
1480 cb->cb_color_cmask = (va + surf->cmask_offset) >> 8;
1481 cb->cb_color_info |= S_028C70_FAST_CLEAR(state->fast_clear_enabled);
1482 } else {
1483 cb->cb_color_cmask = cb->cb_color_base;
1484 }
1485
1486 if (state->fmask_enabled) {
1487 cb->cb_color_fmask = (va + surf->fmask_offset) >> 8;
1488 cb->cb_color_fmask |= surf->fmask_tile_swizzle;
1489
1490 if (state->tc_compat_cmask_enabled) {
1491 assert(state->cmask_enabled);
1492
1493 /* Allow the texture block to read FMASK directly without decompressing it. */
1494 cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
1495
1496 if (info->gfx_level == GFX8) {
1497 /* Set CMASK into a tiling format that allows
1498 * the texture block to read it.
1499 */
1500 cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
1501 cb->cb_color_cmask |= surf->fmask_tile_swizzle;
1502 }
1503 }
1504 } else {
1505 cb->cb_color_fmask = cb->cb_color_base;
1506 }
1507
1508 if (info->gfx_level < GFX11)
1509 cb->cb_color_info |= S_028C70_DCC_ENABLE(state->dcc_enabled);
1510 }
1511