/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 * Copyright 2024 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "ac_descriptors.h"
#include "ac_gpu_info.h"
#include "ac_formats.h"
#include "ac_surface.h"

#include "gfx10_format_table.h"
#include "sid.h"

#include "util/u_math.h"
#include "util/format/u_format.h"

19 unsigned
ac_map_swizzle(unsigned swizzle)20 ac_map_swizzle(unsigned swizzle)
21 {
22    switch (swizzle) {
23    case PIPE_SWIZZLE_Y:
24       return V_008F0C_SQ_SEL_Y;
25    case PIPE_SWIZZLE_Z:
26       return V_008F0C_SQ_SEL_Z;
27    case PIPE_SWIZZLE_W:
28       return V_008F0C_SQ_SEL_W;
29    case PIPE_SWIZZLE_0:
30       return V_008F0C_SQ_SEL_0;
31    case PIPE_SWIZZLE_1:
32       return V_008F0C_SQ_SEL_1;
33    default: /* PIPE_SWIZZLE_X */
34       return V_008F0C_SQ_SEL_X;
35    }
36 }
37 
38 void
ac_build_sampler_descriptor(const enum amd_gfx_level gfx_level,const struct ac_sampler_state * state,uint32_t desc[4])39 ac_build_sampler_descriptor(const enum amd_gfx_level gfx_level, const struct ac_sampler_state *state, uint32_t desc[4])
40 {
41    const unsigned perf_mip = state->max_aniso_ratio ? state->max_aniso_ratio + 6 : 0;
42    const bool compat_mode = gfx_level == GFX8 || gfx_level == GFX9;
43 
44    desc[0] = S_008F30_CLAMP_X(state->address_mode_u) |
45              S_008F30_CLAMP_Y(state->address_mode_v) |
46              S_008F30_CLAMP_Z(state->address_mode_w) |
47              S_008F30_MAX_ANISO_RATIO(state->max_aniso_ratio) |
48              S_008F30_DEPTH_COMPARE_FUNC(state->depth_compare_func) |
49              S_008F30_FORCE_UNNORMALIZED(state->unnormalized_coords) |
50              S_008F30_ANISO_THRESHOLD(state->max_aniso_ratio >> 1) |
51              S_008F30_ANISO_BIAS(state->max_aniso_ratio) |
52              S_008F30_DISABLE_CUBE_WRAP(!state->cube_wrap) |
53              S_008F30_COMPAT_MODE(compat_mode) |
54              S_008F30_TRUNC_COORD(state->trunc_coord) |
55              S_008F30_FILTER_MODE(state->filter_mode);
56    desc[1] = 0;
57    desc[2] = S_008F38_XY_MAG_FILTER(state->mag_filter) |
58              S_008F38_XY_MIN_FILTER(state->min_filter) |
59              S_008F38_MIP_FILTER(state->mip_filter);
60    desc[3] = S_008F3C_BORDER_COLOR_TYPE(state->border_color_type);
61 
62    if (gfx_level >= GFX12) {
63       desc[1] |= S_008F34_MIN_LOD_GFX12(util_unsigned_fixed(CLAMP(state->min_lod, 0, 17), 8)) |
64                  S_008F34_MAX_LOD_GFX12(util_unsigned_fixed(CLAMP(state->max_lod, 0, 17), 8));
65       desc[2] |= S_008F38_PERF_MIP_LO(perf_mip);
66       desc[3] |= S_008F3C_PERF_MIP_HI(perf_mip >> 2);
67    } else {
68       desc[1] |= S_008F34_MIN_LOD_GFX6(util_unsigned_fixed(CLAMP(state->min_lod, 0, 15), 8)) |
69                  S_008F34_MAX_LOD_GFX6(util_unsigned_fixed(CLAMP(state->max_lod, 0, 15), 8)) |
70                  S_008F34_PERF_MIP(perf_mip);
71    }
72 
73    if (gfx_level >= GFX10) {
74       desc[2] |= S_008F38_LOD_BIAS(util_signed_fixed(CLAMP(state->lod_bias, -32, 31), 8)) |
75                  S_008F38_ANISO_OVERRIDE_GFX10(!state->aniso_single_level);
76    } else {
77       desc[2] |= S_008F38_LOD_BIAS(util_signed_fixed(CLAMP(state->lod_bias, -16, 16), 8)) |
78                  S_008F38_DISABLE_LSB_CEIL(gfx_level <= GFX8) |
79                  S_008F38_FILTER_PREC_FIX(1) |
80                  S_008F38_ANISO_OVERRIDE_GFX8(gfx_level >= GFX8 && !state->aniso_single_level);
81    }
82 
83    if (gfx_level >= GFX11) {
84       desc[3] |= S_008F3C_BORDER_COLOR_PTR_GFX11(state->border_color_ptr);
85    } else {
86       desc[3] |= S_008F3C_BORDER_COLOR_PTR_GFX6(state->border_color_ptr);
87    }
88 }
89 
90 static void
ac_build_gfx6_fmask_descriptor(const enum amd_gfx_level gfx_level,const struct ac_fmask_state * state,uint32_t desc[8])91 ac_build_gfx6_fmask_descriptor(const enum amd_gfx_level gfx_level, const struct ac_fmask_state *state, uint32_t desc[8])
92 {
93    const struct radeon_surf *surf = state->surf;
94    const uint64_t va = state->va + surf->fmask_offset;
95    uint32_t data_format, num_format;
96 
97 #define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))
98    if (gfx_level == GFX9) {
99       data_format = V_008F14_IMG_DATA_FORMAT_FMASK;
100       switch (FMASK(state->num_samples, state->num_storage_samples)) {
101       case FMASK(2, 1):
102          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_1;
103          break;
104       case FMASK(2, 2):
105          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
106          break;
107       case FMASK(4, 1):
108          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_1;
109          break;
110       case FMASK(4, 2):
111          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_2;
112          break;
113       case FMASK(4, 4):
114          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
115          break;
116       case FMASK(8, 1):
117          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_8_1;
118          break;
119       case FMASK(8, 2):
120          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_8_2;
121          break;
122       case FMASK(8, 4):
123          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_4;
124          break;
125       case FMASK(8, 8):
126          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
127          break;
128       case FMASK(16, 1):
129          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_16_1;
130          break;
131       case FMASK(16, 2):
132          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_16_2;
133          break;
134       case FMASK(16, 4):
135          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_4;
136          break;
137       case FMASK(16, 8):
138          num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_8;
139          break;
140       default:
141          unreachable("invalid nr_samples");
142       }
143    } else {
144       switch (FMASK(state->num_samples, state->num_storage_samples)) {
145       case FMASK(2, 1):
146          data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1;
147          break;
148       case FMASK(2, 2):
149          data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
150          break;
151       case FMASK(4, 1):
152          data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1;
153          break;
154       case FMASK(4, 2):
155          data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2;
156          break;
157       case FMASK(4, 4):
158          data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
159          break;
160       case FMASK(8, 1):
161          data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1;
162          break;
163       case FMASK(8, 2):
164          data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2;
165          break;
166       case FMASK(8, 4):
167          data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4;
168          break;
169       case FMASK(8, 8):
170          data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
171          break;
172       case FMASK(16, 1):
173          data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1;
174          break;
175       case FMASK(16, 2):
176          data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2;
177          break;
178       case FMASK(16, 4):
179          data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4;
180          break;
181       case FMASK(16, 8):
182          data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8;
183          break;
184       default:
185          unreachable("invalid nr_samples");
186       }
187       num_format = V_008F14_IMG_NUM_FORMAT_UINT;
188    }
189 #undef FMASK
190 
191    desc[0] = (va >> 8) | surf->fmask_tile_swizzle;
192    desc[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
193              S_008F14_DATA_FORMAT(data_format) |
194              S_008F14_NUM_FORMAT(num_format);
195    desc[2] = S_008F18_WIDTH(state->width - 1) |
196              S_008F18_HEIGHT(state->height - 1);
197    desc[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
198              S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
199              S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
200              S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
201              S_008F1C_TYPE(state->type);
202    desc[4] = 0;
203    desc[5] = S_008F24_BASE_ARRAY(state->first_layer);
204    desc[6] = 0;
205    desc[7] = 0;
206 
207    if (gfx_level == GFX9) {
208       desc[3] |= S_008F1C_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode);
209       desc[4] |= S_008F20_DEPTH(state->last_layer) |
210                  S_008F20_PITCH(surf->u.gfx9.color.fmask_epitch);
211       desc[5] |= S_008F24_META_PIPE_ALIGNED(1) |
212                  S_008F24_META_RB_ALIGNED(1);
213 
214       if (state->tc_compat_cmask) {
215          const uint64_t cmask_va = state->va + surf->cmask_offset;
216 
217          desc[5] |= S_008F24_META_DATA_ADDRESS(cmask_va >> 40);
218          desc[6] |= S_008F28_COMPRESSION_EN(1);
219          desc[7] |= cmask_va >> 8;
220       }
221    } else {
222       desc[3] |= S_008F1C_TILING_INDEX(surf->u.legacy.color.fmask.tiling_index);
223       desc[4] |= S_008F20_DEPTH(state->depth - 1) |
224                  S_008F20_PITCH(surf->u.legacy.color.fmask.pitch_in_pixels - 1);
225       desc[5] |= S_008F24_LAST_ARRAY(state->last_layer);
226 
227       if (state->tc_compat_cmask) {
228          const uint64_t cmask_va = state->va + surf->cmask_offset;
229 
230          desc[6] |= S_008F28_COMPRESSION_EN(1);
231          desc[7] |= (cmask_va >> 8) | surf->fmask_tile_swizzle;
232       }
233    }
234 }
235 
236 static void
ac_build_gfx10_fmask_descriptor(const enum amd_gfx_level gfx_level,const struct ac_fmask_state * state,uint32_t desc[8])237 ac_build_gfx10_fmask_descriptor(const enum amd_gfx_level gfx_level, const struct ac_fmask_state *state, uint32_t desc[8])
238 {
239    const struct radeon_surf *surf = state->surf;
240    const uint64_t va = state->va + surf->fmask_offset;
241    uint32_t format;
242 
243 #define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))
244    switch (FMASK(state->num_samples, state->num_storage_samples)) {
245    case FMASK(2, 1):
246       format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F1;
247       break;
248    case FMASK(2, 2):
249       format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
250       break;
251    case FMASK(4, 1):
252       format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F1;
253       break;
254    case FMASK(4, 2):
255       format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F2;
256       break;
257    case FMASK(4, 4):
258       format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
259       break;
260    case FMASK(8, 1):
261       format = V_008F0C_GFX10_FORMAT_FMASK8_S8_F1;
262       break;
263    case FMASK(8, 2):
264       format = V_008F0C_GFX10_FORMAT_FMASK16_S8_F2;
265       break;
266    case FMASK(8, 4):
267       format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F4;
268       break;
269    case FMASK(8, 8):
270       format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
271       break;
272    case FMASK(16, 1):
273       format = V_008F0C_GFX10_FORMAT_FMASK16_S16_F1;
274       break;
275    case FMASK(16, 2):
276       format = V_008F0C_GFX10_FORMAT_FMASK32_S16_F2;
277       break;
278    case FMASK(16, 4):
279       format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F4;
280       break;
281    case FMASK(16, 8):
282       format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F8;
283       break;
284    default:
285       unreachable("invalid nr_samples");
286    }
287 #undef FMASK
288 
289    desc[0] = (va >> 8) | surf->fmask_tile_swizzle;
290    desc[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
291              S_00A004_FORMAT_GFX10(format) |
292              S_00A004_WIDTH_LO(state->width - 1);
293    desc[2] = S_00A008_WIDTH_HI((state->width - 1) >> 2) |
294              S_00A008_HEIGHT(state->height - 1) |
295              S_00A008_RESOURCE_LEVEL(1);
296    desc[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
297              S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
298              S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
299              S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
300              S_00A00C_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
301              S_00A00C_TYPE(state->type);
302    desc[4] = S_00A010_DEPTH_GFX10(state->last_layer) | S_00A010_BASE_ARRAY(state->first_layer);
303    desc[5] = 0;
304    desc[6] = S_00A018_META_PIPE_ALIGNED(1);
305    desc[7] = 0;
306 
307    if (state->tc_compat_cmask) {
308       uint64_t cmask_va = state->va + surf->cmask_offset;
309 
310       desc[6] |= S_00A018_COMPRESSION_EN(1);
311       desc[6] |= S_00A018_META_DATA_ADDRESS_LO(cmask_va >> 8);
312       desc[7] |= cmask_va >> 16;
313    }
314 }
315 
316 void
ac_build_fmask_descriptor(const enum amd_gfx_level gfx_level,const struct ac_fmask_state * state,uint32_t desc[8])317 ac_build_fmask_descriptor(const enum amd_gfx_level gfx_level, const struct ac_fmask_state *state, uint32_t desc[8])
318 {
319    assert(gfx_level < GFX11);
320 
321    if (gfx_level >= GFX10) {
322       ac_build_gfx10_fmask_descriptor(gfx_level, state, desc);
323    } else {
324       ac_build_gfx6_fmask_descriptor(gfx_level, state, desc);
325    }
326 }
327 
328 static void
ac_build_gfx6_texture_descriptor(const struct radeon_info * info,const struct ac_texture_state * state,uint32_t desc[8])329 ac_build_gfx6_texture_descriptor(const struct radeon_info *info, const struct ac_texture_state *state, uint32_t desc[8])
330 {
331    const struct util_format_description *fmt_desc = util_format_description(state->format);
332    uint32_t num_format, data_format, num_samples;
333    int first_non_void;
334 
335    num_samples = fmt_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ? MAX2(1, state->num_samples)
336                                                                    : MAX2(1, state->num_storage_samples);
337 
338    first_non_void = util_format_get_first_non_void_channel(state->format);
339 
340    num_format = ac_translate_tex_numformat(fmt_desc, first_non_void);
341 
342    data_format = ac_translate_tex_dataformat(info, fmt_desc, first_non_void);
343    if (data_format == ~0) {
344       data_format = 0;
345    }
346 
347    /* S8 with either Z16 or Z32 HTILE need a special format. */
348    if (info->gfx_level == GFX9 && state->format == PIPE_FORMAT_S8_UINT && state->tc_compat_htile_enabled) {
349       if (state->img_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
350           state->img_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
351           state->img_format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
352          data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
353       } else if (state->img_format == PIPE_FORMAT_Z16_UNORM_S8_UINT) {
354          data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
355       }
356    }
357 
358    desc[0] = 0;
359    desc[1] = S_008F14_MIN_LOD(util_unsigned_fixed(CLAMP(state->min_lod, 0, 15), 8)) |
360              S_008F14_DATA_FORMAT(data_format) |
361              S_008F14_NUM_FORMAT(num_format);
362    desc[2] = S_008F18_WIDTH(state->width - 1) |
363              S_008F18_HEIGHT(state->height - 1) |
364              S_008F18_PERF_MOD(4);
365    desc[3] = S_008F1C_DST_SEL_X(ac_map_swizzle(state->swizzle[0])) |
366              S_008F1C_DST_SEL_Y(ac_map_swizzle(state->swizzle[1])) |
367              S_008F1C_DST_SEL_Z(ac_map_swizzle(state->swizzle[2])) |
368              S_008F1C_DST_SEL_W(ac_map_swizzle(state->swizzle[3])) |
369              S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : state->first_level) |
370              S_008F1C_LAST_LEVEL(num_samples > 1 ? util_logbase2(num_samples) : state->last_level) |
371              S_008F1C_TYPE(state->type);
372    desc[4] = 0;
373    desc[5] = S_008F24_BASE_ARRAY(state->first_layer);
374    desc[6] = 0;
375    desc[7] = 0;
376 
377    if (info->gfx_level == GFX9) {
378       const uint32_t bc_swizzle = ac_border_color_swizzle(fmt_desc);
379 
380       /* Depth is the last accessible layer on Gfx9.
381        * The hw doesn't need to know the total number of layers.
382        */
383       if (state->type == V_008F1C_SQ_RSRC_IMG_3D)
384          desc[4] |= S_008F20_DEPTH(state->depth - 1);
385       else
386          desc[4] |= S_008F20_DEPTH(state->last_layer);
387 
388       desc[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
389       desc[5] |= S_008F24_MAX_MIP(num_samples > 1 ? util_logbase2(num_samples) : state->num_levels - 1);
390    } else {
391       desc[3] |= S_008F1C_POW2_PAD(state->num_levels > 1);
392       desc[4] |= S_008F20_DEPTH(state->depth - 1);
393       desc[5] |= S_008F24_LAST_ARRAY(state->last_layer);
394    }
395 
396    if (state->dcc_enabled) {
397       desc[6] = S_008F28_ALPHA_IS_ON_MSB(ac_alpha_is_on_msb(info, state->format));
398    } else {
399       if (!state->aniso_single_level) {
400          /* The last dword is unused by hw. The shader uses it to clear
401           * bits in the first dword of sampler state.
402           */
403          if (info->gfx_level <= GFX7 && state->num_samples <= 1) {
404             if (state->first_level == state->last_level)
405                desc[7] = C_008F30_MAX_ANISO_RATIO;
406             else
407                desc[7] = 0xffffffff;
408          }
409       }
410    }
411 }
412 
413 static uint32_t
ac_get_gfx10_img_format(const enum amd_gfx_level gfx_level,const struct ac_texture_state * state)414 ac_get_gfx10_img_format(const enum amd_gfx_level gfx_level, const struct ac_texture_state *state)
415 {
416    const struct gfx10_format *fmt = &ac_get_gfx10_format_table(gfx_level)[state->format];
417    const struct util_format_description *desc = util_format_description(state->format);
418    uint32_t img_format = fmt->img_format;
419 
420    if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
421        state->gfx10.upgraded_depth && !util_format_has_stencil(desc)) {
422       if (gfx_level >= GFX11) {
423          assert(img_format == V_008F0C_GFX11_FORMAT_32_FLOAT);
424          img_format = V_008F0C_GFX11_FORMAT_32_FLOAT_CLAMP;
425       } else {
426          assert(img_format == V_008F0C_GFX10_FORMAT_32_FLOAT);
427          img_format = V_008F0C_GFX10_FORMAT_32_FLOAT_CLAMP;
428       }
429    }
430 
431    return img_format;
432 }
433 
434 static void
ac_build_gfx10_texture_descriptor(const struct radeon_info * info,const struct ac_texture_state * state,uint32_t desc[8])435 ac_build_gfx10_texture_descriptor(const struct radeon_info *info, const struct ac_texture_state *state, uint32_t desc[8])
436 {
437    const struct radeon_surf *surf = state->surf;
438    const struct util_format_description *fmt_desc = util_format_description(state->format);
439    const uint32_t img_format = ac_get_gfx10_img_format(info->gfx_level, state);
440    const struct ac_surf_nbc_view *nbc_view = state->gfx9.nbc_view;
441    const uint32_t field_last_level = state->num_samples > 1 ? util_logbase2(state->num_samples) : state->last_level;
442 
443    desc[0] = 0;
444    desc[1] = S_00A004_FORMAT_GFX10(img_format) |
445              S_00A004_WIDTH_LO(state->width - 1);
446    desc[2] = S_00A008_WIDTH_HI((state->width - 1) >> 2) |
447              S_00A008_HEIGHT(state->height - 1) |
448              S_00A008_RESOURCE_LEVEL(info->gfx_level < GFX11);
449    desc[3] = S_00A00C_DST_SEL_X(ac_map_swizzle(state->swizzle[0])) |
450              S_00A00C_DST_SEL_Y(ac_map_swizzle(state->swizzle[1])) |
451              S_00A00C_DST_SEL_Z(ac_map_swizzle(state->swizzle[2])) |
452              S_00A00C_DST_SEL_W(ac_map_swizzle(state->swizzle[3])) |
453              S_00A00C_BASE_LEVEL(state->num_samples > 1 ? 0 : state->first_level) |
454              S_00A00C_LAST_LEVEL_GFX10(field_last_level) |
455              S_00A00C_BC_SWIZZLE(ac_border_color_swizzle(fmt_desc)) |
456              S_00A00C_TYPE(state->type);
457 
458    /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
459     * to know the total number of layers.
460     */
461    desc[4] = S_00A010_DEPTH_GFX10(state->depth) |
462              S_00A010_BASE_ARRAY(state->first_layer);
463 
464    /* ARRAY_PITCH is only meaningful for 3D images, 0 means SRV, 1 means UAV.
465     * In SRV mode, BASE_ARRAY is ignored and DEPTH is the last slice of mipmap level 0.
466     * In UAV mode, BASE_ARRAY is the first slice and DEPTH is the last slice of the bound level.
467     */
468    desc[5] = S_00A014_ARRAY_PITCH(state->gfx10.uav3d) | S_00A014_PERF_MOD(4);
469    desc[6] = 0;
470    desc[7] = 0;
471 
472    uint32_t max_mip = state->num_samples > 1 ? util_logbase2(state->num_samples) : state->num_levels - 1;
473    if (nbc_view && nbc_view->valid)
474       max_mip = nbc_view->num_levels - 1;
475 
476    const uint32_t min_lod_clamped = util_unsigned_fixed(CLAMP(state->min_lod, 0, 15), 8);
477    if (info->gfx_level >= GFX11) {
478       desc[1] |= S_00A004_MAX_MIP_GFX11(max_mip);
479       desc[5] |= S_00A014_MIN_LOD_LO_GFX11(min_lod_clamped);
480       desc[6] |= S_00A018_MIN_LOD_HI(min_lod_clamped >> 5);
481    } else {
482       desc[1] |= S_00A004_MIN_LOD(min_lod_clamped);
483       desc[5] |= S_00A014_MAX_MIP(max_mip);
484    }
485 
486    if (state->dcc_enabled) {
487       desc[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
488                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(surf->u.gfx9.color.dcc.max_compressed_block_size) |
489                  S_00A018_ALPHA_IS_ON_MSB(ac_alpha_is_on_msb(info, state->format));
490    }
491 }
492 
493 static void
ac_build_gfx12_texture_descriptor(const struct radeon_info * info,const struct ac_texture_state * state,uint32_t desc[8])494 ac_build_gfx12_texture_descriptor(const struct radeon_info *info, const struct ac_texture_state *state, uint32_t desc[8])
495 {
496    const struct radeon_surf *surf = state->surf;
497    const struct util_format_description *fmt_desc = util_format_description(state->format);
498    const uint32_t img_format = ac_get_gfx10_img_format(info->gfx_level, state);
499    const uint32_t field_last_level = state->num_samples > 1 ? util_logbase2(state->num_samples) : state->last_level;
500    const bool no_edge_clamp = state->num_levels > 1 && util_format_is_compressed(state->img_format) &&
501                               !util_format_is_compressed(state->format);
502    const uint32_t min_lod_clamped = util_unsigned_fixed(CLAMP(state->min_lod, 0, 15), 8);
503    const struct ac_surf_nbc_view *nbc_view = state->gfx9.nbc_view;
504 
505    uint32_t max_mip = state->num_samples > 1 ? util_logbase2(state->num_samples) : state->num_levels - 1;
506    if (nbc_view && nbc_view->valid)
507       max_mip = nbc_view->num_levels - 1;
508 
509    desc[0] = 0;
510    desc[1] = S_00A004_MAX_MIP_GFX12(max_mip) |
511              S_00A004_FORMAT_GFX12(img_format) |
512              S_00A004_BASE_LEVEL(state->num_samples > 1 ? 0 : state->first_level) |
513              S_00A004_WIDTH_LO(state->width - 1);
514    desc[2] = S_00A008_WIDTH_HI((state->width - 1) >> 2) |
515              S_00A008_HEIGHT(state->height - 1);
516    desc[3] = S_00A00C_DST_SEL_X(ac_map_swizzle(state->swizzle[0])) |
517              S_00A00C_DST_SEL_Y(ac_map_swizzle(state->swizzle[1])) |
518              S_00A00C_DST_SEL_Z(ac_map_swizzle(state->swizzle[2])) |
519              S_00A00C_DST_SEL_W(ac_map_swizzle(state->swizzle[3])) |
520              S_00A00C_NO_EDGE_CLAMP(no_edge_clamp) |
521              S_00A00C_LAST_LEVEL_GFX12(field_last_level) |
522              S_00A00C_BC_SWIZZLE(ac_border_color_swizzle(fmt_desc)) |
523              S_00A00C_TYPE(state->type);
524 
525    /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
526     * to know the total number of layers.
527     */
528    desc[4] = S_00A010_DEPTH_GFX12(state->depth) |
529              S_00A010_BASE_ARRAY(state->first_layer);
530    desc[5] = S_00A014_UAV3D(state->gfx10.uav3d) |
531              S_00A014_PERF_MOD(4) |
532              S_00A014_MIN_LOD_LO_GFX12(min_lod_clamped);
533    desc[6] = S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(1 /*256B*/) |
534              S_00A018_MAX_COMPRESSED_BLOCK_SIZE(surf->u.gfx9.color.dcc.max_compressed_block_size) |
535              S_00A018_MIN_LOD_HI(min_lod_clamped >> 6);
536    desc[7] = 0;
537 }
538 
539 void
ac_build_texture_descriptor(const struct radeon_info * info,const struct ac_texture_state * state,uint32_t desc[8])540 ac_build_texture_descriptor(const struct radeon_info *info, const struct ac_texture_state *state, uint32_t desc[8])
541 {
542    if (info->gfx_level >= GFX12) {
543       ac_build_gfx12_texture_descriptor(info, state, desc);
544    } else if (info->gfx_level >= GFX10) {
545       ac_build_gfx10_texture_descriptor(info, state, desc);
546    } else {
547       ac_build_gfx6_texture_descriptor(info, state, desc);
548    }
549 }
550 
551 uint32_t
ac_tile_mode_index(const struct radeon_surf * surf,unsigned level,bool stencil)552 ac_tile_mode_index(const struct radeon_surf *surf, unsigned level, bool stencil)
553 {
554    if (stencil)
555       return surf->u.legacy.zs.stencil_tiling_index[level];
556    else
557       return surf->u.legacy.tiling_index[level];
558 }
559 
560 void
ac_set_mutable_tex_desc_fields(const struct radeon_info * info,const struct ac_mutable_tex_state * state,uint32_t desc[8])561 ac_set_mutable_tex_desc_fields(const struct radeon_info *info, const struct ac_mutable_tex_state *state, uint32_t desc[8])
562 {
563    const struct radeon_surf *surf = state->surf;
564    const struct legacy_surf_level *base_level_info = state->gfx6.base_level_info;
565    const struct ac_surf_nbc_view *nbc_view = state->gfx9.nbc_view;
566    uint8_t swizzle = surf->tile_swizzle;
567    uint64_t va = state->va, meta_va = 0;
568 
569    if (info->gfx_level >= GFX9) {
570       if (state->is_stencil) {
571          va += surf->u.gfx9.zs.stencil_offset;
572       } else {
573          va += surf->u.gfx9.surf_offset;
574       }
575 
576       if (nbc_view && nbc_view->valid) {
577          va += nbc_view->base_address_offset;
578          swizzle = nbc_view->tile_swizzle;
579       }
580    } else {
581       va += (uint64_t)base_level_info->offset_256B * 256;
582    }
583 
584    if (!info->has_image_opcodes) {
585       /* Set it as a buffer descriptor. */
586       desc[0] = va;
587       desc[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
588       return;
589    }
590 
591    desc[0] = va >> 8;
592    desc[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
593 
594    if (info->gfx_level >= GFX8 && info->gfx_level < GFX12) {
595       if (state->dcc_enabled) {
596          meta_va = state->va + surf->meta_offset;
597          if (info->gfx_level == GFX8) {
598             meta_va += surf->u.legacy.color.dcc_level[state->gfx6.base_level].dcc_offset;
599             assert(base_level_info->mode == RADEON_SURF_MODE_2D);
600          }
601 
602          unsigned dcc_tile_swizzle = swizzle << 8;
603          dcc_tile_swizzle &= (1 << surf->meta_alignment_log2) - 1;
604          meta_va |= dcc_tile_swizzle;
605       } else if (state->tc_compat_htile_enabled) {
606          meta_va = state->va + surf->meta_offset;
607       }
608    }
609 
610    if (info->gfx_level >= GFX10) {
611       desc[0] |= swizzle;
612 
613       if (state->is_stencil) {
614          desc[3] |= S_00A00C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode);
615       } else {
616          desc[3] |= S_00A00C_SW_MODE(surf->u.gfx9.swizzle_mode);
617       }
618 
619       /* GFX10.3+ can set a custom pitch for 1D and 2D non-array, but it must be a multiple
620        * of 256B.
621        */
622       if (info->gfx_level >= GFX10_3 && surf->u.gfx9.uses_custom_pitch) {
623          ASSERTED unsigned min_alignment = info->gfx_level >= GFX12 ? 128 : 256;
624          assert((surf->u.gfx9.surf_pitch * surf->bpe) % min_alignment == 0);
625          assert(surf->is_linear);
626          unsigned pitch = surf->u.gfx9.surf_pitch;
627 
628          /* Subsampled images have the pitch in the units of blocks. */
629          if (surf->blk_w == 2)
630             pitch *= 2;
631 
632          if (info->gfx_level >= GFX12) {
633             desc[4] |= S_00A010_DEPTH_GFX12(pitch - 1) | /* DEPTH contains low bits of PITCH. */
634                        S_00A010_PITCH_MSB_GFX12((pitch - 1) >> 14);
635          } else {
636             desc[4] |= S_00A010_DEPTH_GFX10(pitch - 1) | /* DEPTH contains low bits of PITCH. */
637                        S_00A010_PITCH_MSB_GFX103((pitch - 1) >> 13);
638          }
639       }
640 
641       if (info->gfx_level >= GFX12) {
642          /* Color and Z/S always support compressed image stores on Gfx12. Enablement is
643           * mostly controlled by PTE.D (page table bit). The rule is:
644           *
645           * Shader Engines (shaders, CB, DB, SC):
646           *    COMPRESSION_ENABLED = PTE.D && COMPRESSION_EN;
647           *
648           * Central Hub (CP, SDMA, indices, tess factor loads):
649           *    PTE.D is ignored. Packets and states fully determine enablement.
650           *
651           * If !PTE.D, the states enabling compression in shaders, CB, DB, and SC have no effect.
652           * PTE.D is set per buffer allocation in Linux, not per VM page, so that it's
653           * automatically propagated between processes. We could optionally allow setting it
654           * per VM page too.
655           *
656           * The DCC/HTILE buffer isn't allocated separately on Gfx12 anymore. The DCC/HTILE
657           * metadata storage is mostly hidden from userspace, and any buffer can be compressed.
658           */
659          if (state->dcc_enabled) {
660             desc[6] |= S_00A018_COMPRESSION_EN(1) |
661                        S_00A018_WRITE_COMPRESS_ENABLE(state->gfx10.write_compress_enable);
662          }
663       } else if (meta_va) {
664          /* Gfx10-11. */
665          struct gfx9_surf_meta_flags meta = {
666             .rb_aligned = 1,
667             .pipe_aligned = 1,
668          };
669 
670          if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
671             meta = surf->u.gfx9.color.dcc;
672 
673          desc[6] |= S_00A018_COMPRESSION_EN(1) |
674                     S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
675                     S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8) |
676                     /* DCC image stores require the following settings:
677                      * - INDEPENDENT_64B_BLOCKS = 0
678                      * - INDEPENDENT_128B_BLOCKS = 1
679                      * - MAX_COMPRESSED_BLOCK_SIZE = 128B
680                      * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
681                      *
682                      * The same limitations apply to SDMA compressed stores because
683                      * SDMA uses the same DCC codec.
684                      */
685                     S_00A018_WRITE_COMPRESS_ENABLE(state->gfx10.write_compress_enable) |
686                     /* TC-compatible MSAA HTILE requires ITERATE_256. */
687                     S_00A018_ITERATE_256(state->gfx10.iterate_256);
688 
689          desc[7] = meta_va >> 16;
690       }
691    } else if (info->gfx_level == GFX9) {
692       desc[0] |= surf->tile_swizzle;
693 
694       if (state->is_stencil) {
695          desc[3] |= S_008F1C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode);
696          desc[4] |= S_008F20_PITCH(surf->u.gfx9.zs.stencil_epitch);
697       } else {
698          desc[3] |= S_008F1C_SW_MODE(surf->u.gfx9.swizzle_mode);
699          desc[4] |= S_008F20_PITCH(surf->u.gfx9.epitch);
700       }
701 
702       if (meta_va) {
703          struct gfx9_surf_meta_flags meta = {
704             .rb_aligned = 1,
705             .pipe_aligned = 1,
706          };
707 
708          if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
709             meta = surf->u.gfx9.color.dcc;
710 
711          desc[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
712                     S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
713                     S_008F24_META_RB_ALIGNED(meta.rb_aligned);
714          desc[6] |= S_008F28_COMPRESSION_EN(1);
715          desc[7] = meta_va >> 8;
716       }
717    } else {
718       /* GFX6-GFX8 */
719       unsigned pitch = base_level_info->nblk_x * state->gfx6.block_width;
720       unsigned index = ac_tile_mode_index(surf, state->gfx6.base_level, state->is_stencil);
721 
722       /* Only macrotiled modes can set tile swizzle. */
723       if (base_level_info->mode == RADEON_SURF_MODE_2D)
724          desc[0] |= surf->tile_swizzle;
725 
726       desc[3] |= S_008F1C_TILING_INDEX(index);
727       desc[4] |= S_008F20_PITCH(pitch - 1);
728 
729       if (info->gfx_level == GFX8 && meta_va) {
730          desc[6] |= S_008F28_COMPRESSION_EN(1);
731          desc[7] = meta_va >> 8;
732       }
733    }
734 }
735 
736 void
ac_set_buf_desc_word3(const enum amd_gfx_level gfx_level,const struct ac_buffer_state * state,uint32_t * rsrc_word3)737 ac_set_buf_desc_word3(const enum amd_gfx_level gfx_level, const struct ac_buffer_state *state, uint32_t *rsrc_word3)
738 {
739    *rsrc_word3 = S_008F0C_DST_SEL_X(ac_map_swizzle(state->swizzle[0])) |
740                  S_008F0C_DST_SEL_Y(ac_map_swizzle(state->swizzle[1])) |
741                  S_008F0C_DST_SEL_Z(ac_map_swizzle(state->swizzle[2])) |
742                  S_008F0C_DST_SEL_W(ac_map_swizzle(state->swizzle[3])) |
743                  S_008F0C_INDEX_STRIDE(state->index_stride) |
744                  S_008F0C_ADD_TID_ENABLE(state->add_tid);
745 
746    if (gfx_level >= GFX10) {
747       const struct gfx10_format *fmt = &ac_get_gfx10_format_table(gfx_level)[state->format];
748 
749       /* OOB_SELECT chooses the out-of-bounds check.
750        *
751        * GFX10:
752        *  - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
753        *  - 1: index >= NUM_RECORDS
754        *  - 2: NUM_RECORDS == 0
755        *  - 3: if SWIZZLE_ENABLE:
756        *          swizzle_address >= NUM_RECORDS
757        *       else:
758        *          offset >= NUM_RECORDS
759        *
760        * GFX11+:
761        *  - 0: (index >= NUM_RECORDS) || (offset+payload > STRIDE)
762        *  - 1: index >= NUM_RECORDS
763        *  - 2: NUM_RECORDS == 0
764        *  - 3: if SWIZZLE_ENABLE && STRIDE:
765        *          (index >= NUM_RECORDS) || ( offset+payload > STRIDE)
766        *       else:
767        *          offset+payload > NUM_RECORDS
768        */
769       *rsrc_word3 |= (gfx_level >= GFX12 ? S_008F0C_FORMAT_GFX12(fmt->img_format) :
770                                            S_008F0C_FORMAT_GFX10(fmt->img_format)) |
771                      S_008F0C_OOB_SELECT(state->gfx10_oob_select) |
772                      S_008F0C_RESOURCE_LEVEL(gfx_level < GFX11);
773 
774       if (gfx_level >= GFX12) {
775          *rsrc_word3 |= S_008F0C_COMPRESSION_EN(state->gfx12.compression_en) |
776                         S_008F0C_WRITE_COMPRESS_ENABLE(state->gfx12.write_compress_enable);
777       }
778    } else {
779       const struct util_format_description * desc =  util_format_description(state->format);
780       const int first_non_void = util_format_get_first_non_void_channel(state->format);
781       const uint32_t num_format = ac_translate_buffer_numformat(desc, first_non_void);
782 
783       /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
784       const uint32_t data_format =
785          gfx_level >= GFX8 && state->add_tid ? 0 : ac_translate_buffer_dataformat(desc, first_non_void);
786 
787       *rsrc_word3 |= S_008F0C_NUM_FORMAT(num_format) |
788                      S_008F0C_DATA_FORMAT(data_format) |
789                      S_008F0C_ELEMENT_SIZE(state->element_size);
790    }
791 }
792 
793 void
ac_build_buffer_descriptor(const enum amd_gfx_level gfx_level,const struct ac_buffer_state * state,uint32_t desc[4])794 ac_build_buffer_descriptor(const enum amd_gfx_level gfx_level, const struct ac_buffer_state *state, uint32_t desc[4])
795 {
796    uint32_t rsrc_word1 = S_008F04_BASE_ADDRESS_HI(state->va >> 32) | S_008F04_STRIDE(state->stride);
797    uint32_t rsrc_word3;
798 
799    if (gfx_level >= GFX11) {
800       rsrc_word1 |= S_008F04_SWIZZLE_ENABLE_GFX11(state->swizzle_enable);
801    } else {
802       rsrc_word1 |= S_008F04_SWIZZLE_ENABLE_GFX6(state->swizzle_enable);
803    }
804 
805    ac_set_buf_desc_word3(gfx_level, state, &rsrc_word3);
806 
807    desc[0] = state->va;
808    desc[1] = rsrc_word1;
809    desc[2] = state->size;
810    desc[3] = rsrc_word3;
811 }
812 
813 void
ac_build_raw_buffer_descriptor(const enum amd_gfx_level gfx_level,uint64_t va,uint32_t size,uint32_t desc[4])814 ac_build_raw_buffer_descriptor(const enum amd_gfx_level gfx_level, uint64_t va, uint32_t size, uint32_t desc[4])
815 {
816    const struct ac_buffer_state ac_state = {
817       .va = va,
818       .size = size,
819       .format = PIPE_FORMAT_R32_FLOAT,
820       .swizzle = {
821          PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
822       },
823       .gfx10_oob_select = V_008F0C_OOB_SELECT_RAW,
824    };
825 
826    ac_build_buffer_descriptor(gfx_level, &ac_state, desc);
827 }
828 
829 void
ac_build_attr_ring_descriptor(const enum amd_gfx_level gfx_level,uint64_t va,uint32_t size,uint32_t stride,uint32_t desc[4])830 ac_build_attr_ring_descriptor(const enum amd_gfx_level gfx_level, uint64_t va, uint32_t size, uint32_t stride, uint32_t desc[4])
831 {
832    assert(gfx_level >= GFX11);
833 
834    const struct ac_buffer_state ac_state = {
835       .va = va,
836       .size = size,
837       .format = PIPE_FORMAT_R32G32B32A32_FLOAT,
838       .swizzle = {
839          PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
840       },
841       .stride = stride,
842       .gfx10_oob_select = V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET,
843       .swizzle_enable = 3, /* 16B */
844       .index_stride = 2, /* 32 elements */
845    };
846 
847    ac_build_buffer_descriptor(gfx_level, &ac_state, desc);
848 }
849 
850 static void
ac_init_gfx6_ds_surface(const struct radeon_info * info,const struct ac_ds_state * state,uint32_t db_format,uint32_t stencil_format,struct ac_ds_surface * ds)851 ac_init_gfx6_ds_surface(const struct radeon_info *info, const struct ac_ds_state *state,
852                         uint32_t db_format, uint32_t stencil_format, struct ac_ds_surface *ds)
853 {
854    const struct radeon_surf *surf = state->surf;
855    const struct legacy_surf_level *level_info = &surf->u.legacy.level[state->level];
856 
857    assert(level_info->nblk_x % 8 == 0 && level_info->nblk_y % 8 == 0);
858 
859    if (state->stencil_only)
860       level_info = &surf->u.legacy.zs.stencil_level[state->level];
861 
862    ds->u.gfx6.db_htile_data_base = 0;
863    ds->u.gfx6.db_htile_surface = 0;
864    ds->db_depth_base = (state->va >> 8) + surf->u.legacy.level[state->level].offset_256B;
865    ds->db_stencil_base = (state->va >> 8) + surf->u.legacy.zs.stencil_level[state->level].offset_256B;
866    ds->db_depth_view = S_028008_SLICE_START(state->first_layer) |
867                        S_028008_SLICE_MAX(state->last_layer) |
868                        S_028008_Z_READ_ONLY(state->z_read_only) |
869                        S_028008_STENCIL_READ_ONLY(state->stencil_read_only);
870    ds->db_z_info = S_028040_FORMAT(db_format) |
871                    S_028040_NUM_SAMPLES(util_logbase2(state->num_samples));
872    ds->db_stencil_info = S_028044_FORMAT(stencil_format);
873 
874    if (info->gfx_level >= GFX7) {
875       const uint32_t index = surf->u.legacy.tiling_index[state->level];
876       const uint32_t stencil_index = surf->u.legacy.zs.stencil_tiling_index[state->level];
877       const uint32_t macro_index = surf->u.legacy.macro_tile_index;
878       const uint32_t stencil_tile_mode = info->si_tile_mode_array[stencil_index];
879       const uint32_t macro_mode = info->cik_macrotile_mode_array[macro_index];
880       uint32_t tile_mode = info->si_tile_mode_array[index];
881 
882       if (state->stencil_only)
883          tile_mode = stencil_tile_mode;
884 
885       ds->u.gfx6.db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
886                                   S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
887                                   S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
888                                   S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
889                                   S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
890                                   S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
891       ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
892       ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
893    } else {
894       uint32_t tile_mode_index = ac_tile_mode_index(surf, state->level, false);
895       ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
896 
897       tile_mode_index = ac_tile_mode_index(surf, state->level, true);
898       ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
899       if (state->stencil_only)
900          ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
901    }
902 
903    ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
904                        S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
905    ds->u.gfx6.db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
906 
907    if (state->htile_enabled) {
908       ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
909                        S_028040_ALLOW_EXPCLEAR(state->allow_expclear);
910       ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(state->htile_stencil_disabled);
911 
912       if (surf->has_stencil) {
913          /* Workaround: For a not yet understood reason, the
914           * combination of MSAA, fast stencil clear and stencil
915           * decompress messes with subsequent stencil buffer
916           * uses. Problem was reproduced on Verde, Bonaire,
917           * Tonga, and Carrizo.
918           *
919           * Disabling EXPCLEAR works around the problem.
920           *
921           * Check piglit's arb_texture_multisample-stencil-clear
922           * test if you want to try changing this.
923           */
924          if (state->num_samples <= 1)
925             ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(state->allow_expclear);
926       }
927 
928       ds->u.gfx6.db_htile_data_base = (state->va + surf->meta_offset) >> 8;
929       ds->u.gfx6.db_htile_surface = S_028ABC_FULL_CACHE(1);
930    }
931 }
932 
933 static void
ac_init_gfx9_ds_surface(const struct radeon_info * info,const struct ac_ds_state * state,uint32_t db_format,uint32_t stencil_format,struct ac_ds_surface * ds)934 ac_init_gfx9_ds_surface(const struct radeon_info *info, const struct ac_ds_state *state,
935                         uint32_t db_format, uint32_t stencil_format, struct ac_ds_surface *ds)
936 {
937    const struct radeon_surf *surf = state->surf;
938 
939    assert(surf->u.gfx9.surf_offset == 0);
940 
941    ds->u.gfx6.db_htile_data_base = 0;
942    ds->u.gfx6.db_htile_surface = 0;
943    ds->db_depth_base = state->va >> 8;
944    ds->db_stencil_base = (state->va + surf->u.gfx9.zs.stencil_offset) >> 8;
945    ds->db_depth_view = S_028008_SLICE_START(state->first_layer) |
946                        S_028008_SLICE_MAX(state->last_layer) |
947                        S_028008_Z_READ_ONLY(state->z_read_only) |
948                        S_028008_STENCIL_READ_ONLY(state->stencil_read_only) |
949                        S_028008_MIPID_GFX9(state->level);
950 
951    if (info->gfx_level >= GFX10) {
952       ds->db_depth_view |= S_028008_SLICE_START_HI(state->first_layer >> 11) |
953                            S_028008_SLICE_MAX_HI(state->last_layer >> 11);
954    }
955 
956    ds->db_z_info = S_028038_FORMAT(db_format) |
957                    S_028038_NUM_SAMPLES(util_logbase2(state->num_samples)) |
958                    S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
959                    S_028038_MAXMIP(state->num_levels - 1) |
960                    S_028040_ITERATE_256(info->gfx_level >= GFX11);
961    ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
962                          S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
963                          S_028044_ITERATE_256(info->gfx_level >= GFX11);
964 
965    if (info->gfx_level == GFX9) {
966       ds->u.gfx6.db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
967       ds->u.gfx6.db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
968    }
969 
970    ds->db_depth_size = S_02801C_X_MAX(state->width - 1) |
971                        S_02801C_Y_MAX(state->height - 1);
972 
973    if (state->htile_enabled) {
974       ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1) |
975                        S_028038_ALLOW_EXPCLEAR(state->allow_expclear);
976       ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(state->htile_stencil_disabled);
977 
978       if (surf->has_stencil && !state->htile_stencil_disabled && state->num_samples <= 1) {
979          /* Stencil buffer workaround ported from the GFX6-GFX8 code.
980           * See that for explanation.
981           */
982          ds->db_stencil_info |= S_02803C_ALLOW_EXPCLEAR(state->allow_expclear);
983       }
984 
985       ds->u.gfx6.db_htile_data_base = (state->va + surf->meta_offset) >> 8;
986       ds->u.gfx6.db_htile_surface = S_028ABC_FULL_CACHE(1) |
987                                     S_028ABC_PIPE_ALIGNED(1);
988 
989       if (state->vrs_enabled) {
990          assert(info->gfx_level == GFX10_3);
991          ds->u.gfx6.db_htile_surface |= S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
992       } else if (info->gfx_level == GFX9) {
993          ds->u.gfx6.db_htile_surface |= S_028ABC_RB_ALIGNED(1);
994       }
995    }
996 }
997 
998 static void
ac_init_gfx12_ds_surface(const struct radeon_info * info,const struct ac_ds_state * state,uint32_t db_format,uint32_t stencil_format,struct ac_ds_surface * ds)999 ac_init_gfx12_ds_surface(const struct radeon_info *info, const struct ac_ds_state *state,
1000                          uint32_t db_format, uint32_t stencil_format, struct ac_ds_surface *ds)
1001 {
1002    const struct radeon_surf *surf = state->surf;
1003 
1004    assert(db_format != V_028040_Z_24);
1005 
1006    ds->db_depth_view = S_028004_SLICE_START(state->first_layer) |
1007                        S_028004_SLICE_MAX(state->last_layer);
1008    ds->u.gfx12.db_depth_view1 = S_028008_MIPID_GFX12(state->level);
1009    ds->db_depth_size = S_028014_X_MAX(state->width - 1) |
1010                        S_028014_Y_MAX(state->height - 1);
1011    ds->db_z_info = S_028018_FORMAT(db_format) |
1012                    S_028018_NUM_SAMPLES(util_logbase2(state->num_samples)) |
1013                    S_028018_SW_MODE(surf->u.gfx9.swizzle_mode) |
1014                    S_028018_MAXMIP(state->num_levels - 1);
1015    ds->db_stencil_info = S_02801C_FORMAT(stencil_format) |
1016                          S_02801C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
1017                          S_02801C_TILE_STENCIL_DISABLE(1);
1018    ds->db_depth_base = state->va >> 8;
1019    ds->db_stencil_base = (state->va + surf->u.gfx9.zs.stencil_offset) >> 8;
1020    ds->u.gfx12.hiz_info = 0;
1021    ds->u.gfx12.his_info = 0;
1022 
1023    /* HiZ. */
1024    if (surf->u.gfx9.zs.hiz.offset) {
1025       ds->u.gfx12.hiz_info = S_028B94_SURFACE_ENABLE(1) |
1026                              S_028B94_FORMAT(0) | /* unorm16 */
1027                              S_028B94_SW_MODE(surf->u.gfx9.zs.hiz.swizzle_mode);
1028       ds->u.gfx12.hiz_size_xy = S_028BA4_X_MAX(surf->u.gfx9.zs.hiz.width_in_tiles - 1) |
1029                                 S_028BA4_Y_MAX(surf->u.gfx9.zs.hiz.height_in_tiles - 1);
1030       ds->u.gfx12.hiz_base = (state->va + surf->u.gfx9.zs.hiz.offset) >> 8;
1031    }
1032 
1033    /* HiS. */
1034    if (surf->u.gfx9.zs.his.offset) {
1035       ds->u.gfx12.his_info = S_028B98_SURFACE_ENABLE(1) |
1036                              S_028B98_SW_MODE(surf->u.gfx9.zs.his.swizzle_mode);
1037       ds->u.gfx12.his_size_xy = S_028BB0_X_MAX(surf->u.gfx9.zs.his.width_in_tiles - 1) |
1038                                 S_028BB0_Y_MAX(surf->u.gfx9.zs.his.height_in_tiles - 1);
1039       ds->u.gfx12.his_base = (state->va + surf->u.gfx9.zs.his.offset) >> 8;
1040    }
1041 }
1042 
1043 void
ac_init_ds_surface(const struct radeon_info * info,const struct ac_ds_state * state,struct ac_ds_surface * ds)1044 ac_init_ds_surface(const struct radeon_info *info, const struct ac_ds_state *state, struct ac_ds_surface *ds)
1045 {
1046    const struct radeon_surf *surf = state->surf;
1047    const uint32_t db_format = ac_translate_dbformat(state->format);
1048    const uint32_t stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
1049 
1050    if (info->gfx_level >= GFX12) {
1051       ac_init_gfx12_ds_surface(info, state, db_format, stencil_format, ds);
1052    } else if (info->gfx_level >= GFX9) {
1053       ac_init_gfx9_ds_surface(info, state, db_format, stencil_format, ds);
1054    } else {
1055       ac_init_gfx6_ds_surface(info, state, db_format, stencil_format, ds);
1056    }
1057 }
1058 
1059 static unsigned
ac_get_decompress_on_z_planes(const struct radeon_info * info,enum pipe_format format,uint8_t log_num_samples,bool htile_stencil_disabled,bool no_d16_compression)1060 ac_get_decompress_on_z_planes(const struct radeon_info *info, enum pipe_format format, uint8_t log_num_samples,
1061                               bool htile_stencil_disabled, bool no_d16_compression)
1062 {
1063    uint32_t max_zplanes = 0;
1064 
1065    if (info->gfx_level >= GFX9) {
1066       const bool iterate256 = info->gfx_level >= GFX10 && log_num_samples >= 1;
1067 
1068       /* Default value for 32-bit depth surfaces. */
1069       max_zplanes = 4;
1070 
1071       if (format == PIPE_FORMAT_Z16_UNORM && log_num_samples > 0)
1072          max_zplanes = 2;
1073 
1074       /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
1075       if (info->has_two_planes_iterate256_bug && iterate256 && !htile_stencil_disabled && log_num_samples == 2)
1076          max_zplanes = 1;
1077 
1078       max_zplanes++;
1079    } else {
1080       if (format == PIPE_FORMAT_Z16_UNORM && no_d16_compression) {
1081          /* Do not enable Z plane compression for 16-bit depth
1082           * surfaces because isn't supported on GFX8. Only
1083           * 32-bit depth surfaces are supported by the hardware.
1084           * This allows to maintain shader compatibility and to
1085           * reduce the number of depth decompressions.
1086           */
1087          max_zplanes = 1;
1088       } else {
1089          /* 0 = full compression. N = only compress up to N-1 Z planes. */
1090          if (log_num_samples == 0)
1091             max_zplanes = 5;
1092          else if (log_num_samples <= 2)
1093             max_zplanes = 3;
1094          else
1095             max_zplanes = 2;
1096       }
1097    }
1098 
1099    return max_zplanes;
1100 }
1101 
1102 void
ac_set_mutable_ds_surface_fields(const struct radeon_info * info,const struct ac_mutable_ds_state * state,struct ac_ds_surface * ds)1103 ac_set_mutable_ds_surface_fields(const struct radeon_info *info, const struct ac_mutable_ds_state *state,
1104                                  struct ac_ds_surface *ds)
1105 {
1106    bool tile_stencil_disable = false;
1107    uint32_t log_num_samples;
1108 
1109    memcpy(ds, state->ds, sizeof(*ds));
1110 
1111    if (info->gfx_level >= GFX12)
1112       return;
1113 
1114    if (info->gfx_level >= GFX9) {
1115       log_num_samples = G_028038_NUM_SAMPLES(ds->db_z_info);
1116       tile_stencil_disable = G_02803C_TILE_STENCIL_DISABLE(ds->db_stencil_info);
1117    } else {
1118       log_num_samples = G_028040_NUM_SAMPLES(ds->db_z_info);
1119    }
1120 
1121    const uint32_t max_zplanes =
1122       ac_get_decompress_on_z_planes(info, state->format, log_num_samples,
1123                                     tile_stencil_disable, state->no_d16_compression);
1124 
1125    if (info->gfx_level >= GFX9) {
1126       if (state->tc_compat_htile_enabled) {
1127          ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
1128 
1129          if (info->gfx_level >= GFX10) {
1130             const bool iterate256 = log_num_samples >= 1;
1131 
1132             ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
1133             ds->db_stencil_info |= S_028044_ITERATE_FLUSH(!tile_stencil_disable);
1134             ds->db_z_info |= S_028040_ITERATE_256(iterate256);
1135             ds->db_stencil_info |= S_028044_ITERATE_256(iterate256);
1136          } else {
1137             ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
1138             ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
1139          }
1140       }
1141 
1142       ds->db_z_info |= S_028038_ZRANGE_PRECISION(state->zrange_precision);
1143    } else {
1144       if (state->tc_compat_htile_enabled) {
1145          ds->u.gfx6.db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
1146          ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
1147       } else {
1148          ds->u.gfx6.db_depth_info |= S_02803C_ADDR5_SWIZZLE_MASK(1);
1149       }
1150 
1151       ds->db_z_info |= S_028040_ZRANGE_PRECISION(state->zrange_precision);
1152    }
1153 }
1154 
1155 static uint32_t
ac_get_dcc_min_compressed_block_size(const struct radeon_info * info)1156 ac_get_dcc_min_compressed_block_size(const struct radeon_info *info)
1157 {
1158    /* This should typically match the request size of the memory type. DIMMs have 64B minimum
1159     * request size, which means compressing 64B to 32B has no benefit, while GDDR and HBM have
1160     * 32B minimum request size. Sometimes a different size is used depending on the data fabric,
1161     * etc.
1162     */
1163    return info->has_dedicated_vram || info->family == CHIP_GFX1151 ?
1164             V_028C78_MIN_BLOCK_SIZE_32B : V_028C78_MIN_BLOCK_SIZE_64B;
1165 }
1166 
1167 static void
ac_init_gfx6_cb_surface(const struct radeon_info * info,const struct ac_cb_state * state,uint32_t cb_format,bool force_dst_alpha_1,struct ac_cb_surface * cb)1168 ac_init_gfx6_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state,
1169                         uint32_t cb_format, bool force_dst_alpha_1, struct ac_cb_surface *cb)
1170 {
1171    const struct radeon_surf *surf = state->surf;
1172    const uint32_t endian = ac_colorformat_endian_swap(cb_format);
1173 
1174    cb->cb_color_info |= S_028C70_ENDIAN(endian) |
1175                         S_028C70_FORMAT_GFX6(cb_format) |
1176                         S_028C70_COMPRESSION(!!surf->fmask_offset);
1177    cb->cb_color_view = S_028C6C_SLICE_START(state->first_layer) |
1178                        S_028C6C_SLICE_MAX_GFX6(state->last_layer);
1179    cb->cb_color_attrib = S_028C74_NUM_SAMPLES(util_logbase2(state->num_samples)) |
1180                          S_028C74_NUM_FRAGMENTS_GFX6(util_logbase2(state->num_storage_samples)) |
1181                          S_028C74_FORCE_DST_ALPHA_1_GFX6(force_dst_alpha_1);
1182    cb->cb_color_attrib2 = 0;
1183    cb->cb_dcc_control = 0;
1184 
1185    if (info->gfx_level == GFX9) {
1186       cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(state->base_level);
1187       cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(state->num_layers) |
1188                              S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
1189       cb->cb_color_attrib2 |= S_028C68_MIP0_WIDTH(state->width - 1) |
1190                               S_028C68_MIP0_HEIGHT(state->height - 1) |
1191                               S_028C68_MAX_MIP(state->num_levels - 1);
1192    }
1193 
1194    if (info->gfx_level >= GFX8) {
1195       uint32_t max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
1196 
1197       if (state->num_storage_samples > 1) {
1198          if (surf->bpe == 1)
1199             max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
1200          else if (surf->bpe == 2)
1201             max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
1202       }
1203 
1204       cb->cb_dcc_control |= S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
1205                             S_028C78_MIN_COMPRESSED_BLOCK_SIZE(ac_get_dcc_min_compressed_block_size(info)) |
1206                             S_028C78_INDEPENDENT_64B_BLOCKS(1);
1207    }
1208 
1209    if (info->gfx_level == GFX6) {
1210       /* Due to a hw bug, FMASK_BANK_HEIGHT must still be set on GFX6. (inherited from GFX5) */
1211       /* This must also be set for fast clear to work without FMASK. */
1212       const uint32_t fmask_bankh = surf->fmask_offset ? surf->u.legacy.color.fmask.bankh
1213                                                       : surf->u.legacy.bankh;
1214       cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(util_logbase2(fmask_bankh));
1215    }
1216 }
1217 
1218 static void
ac_init_gfx10_cb_surface(const struct radeon_info * info,const struct ac_cb_state * state,uint32_t cb_format,bool force_dst_alpha_1,uint32_t width,struct ac_cb_surface * cb)1219 ac_init_gfx10_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state,
1220                          uint32_t cb_format, bool force_dst_alpha_1, uint32_t width,
1221                          struct ac_cb_surface *cb)
1222 {
1223    const struct radeon_surf *surf = state->surf;
1224    uint32_t first_layer = state->first_layer;
1225    uint32_t base_level = state->base_level;
1226    uint32_t num_levels = state->num_levels;
1227 
1228    if (state->gfx10.nbc_view) {
1229       assert(state->gfx10.nbc_view->valid);
1230       first_layer = 0;
1231       base_level = state->gfx10.nbc_view->level;
1232       num_levels = state->gfx10.nbc_view->num_levels;
1233    }
1234 
1235    cb->cb_color_view = S_028C6C_SLICE_START(first_layer) |
1236                        S_028C6C_SLICE_MAX_GFX10(state->last_layer) |
1237                        S_028C6C_MIP_LEVEL_GFX10(base_level);
1238    cb->cb_color_attrib = 0;
1239    cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
1240                           S_028C68_MIP0_HEIGHT(state->height - 1) |
1241                           S_028C68_MAX_MIP(num_levels - 1);
1242    cb->cb_color_attrib3 = S_028EE0_MIP0_DEPTH(state->num_layers) |
1243                           S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
1244                           S_028EE0_RESOURCE_LEVEL(info->gfx_level >= GFX11 ? 0 : 1);
1245    cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
1246                         S_028C78_MAX_COMPRESSED_BLOCK_SIZE(surf->u.gfx9.color.dcc.max_compressed_block_size) |
1247                         S_028C78_MIN_COMPRESSED_BLOCK_SIZE(ac_get_dcc_min_compressed_block_size(info)) |
1248                         S_028C78_INDEPENDENT_64B_BLOCKS(surf->u.gfx9.color.dcc.independent_64B_blocks);
1249 
1250    if (info->gfx_level >= GFX11) {
1251       assert(!UTIL_ARCH_BIG_ENDIAN);
1252       cb->cb_color_info |= S_028C70_FORMAT_GFX11(cb_format);
1253       cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(util_logbase2(state->num_storage_samples)) |
1254                              S_028C74_FORCE_DST_ALPHA_1_GFX11(force_dst_alpha_1);
1255       cb->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(surf->u.gfx9.color.dcc.independent_128B_blocks);
1256    } else {
1257       const uint32_t endian = ac_colorformat_endian_swap(cb_format);
1258 
1259       cb->cb_color_info |= S_028C70_ENDIAN(endian) |
1260                            S_028C70_FORMAT_GFX6(cb_format) |
1261                            S_028C70_COMPRESSION(!!surf->fmask_offset);
1262       cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(util_logbase2(state->num_samples)) |
1263                              S_028C74_NUM_FRAGMENTS_GFX6(util_logbase2(state->num_storage_samples)) |
1264                              S_028C74_FORCE_DST_ALPHA_1_GFX6(force_dst_alpha_1);
1265       cb->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX10(surf->u.gfx9.color.dcc.independent_128B_blocks);
1266    }
1267 }
1268 
1269 static void
ac_init_gfx12_cb_surface(const struct radeon_info * info,const struct ac_cb_state * state,uint32_t cb_format,bool force_dst_alpha_1,uint32_t width,struct ac_cb_surface * cb)1270 ac_init_gfx12_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state,
1271                          uint32_t cb_format, bool force_dst_alpha_1, uint32_t width,
1272                          struct ac_cb_surface *cb)
1273 {
1274    const struct radeon_surf *surf = state->surf;
1275    uint32_t first_layer = state->first_layer;
1276    uint32_t base_level = state->base_level;
1277    uint32_t num_levels = state->num_levels;
1278 
1279    if (state->gfx10.nbc_view) {
1280       assert(state->gfx10.nbc_view->valid);
1281       first_layer = 0;
1282       base_level = state->gfx10.nbc_view->level;
1283       num_levels = state->gfx10.nbc_view->num_levels;
1284    }
1285 
1286    assert(!UTIL_ARCH_BIG_ENDIAN);
1287    cb->cb_color_info |= S_028EC0_FORMAT(cb_format);
1288    cb->cb_color_view = S_028C64_SLICE_START(first_layer) |
1289                        S_028C64_SLICE_MAX(state->last_layer);
1290    cb->cb_color_view2 = S_028C68_MIP_LEVEL(base_level);
1291    cb->cb_color_attrib = S_028C6C_NUM_FRAGMENTS(util_logbase2(state->num_storage_samples)) |
1292                          S_028C6C_FORCE_DST_ALPHA_1(force_dst_alpha_1);
1293    cb->cb_color_attrib2 = S_028C78_MIP0_HEIGHT(state->height - 1) |
1294                           S_028C78_MIP0_WIDTH(width - 1);
1295    cb->cb_color_attrib3 = S_028C7C_MIP0_DEPTH(state->num_layers) |
1296                           S_028C7C_MAX_MIP(num_levels - 1) |
1297                           S_028C7C_RESOURCE_TYPE(surf->u.gfx9.resource_type);
1298    cb->cb_dcc_control = S_028C70_MAX_UNCOMPRESSED_BLOCK_SIZE(1) | /* 256B */
1299                         S_028C70_MAX_COMPRESSED_BLOCK_SIZE(surf->u.gfx9.color.dcc.max_compressed_block_size) |
1300                         S_028C70_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) |
1301                         S_028C70_MAX_COMP_FRAGS(state->num_samples >= 8 ? 3 :
1302                                                 state->num_samples >= 4 ? 2 : 0);
1303 }
1304 
1305 void
ac_init_cb_surface(const struct radeon_info * info,const struct ac_cb_state * state,struct ac_cb_surface * cb)1306 ac_init_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state, struct ac_cb_surface *cb)
1307 {
1308    const struct util_format_description *desc = util_format_description(state->format);
1309    const uint32_t cb_format = ac_get_cb_format(info->gfx_level, state->format);
1310    const struct radeon_surf *surf = state->surf;
1311    uint32_t width = state->width;
1312 
1313    assert(cb_format != V_028C70_COLOR_INVALID);
1314 
1315    /* Intensity is implemented as Red, so treat it that way. */
1316    const bool force_dst_alpha_1 =
1317       desc->swizzle[3] == PIPE_SWIZZLE_1 || util_format_is_intensity(state->format);
1318 
1319    /* GFX10.3+ can set a custom pitch for 1D and 2D non-array, but it must be a multiple of
1320     * 256B for GFX10.3-11 and 128B for GFX12.
1321     *
1322     * We set the pitch in MIP0_WIDTH.
1323     */
1324    if (info->gfx_level >= GFX10_3 && surf->u.gfx9.uses_custom_pitch) {
1325       ASSERTED unsigned min_alignment = info->gfx_level >= GFX12 ? 128 : 256;
1326       assert((surf->u.gfx9.surf_pitch * surf->bpe) % min_alignment == 0);
1327       assert(surf->is_linear);
1328 
1329       width = surf->u.gfx9.surf_pitch;
1330 
1331       /* Subsampled images have the pitch in the units of blocks. */
1332       if (surf->blk_w == 2)
1333          width *= 2;
1334    }
1335 
1336    const uint32_t swap = ac_translate_colorswap(info->gfx_level, state->format, false);
1337    const uint32_t ntype = ac_get_cb_number_type(state->format);
1338    uint32_t blend_clamp = 0, blend_bypass = 0;
1339 
1340    /* blend clamp should be set for all NORM/SRGB types */
1341    if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
1342        ntype == V_028C70_NUMBER_SRGB)
1343       blend_clamp = 1;
1344 
1345    /* set blend bypass according to docs if SINT/UINT or 8/24 COLOR variants */
1346    if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
1347        cb_format == V_028C70_COLOR_8_24 || cb_format == V_028C70_COLOR_24_8 ||
1348        cb_format == V_028C70_COLOR_X24_8_32_FLOAT) {
1349       blend_clamp = 0;
1350       blend_bypass = 1;
1351    }
1352 
1353    const bool round_mode = ntype != V_028C70_NUMBER_UNORM &&
1354                            ntype != V_028C70_NUMBER_SNORM &&
1355                            ntype != V_028C70_NUMBER_SRGB &&
1356                            cb_format != V_028C70_COLOR_8_24 &&
1357                            cb_format != V_028C70_COLOR_24_8;
1358 
1359    cb->cb_color_info = S_028C70_COMP_SWAP(swap) |
1360                        S_028C70_BLEND_CLAMP(blend_clamp) |
1361                        S_028C70_BLEND_BYPASS(blend_bypass) |
1362                        S_028C70_SIMPLE_FLOAT(1) |
1363                        S_028C70_ROUND_MODE(round_mode) |
1364                        S_028C70_NUMBER_TYPE(ntype);
1365 
1366    if (info->gfx_level >= GFX12) {
1367       ac_init_gfx12_cb_surface(info, state, cb_format, force_dst_alpha_1, width, cb);
1368    } else if (info->gfx_level >= GFX10) {
1369       ac_init_gfx10_cb_surface(info, state, cb_format, force_dst_alpha_1, width, cb);
1370    } else {
1371       ac_init_gfx6_cb_surface(info, state, cb_format, force_dst_alpha_1, cb);
1372    }
1373 }
1374 
1375 void
ac_set_mutable_cb_surface_fields(const struct radeon_info * info,const struct ac_mutable_cb_state * state,struct ac_cb_surface * cb)1376 ac_set_mutable_cb_surface_fields(const struct radeon_info *info, const struct ac_mutable_cb_state *state,
1377                                  struct ac_cb_surface *cb)
1378 {
1379    const struct radeon_surf *surf = state->surf;
1380    uint8_t tile_swizzle = surf->tile_swizzle;
1381    uint64_t va = state->va;
1382 
1383    memcpy(cb, state->cb, sizeof(*cb));
1384 
1385    if (state->gfx10.nbc_view) {
1386       assert(state->gfx10.nbc_view->valid);
1387       va += state->gfx10.nbc_view->base_address_offset;
1388       tile_swizzle = state->gfx10.nbc_view->tile_swizzle;
1389    }
1390 
1391    cb->cb_color_base = va >> 8;
1392 
1393    if (info->gfx_level >= GFX9) {
1394       cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
1395       cb->cb_color_base |= tile_swizzle;
1396    } else {
1397       const struct legacy_surf_level *level_info = &surf->u.legacy.level[state->base_level];
1398 
1399       cb->cb_color_base += level_info->offset_256B;
1400 
1401       /* Only macrotiled modes can set tile swizzle. */
1402       if (level_info->mode == RADEON_SURF_MODE_2D)
1403          cb->cb_color_base |= tile_swizzle;
1404    }
1405 
1406    if (info->gfx_level >= GFX12) {
1407       cb->cb_color_attrib3 |= S_028C7C_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode);
1408       return;
1409    }
1410 
1411    /* Set up DCC. */
1412    if (state->dcc_enabled) {
1413       cb->cb_dcc_base = (va + surf->meta_offset) >> 8;
1414 
1415       if (info->gfx_level == GFX8)
1416          cb->cb_dcc_base += surf->u.legacy.color.dcc_level[state->base_level].dcc_offset >> 8;
1417 
1418       uint32_t dcc_tile_swizzle = tile_swizzle;
1419       dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8;
1420       cb->cb_dcc_base |= dcc_tile_swizzle;
1421    }
1422 
1423    if (info->gfx_level >= GFX11) {
1424       cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1425                               S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
1426 
1427       if (state->dcc_enabled) {
1428          cb->cb_dcc_control |= S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
1429                                S_028C78_FDCC_ENABLE(1);
1430 
1431          if (info->family >= CHIP_PHOENIX2) {
1432             cb->cb_dcc_control |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) |
1433                                   S_028C78_MAX_COMP_FRAGS(state->num_samples >= 4);
1434          }
1435       }
1436    } else if (info->gfx_level >= GFX10) {
1437       cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1438                               S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
1439                               S_028EE0_CMASK_PIPE_ALIGNED(1) |
1440                               S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
1441    } else if (info->gfx_level == GFX9) {
1442       struct gfx9_surf_meta_flags meta = {
1443          .rb_aligned = 1,
1444          .pipe_aligned = 1,
1445       };
1446 
1447       if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
1448          meta = surf->u.gfx9.color.dcc;
1449 
1450       cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1451                              S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
1452                              S_028C74_RB_ALIGNED(meta.rb_aligned) |
1453                              S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
1454       cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);
1455    } else {
1456       /* GFX6-8 */
1457       const struct legacy_surf_level *level_info = &surf->u.legacy.level[state->base_level];
1458       uint32_t pitch_tile_max, slice_tile_max, tile_mode_index;
1459 
1460       pitch_tile_max = level_info->nblk_x / 8 - 1;
1461       slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
1462       tile_mode_index = ac_tile_mode_index(surf, state->base_level, false);
1463 
1464       cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
1465       cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
1466       cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
1467 
1468       cb->cb_color_cmask_slice = surf->u.legacy.color.cmask_slice_tile_max;
1469 
1470       if (state->fmask_enabled) {
1471          if (info->gfx_level >= GFX7)
1472             cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
1473          cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
1474          cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
1475       } else {
1476          /* This must be set for fast clear to work without FMASK. */
1477          if (info->gfx_level >= GFX7)
1478             cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
1479          cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
1480          cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
1481       }
1482    }
1483 
1484    if (state->cmask_enabled) {
1485       cb->cb_color_cmask = (va + surf->cmask_offset) >> 8;
1486       cb->cb_color_info |= S_028C70_FAST_CLEAR(state->fast_clear_enabled);
1487    } else {
1488       cb->cb_color_cmask = cb->cb_color_base;
1489    }
1490 
1491    if (state->fmask_enabled) {
1492       cb->cb_color_fmask = (va + surf->fmask_offset) >> 8;
1493       cb->cb_color_fmask |= surf->fmask_tile_swizzle;
1494 
1495       if (state->tc_compat_cmask_enabled) {
1496          assert(state->cmask_enabled);
1497 
1498          /* Allow the texture block to read FMASK directly without decompressing it. */
1499          cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
1500 
1501          if (info->gfx_level == GFX8) {
1502             /* Set CMASK into a tiling format that allows
1503              * the texture block to read it.
1504              */
1505             cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
1506             cb->cb_color_cmask |= surf->fmask_tile_swizzle;
1507          }
1508       }
1509    } else {
1510       cb->cb_color_fmask = cb->cb_color_base;
1511    }
1512 
1513    if (info->gfx_level < GFX11)
1514       cb->cb_color_info |= S_028C70_DCC_ENABLE(state->dcc_enabled);
1515 }
1516