• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2011 Red Hat All Rights Reserved.
3  * Copyright © 2017 Advanced Micro Devices, Inc.
4  *
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #define AC_SURFACE_INCLUDE_NIR
9 #include "ac_surface.h"
10 
11 #include "ac_drm_fourcc.h"
12 #include "ac_gpu_info.h"
13 #include "addrlib/inc/addrinterface.h"
14 #include "addrlib/src/amdgpu_asic_addr.h"
15 #include "amd_family.h"
16 #include "sid.h"
17 #include "util/hash_table.h"
18 #include "util/macros.h"
19 #include "util/simple_mtx.h"
20 #include "util/u_atomic.h"
21 #include "util/format/u_format.h"
22 #include "util/u_math.h"
23 #include "util/u_memory.h"
24 
25 #include <errno.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 
29 #ifdef _WIN32
30 #define AMDGPU_TILING_ARRAY_MODE_SHIFT			0
31 #define AMDGPU_TILING_ARRAY_MODE_MASK			0xf
32 #define AMDGPU_TILING_PIPE_CONFIG_SHIFT			4
33 #define AMDGPU_TILING_PIPE_CONFIG_MASK			0x1f
34 #define AMDGPU_TILING_TILE_SPLIT_SHIFT			9
35 #define AMDGPU_TILING_TILE_SPLIT_MASK			0x7
36 #define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT		12
37 #define AMDGPU_TILING_MICRO_TILE_MODE_MASK		0x7
38 #define AMDGPU_TILING_BANK_WIDTH_SHIFT			15
39 #define AMDGPU_TILING_BANK_WIDTH_MASK			0x3
40 #define AMDGPU_TILING_BANK_HEIGHT_SHIFT			17
41 #define AMDGPU_TILING_BANK_HEIGHT_MASK			0x3
42 #define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT		19
43 #define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK		0x3
44 #define AMDGPU_TILING_NUM_BANKS_SHIFT			21
45 #define AMDGPU_TILING_NUM_BANKS_MASK			0x3
46 #define AMDGPU_TILING_SWIZZLE_MODE_SHIFT		0
47 #define AMDGPU_TILING_SWIZZLE_MODE_MASK			0x1f
48 #define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT		5
49 #define AMDGPU_TILING_DCC_OFFSET_256B_MASK		0xFFFFFF
50 #define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT		29
51 #define AMDGPU_TILING_DCC_PITCH_MAX_MASK		0x3FFF
52 #define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT		43
53 #define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK		0x1
54 #define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT	44
55 #define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK		0x1
56 #define AMDGPU_TILING_SCANOUT_SHIFT			63
57 #define AMDGPU_TILING_SCANOUT_MASK			0x1
58 #define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT			0
59 #define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK			0x7
60 #define AMDGPU_TILING_GFX12_SCANOUT_SHIFT			63
61 #define AMDGPU_TILING_GFX12_SCANOUT_MASK			0x1
62 #define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT	3
63 #define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK	0x3
64 #define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT		5
65 #define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK		0x7
66 #define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT		8
67 #define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK		0x3f
68 /* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata
69  * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */
70 #define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT   14
71 #define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK    0x1
72 #define AMDGPU_TILING_SET(field, value) \
73 	(((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
74 #define AMDGPU_TILING_GET(value, field) \
75 	(((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
76 #else
77 #include "drm-uapi/amdgpu_drm.h"
78 #endif
79 
80 #ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
81 #define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
82 #endif
83 
84 #ifndef CIASICIDGFXENGINE_ARCTICISLAND
85 #define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
86 #endif
87 
88 struct ac_addrlib {
89    ADDR_HANDLE handle;
90    simple_mtx_t lock;
91 };
92 
ac_pipe_config_to_num_pipes(unsigned pipe_config)93 unsigned ac_pipe_config_to_num_pipes(unsigned pipe_config)
94 {
95    switch (pipe_config) {
96    case V_009910_ADDR_SURF_P2:
97       return 2;
98    case V_009910_ADDR_SURF_P4_8x16:
99    case V_009910_ADDR_SURF_P4_16x16:
100    case V_009910_ADDR_SURF_P4_16x32:
101    case V_009910_ADDR_SURF_P4_32x32:
102       return 4;
103    case V_009910_ADDR_SURF_P8_16x16_8x16:
104    case V_009910_ADDR_SURF_P8_16x32_8x16:
105    case V_009910_ADDR_SURF_P8_32x32_8x16:
106    case V_009910_ADDR_SURF_P8_16x32_16x16:
107    case V_009910_ADDR_SURF_P8_32x32_16x16:
108    case V_009910_ADDR_SURF_P8_32x32_16x32:
109    case V_009910_ADDR_SURF_P8_32x64_32x32:
110       return 8;
111    case V_009910_ADDR_SURF_P16_32x32_8x16:
112    case V_009910_ADDR_SURF_P16_32x32_16x16:
113       return 16;
114    default:
115       unreachable("invalid pipe_config");
116    }
117 }
118 
ac_modifier_has_dcc(uint64_t modifier)119 bool ac_modifier_has_dcc(uint64_t modifier)
120 {
121    return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
122 }
123 
ac_modifier_has_dcc_retile(uint64_t modifier)124 bool ac_modifier_has_dcc_retile(uint64_t modifier)
125 {
126    return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC_RETILE, modifier);
127 }
128 
ac_modifier_supports_dcc_image_stores(enum amd_gfx_level gfx_level,uint64_t modifier)129 bool ac_modifier_supports_dcc_image_stores(enum amd_gfx_level gfx_level, uint64_t modifier)
130 {
131    if (!ac_modifier_has_dcc(modifier))
132       return false;
133 
134    if (gfx_level >= GFX12)
135       return true;
136 
137    return (!AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) &&
138            AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) &&
139            AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_128B) ||
140           (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && /* gfx10.3 */
141            AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) &&
142            AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) &&
143            AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_64B) ||
144           (gfx_level >= GFX11_5 &&
145            AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX11 &&
146            !AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) &&
147            AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) &&
148            AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_256B);
149 
150 }
151 
152 
ac_surface_supports_dcc_image_stores(enum amd_gfx_level gfx_level,const struct radeon_surf * surf)153 bool ac_surface_supports_dcc_image_stores(enum amd_gfx_level gfx_level,
154                                           const struct radeon_surf *surf)
155 {
156    /* DCC image stores is only available for GFX10+. */
157    if (gfx_level < GFX10)
158       return false;
159 
160    if (gfx_level >= GFX12)
161       return true;
162 
163    /* DCC image stores support the following settings:
164     * - INDEPENDENT_64B_BLOCKS = 0
165     * - INDEPENDENT_128B_BLOCKS = 1
166     * - MAX_COMPRESSED_BLOCK_SIZE = 128B
167     * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
168     *
169     * gfx10.3 also supports the following setting:
170     * - INDEPENDENT_64B_BLOCKS = 1
171     * - INDEPENDENT_128B_BLOCKS = 1
172     * - MAX_COMPRESSED_BLOCK_SIZE = 64B
173     * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
174     *
175     * gfx11.5 also supports the following:
176     * - INDEPENDENT_64B_BLOCKS = 0
177     * - INDEPENDENT_128B_BLOCKS = 1
178     * - MAX_COMPRESSED_BLOCK_SIZE = 256B
179     * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
180     *
181     * The compressor only looks at MAX_COMPRESSED_BLOCK_SIZE to determine
182     * the INDEPENDENT_xx_BLOCKS settings. 128B implies INDEP_128B, while 64B
183     * implies INDEP_64B && INDEP_128B.
184     *
185     * The same limitations apply to SDMA compressed stores because
186     * SDMA uses the same DCC codec.
187     */
188    return (!surf->u.gfx9.color.dcc.independent_64B_blocks &&
189            surf->u.gfx9.color.dcc.independent_128B_blocks &&
190            surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_128B) ||
191           (gfx_level >= GFX10_3 && /* gfx10.3 - old 64B compression */
192            surf->u.gfx9.color.dcc.independent_64B_blocks &&
193            surf->u.gfx9.color.dcc.independent_128B_blocks &&
194            surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B) ||
195           (gfx_level >= GFX11_5 && /* gfx11.5 - new 256B compression */
196            !surf->u.gfx9.color.dcc.independent_64B_blocks &&
197            surf->u.gfx9.color.dcc.independent_128B_blocks &&
198            surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_256B);
199 }
200 
ac_get_modifier_swizzle_mode(enum amd_gfx_level gfx_level,uint64_t modifier)201 static unsigned ac_get_modifier_swizzle_mode(enum amd_gfx_level gfx_level, uint64_t modifier)
202 {
203    if (modifier == DRM_FORMAT_MOD_LINEAR)
204       return ADDR_SW_LINEAR;
205 
206    if (gfx_level >= GFX12 &&
207        AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX11) {
208       /* The Gfx11 swizzle mode needs to be translated to Gfx12. */
209       if (AMD_FMT_MOD_GET(TILE, modifier) == AMD_FMT_MOD_TILE_GFX9_64K_D)
210          return AMD_FMT_MOD_TILE_GFX12_64K_2D;
211 
212       assert(0);
213       return ADDR_SW_MAX_TYPE; /* can't translate */
214    }
215 
216    return AMD_FMT_MOD_GET(TILE, modifier);
217 }
218 
219 static void
ac_modifier_fill_dcc_params(uint64_t modifier,struct radeon_surf * surf,ADDR2_COMPUTE_SURFACE_INFO_INPUT * surf_info)220 ac_modifier_fill_dcc_params(uint64_t modifier, struct radeon_surf *surf,
221                             ADDR2_COMPUTE_SURFACE_INFO_INPUT *surf_info)
222 {
223    assert(ac_modifier_has_dcc(modifier));
224    assert(AMD_FMT_MOD_GET(TILE_VERSION, modifier) < AMD_FMT_MOD_TILE_VER_GFX12);
225 
226    if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
227       surf_info->flags.metaPipeUnaligned = 0;
228    } else {
229       surf_info->flags.metaPipeUnaligned = !AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);
230    }
231 
232    /* The metaPipeUnaligned is not strictly necessary, but ensure we don't set metaRbUnaligned on
233     * non-displayable DCC surfaces just because num_render_backends = 1 */
234    surf_info->flags.metaRbUnaligned = AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
235                                       AMD_FMT_MOD_GET(RB, modifier) == 0 &&
236                                       surf_info->flags.metaPipeUnaligned;
237 
238    surf->u.gfx9.color.dcc.independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
239    surf->u.gfx9.color.dcc.independent_128B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier);
240    surf->u.gfx9.color.dcc.max_compressed_block_size = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier);
241 }
242 
ac_is_modifier_supported(const struct radeon_info * info,const struct ac_modifier_options * options,enum pipe_format format,uint64_t modifier)243 bool ac_is_modifier_supported(const struct radeon_info *info,
244                               const struct ac_modifier_options *options,
245                               enum pipe_format format,
246                               uint64_t modifier)
247 {
248 
249    if (util_format_is_compressed(format) ||
250        util_format_is_depth_or_stencil(format) ||
251        util_format_get_blocksizebits(format) > 64)
252       return false;
253 
254    if (info->gfx_level < GFX9)
255       return false;
256 
257    if(modifier == DRM_FORMAT_MOD_LINEAR)
258       return true;
259 
260    /* GFX8 may need a different modifier for each plane */
261    if (info->gfx_level < GFX9 && util_format_get_num_planes(format) > 1)
262       return false;
263 
264    uint32_t allowed_swizzles = 0xFFFFFFFF;
265    switch(info->gfx_level) {
266    case GFX9:
267       allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x06000000 : 0x06660660;
268       break;
269    case GFX10:
270    case GFX10_3:
271       allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x08000000 : 0x0E660660;
272       break;
273    case GFX11:
274    case GFX11_5:
275       allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x88000000 : 0xCC440440;
276       break;
277    case GFX12:
278       allowed_swizzles = 0x1E; /* all 2D swizzle modes */
279       break;
280    default:
281       return false;
282    }
283 
284    if (!((1u << ac_get_modifier_swizzle_mode(info->gfx_level, modifier)) & allowed_swizzles))
285       return false;
286 
287    if (ac_modifier_has_dcc(modifier)) {
288       /* TODO: support multi-planar formats with DCC */
289       if (util_format_get_num_planes(format) > 1)
290          return false;
291 
292       if (!info->has_graphics)
293          return false;
294 
295       if (!options->dcc)
296          return false;
297 
298       if (ac_modifier_has_dcc_retile(modifier)) {
299          /* radeonsi and radv retiling shaders only support bpe == 32. */
300          if (util_format_get_blocksizebits(format) != 32)
301             return false;
302          if (!info->use_display_dcc_with_retile_blit || !options->dcc_retile)
303             return false;
304       }
305    }
306 
307    return true;
308 }
309 
ac_get_supported_modifiers(const struct radeon_info * info,const struct ac_modifier_options * options,enum pipe_format format,unsigned * mod_count,uint64_t * mods)310 bool ac_get_supported_modifiers(const struct radeon_info *info,
311                                 const struct ac_modifier_options *options,
312                                 enum pipe_format format,
313                                 unsigned *mod_count,
314                                 uint64_t *mods)
315 {
316    unsigned current_mod = 0;
317 
318 #define ADD_MOD(name)                                                   \
319    if (ac_is_modifier_supported(info, options, format, (name))) {  \
320       if (mods && current_mod < *mod_count)                  \
321          mods[current_mod] = (name);                    \
322       ++current_mod;                                         \
323    }
324 
325    /* The modifiers have to be added in descending order of estimated
326     * performance. The drivers will prefer modifiers that come earlier
327     * in the list. */
328    switch (info->gfx_level) {
329    case GFX9: {
330       unsigned pipe_xor_bits = MIN2(G_0098F8_NUM_PIPES(info->gb_addr_config) +
331                                     G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config), 8);
332       unsigned bank_xor_bits =  MIN2(G_0098F8_NUM_BANKS(info->gb_addr_config), 8 - pipe_xor_bits);
333       unsigned pipes = G_0098F8_NUM_PIPES(info->gb_addr_config);
334       unsigned rb = G_0098F8_NUM_RB_PER_SE(info->gb_addr_config) +
335                     G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config);
336 
337       uint64_t common_dcc = AMD_FMT_MOD_SET(DCC, 1) |
338                             AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
339                             AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
340                             AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, info->has_dcc_constant_encode) |
341                             AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
342                             AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits);
343 
344       ADD_MOD(AMD_FMT_MOD |
345               AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
346               AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
347               AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
348               common_dcc |
349               AMD_FMT_MOD_SET(PIPE, pipes) |
350               AMD_FMT_MOD_SET(RB, rb))
351 
352       ADD_MOD(AMD_FMT_MOD |
353               AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
354               AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
355               AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
356               common_dcc |
357               AMD_FMT_MOD_SET(PIPE, pipes) |
358               AMD_FMT_MOD_SET(RB, rb))
359 
360       if (util_format_get_blocksizebits(format) == 32) {
361          if (info->max_render_backends == 1) {
362             ADD_MOD(AMD_FMT_MOD |
363                     AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
364                     AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
365                     common_dcc);
366          }
367 
368 
369          ADD_MOD(AMD_FMT_MOD |
370                  AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
371                  AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
372                  AMD_FMT_MOD_SET(DCC_RETILE, 1) |
373                  common_dcc |
374                  AMD_FMT_MOD_SET(PIPE, pipes) |
375                  AMD_FMT_MOD_SET(RB, rb))
376       }
377 
378 
379       ADD_MOD(AMD_FMT_MOD |
380               AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
381               AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
382               AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
383               AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
384 
385       ADD_MOD(AMD_FMT_MOD |
386               AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
387               AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
388               AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
389               AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
390 
391       ADD_MOD(AMD_FMT_MOD |
392               AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
393               AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
394 
395       ADD_MOD(AMD_FMT_MOD |
396               AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
397               AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
398 
399       ADD_MOD(DRM_FORMAT_MOD_LINEAR)
400       break;
401    }
402    case GFX10:
403    case GFX10_3: {
404       bool rbplus = info->gfx_level >= GFX10_3;
405       unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config);
406       unsigned pkrs = rbplus ? G_0098F8_NUM_PKRS(info->gb_addr_config) : 0;
407 
408       unsigned version = rbplus ? AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS : AMD_FMT_MOD_TILE_VER_GFX10;
409       uint64_t common_dcc = AMD_FMT_MOD_SET(TILE_VERSION, version) |
410                             AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
411                             AMD_FMT_MOD_SET(DCC, 1) |
412                             AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
413                             AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
414                             AMD_FMT_MOD_SET(PACKERS, pkrs);
415 
416       ADD_MOD(AMD_FMT_MOD | common_dcc |
417               AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
418               AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
419               AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))
420 
421       if (info->gfx_level >= GFX10_3) {
422          ADD_MOD(AMD_FMT_MOD | common_dcc |
423                  AMD_FMT_MOD_SET(DCC_RETILE, 1) |
424                  AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
425                  AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))
426 
427          ADD_MOD(AMD_FMT_MOD | common_dcc |
428                  AMD_FMT_MOD_SET(DCC_RETILE, 1) |
429                  AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
430                  AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
431                  AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B))
432       }
433 
434       ADD_MOD(AMD_FMT_MOD |
435               AMD_FMT_MOD_SET(TILE_VERSION, version) |
436               AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
437               AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
438               AMD_FMT_MOD_SET(PACKERS, pkrs))
439 
440       if (util_format_get_blocksizebits(format) != 32) {
441          ADD_MOD(AMD_FMT_MOD |
442                  AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
443                  AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
444       }
445 
446       ADD_MOD(AMD_FMT_MOD |
447               AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
448               AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
449 
450       ADD_MOD(DRM_FORMAT_MOD_LINEAR)
451       break;
452    }
453    case GFX11:
454    case GFX11_5: {
455       /* GFX11 has new microblock organization. No S modes for 2D. */
456       unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config);
457       unsigned pkrs = G_0098F8_NUM_PKRS(info->gb_addr_config);
458       unsigned num_pipes = 1 << pipe_xor_bits;
459 
460       /* R_X swizzle modes are the best for rendering and DCC requires them. */
461       for (unsigned i = 0; i < 2; i++) {
462          unsigned swizzle_r_x;
463 
464          /* Insert the best one first. */
465          if (num_pipes > 16)
466             swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX11_256K_R_X : AMD_FMT_MOD_TILE_GFX9_64K_R_X;
467          else
468             swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX9_64K_R_X : AMD_FMT_MOD_TILE_GFX11_256K_R_X;
469 
470          /* Disable 256K on APUs because it doesn't work with DAL. */
471          if (!info->has_dedicated_vram && swizzle_r_x == AMD_FMT_MOD_TILE_GFX11_256K_R_X)
472             continue;
473 
474          uint64_t modifier_r_x = AMD_FMT_MOD |
475                                  AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
476                                  AMD_FMT_MOD_SET(TILE, swizzle_r_x) |
477                                  AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
478                                  AMD_FMT_MOD_SET(PACKERS, pkrs);
479 
480          /* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */
481          uint64_t modifier_dcc_best_gfx11_5 = modifier_r_x |
482                                               AMD_FMT_MOD_SET(DCC, 1) |
483                                               AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) |
484                                               AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
485                                               AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_256B);
486 
487          uint64_t modifier_dcc_best = modifier_r_x |
488                                       AMD_FMT_MOD_SET(DCC, 1) |
489                                       AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) |
490                                       AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
491                                       AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B);
492 
493          /* DCC settings for 4K and greater resolutions. (required by display hw) */
494          uint64_t modifier_dcc_4k = modifier_r_x |
495                                     AMD_FMT_MOD_SET(DCC, 1) |
496                                     AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
497                                     AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
498                                     AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B);
499 
500          /* Modifiers have to be sorted from best to worst.
501           *
502           * Top level order:
503           *   1. The best chip-specific modifiers with DCC, potentially non-displayable.
504           *   2. Chip-specific displayable modifiers with DCC.
505           *   3. Chip-specific displayable modifiers without DCC.
506           *   4. Chip-independent modifiers without DCC.
507           *   5. Linear.
508           */
509 
510          /* Add the best non-displayable modifier first. */
511          if (info->gfx_level == GFX11_5)
512             ADD_MOD(modifier_dcc_best_gfx11_5 | AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1));
513 
514          ADD_MOD(modifier_dcc_best | AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1));
515 
516          /* Displayable modifiers are next. */
517          /* Add other displayable DCC settings. (DCC_RETILE implies displayable on all chips) */
518          ADD_MOD(modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1))
519          ADD_MOD(modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1))
520 
521          /* Add one without DCC that is displayable (it's also optimal for non-displayable cases). */
522          ADD_MOD(modifier_r_x)
523       }
524 
525       /* Add one that is compatible with other gfx11 chips. */
526       ADD_MOD(AMD_FMT_MOD |
527               AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
528               AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D))
529 
530       /* Linear must be last. */
531       ADD_MOD(DRM_FORMAT_MOD_LINEAR)
532       break;
533    }
534    case GFX12: {
535       /* Chip properties no longer affect tiling, and there is no distinction between displayable
536        * and non-displayable anymore. (DCC settings may affect displayability though)
537        *
538        * Only declare 64K modifiers for now.
539        */
540       uint64_t mod_gfx12 = AMD_FMT_MOD |
541                            AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX12);
542 
543       uint64_t mod_256K_2D = mod_gfx12 | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_256K_2D);
544       uint64_t mod_64K_2D = mod_gfx12 | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_64K_2D);
545       uint64_t mod_4K_2D = mod_gfx12 | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_4K_2D);
546       uint64_t mod_256B_2D = mod_gfx12 | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_256B_2D);
547 
548       /* This is identical to GFX12_64K_2D, but expressed in terms of VER_GFX11. */
549       uint64_t mod_64K_2D_as_gfx11 = AMD_FMT_MOD |
550                                      AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
551                                      AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D);
552 
553       /* Expose both all compressed blocks. */
554       uint64_t dcc_256B = AMD_FMT_MOD_SET(DCC, 1) |
555                           AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_256B);
556       uint64_t dcc_128B = AMD_FMT_MOD_SET(DCC, 1) |
557                           AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B);
558       uint64_t dcc_64B = AMD_FMT_MOD_SET(DCC, 1) |
559                          AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B);
560 
561       /* Modifiers must be sorted from best to worst. */
562       ADD_MOD(mod_64K_2D | dcc_256B)      /* 64K with DCC and 256B compressed blocks */
563       ADD_MOD(mod_64K_2D | dcc_128B)      /* 64K with DCC and 128B compressed blocks */
564       ADD_MOD(mod_64K_2D | dcc_64B)       /* 64K with DCC and 64B compressed blocks */
565       ADD_MOD(mod_256K_2D | dcc_256B)     /* OpenGL exported modifier */
566       ADD_MOD(mod_4K_2D | dcc_256B)       /* OpenGL exported modifier */
567       ADD_MOD(mod_256B_2D | dcc_256B)     /* OpenGL exported modifier */
568       /* Without DCC is last. */
569       ADD_MOD(mod_64K_2D)                 /* 64K without DCC */
570       ADD_MOD(mod_64K_2D_as_gfx11)        /* the same as above, but for gfx11 interop */
571       ADD_MOD(mod_256B_2D)
572       ADD_MOD(DRM_FORMAT_MOD_LINEAR)
573       break;
574    }
575    default:
576       break;
577    }
578 
579 #undef ADD_MOD
580 
581    if (!mods) {
582       *mod_count = current_mod;
583       return true;
584    }
585 
586    bool complete = current_mod <= *mod_count;
587    *mod_count = MIN2(*mod_count, current_mod);
588    return complete;
589 }
590 
allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)591 static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT *pInput)
592 {
593    return malloc(pInput->sizeInBytes);
594 }
595 
freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput)596 static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT *pInput)
597 {
598    free(pInput->pVirtAddr);
599    return ADDR_OK;
600 }
601 
ac_addrlib_create(const struct radeon_info * info,uint64_t * max_alignment)602 struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
603                                      uint64_t *max_alignment)
604 {
605    ADDR_CREATE_INPUT addrCreateInput = {0};
606    ADDR_CREATE_OUTPUT addrCreateOutput = {0};
607    ADDR_REGISTER_VALUE regValue = {0};
608    ADDR_CREATE_FLAGS createFlags = {{0}};
609    ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
610    ADDR_E_RETURNCODE addrRet;
611 
612    addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
613    addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
614 
615    regValue.gbAddrConfig = info->gb_addr_config;
616    createFlags.value = 0;
617 
618    addrCreateInput.chipFamily = info->family_id;
619    addrCreateInput.chipRevision = info->chip_external_rev;
620 
621    if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
622       return NULL;
623 
624    if (addrCreateInput.chipFamily >= FAMILY_AI) {
625       addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
626    } else {
627       regValue.noOfBanks = info->mc_arb_ramcfg & 0x3;
628       regValue.noOfRanks = (info->mc_arb_ramcfg & 0x4) >> 2;
629 
630       regValue.backendDisables = info->enabled_rb_mask;
631       regValue.pTileConfig = info->si_tile_mode_array;
632       regValue.noOfEntries = ARRAY_SIZE(info->si_tile_mode_array);
633       if (addrCreateInput.chipFamily == FAMILY_SI) {
634          regValue.pMacroTileConfig = NULL;
635          regValue.noOfMacroEntries = 0;
636       } else {
637          regValue.pMacroTileConfig = info->cik_macrotile_mode_array;
638          regValue.noOfMacroEntries = ARRAY_SIZE(info->cik_macrotile_mode_array);
639       }
640 
641       createFlags.useTileIndex = 1;
642       createFlags.useHtileSliceAlign = 1;
643 
644       addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
645    }
646 
647    addrCreateInput.callbacks.allocSysMem = allocSysMem;
648    addrCreateInput.callbacks.freeSysMem = freeSysMem;
649    addrCreateInput.callbacks.debugPrint = 0;
650    addrCreateInput.createFlags = createFlags;
651    addrCreateInput.regValue = regValue;
652 
653    addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
654    if (addrRet != ADDR_OK)
655       return NULL;
656 
657    if (max_alignment) {
658       addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
659       if (addrRet == ADDR_OK) {
660          *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
661       }
662    }
663 
664    struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));
665    if (!addrlib) {
666       AddrDestroy(addrCreateOutput.hLib);
667       return NULL;
668    }
669 
670    addrlib->handle = addrCreateOutput.hLib;
671    simple_mtx_init(&addrlib->lock, mtx_plain);
672    return addrlib;
673 }
674 
ac_addrlib_destroy(struct ac_addrlib * addrlib)675 void ac_addrlib_destroy(struct ac_addrlib *addrlib)
676 {
677    simple_mtx_destroy(&addrlib->lock);
678    AddrDestroy(addrlib->handle);
679    free(addrlib);
680 }
681 
ac_addrlib_get_handle(struct ac_addrlib * addrlib)682 void *ac_addrlib_get_handle(struct ac_addrlib *addrlib)
683 {
684    return addrlib->handle;
685 }
686 
surf_config_sanity(const struct ac_surf_config * config,unsigned flags)687 static int surf_config_sanity(const struct ac_surf_config *config, unsigned flags)
688 {
689    /* FMASK is allocated together with the color surface and can't be
690     * allocated separately.
691     */
692    assert(!(flags & RADEON_SURF_FMASK));
693    if (flags & RADEON_SURF_FMASK)
694       return -EINVAL;
695 
696    /* all dimension must be at least 1 ! */
697    if (!config->info.width || !config->info.height || !config->info.depth ||
698        !config->info.array_size || !config->info.levels)
699       return -EINVAL;
700 
701    switch (config->info.samples) {
702    case 0:
703    case 1:
704    case 2:
705    case 4:
706    case 8:
707       break;
708    case 16:
709       if (flags & RADEON_SURF_Z_OR_SBUFFER)
710          return -EINVAL;
711       break;
712    default:
713       return -EINVAL;
714    }
715 
716    if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
717       switch (config->info.storage_samples) {
718       case 0:
719       case 1:
720       case 2:
721       case 4:
722       case 8:
723          break;
724       default:
725          return -EINVAL;
726       }
727    }
728 
729    if (config->is_3d && config->info.array_size > 1)
730       return -EINVAL;
731    if (config->is_cube && config->info.depth > 1)
732       return -EINVAL;
733 
734    return 0;
735 }
736 
bpe_to_format(struct radeon_surf * surf)737 static unsigned bpe_to_format(struct radeon_surf *surf)
738 {
739    if (surf->blk_w != 1 || surf->blk_h != 1) {
740       if (surf->blk_w == 4 && surf->blk_h == 4) {
741          switch (surf->bpe) {
742          case 8:
743             return ADDR_FMT_BC1;
744          case 16:
745             /* since BC3 and ASTC4x4 has same blk dimension and bpe reporting BC3 also for ASTC4x4.
746              * matching is fine since addrlib needs only blk_w, blk_h and bpe to compute surface
747              * properties.
748              * TODO: If compress_type can be passed to this function, then this ugly BC3 and ASTC4x4
749              *       matching can be avoided.
750              */
751             return ADDR_FMT_BC3;
752          default:
753             unreachable("invalid compressed bpe");
754          }
755       } else if (surf->blk_w == 5 && surf->blk_h == 4)
756          return ADDR_FMT_ASTC_5x4;
757       else if (surf->blk_w == 5 && surf->blk_h == 5)
758          return ADDR_FMT_ASTC_5x5;
759       else if (surf->blk_w == 6 && surf->blk_h == 5)
760          return ADDR_FMT_ASTC_6x5;
761       else if (surf->blk_w == 6 && surf->blk_h == 6)
762          return ADDR_FMT_ASTC_6x6;
763       else if (surf->blk_w == 8 && surf->blk_h == 5)
764          return ADDR_FMT_ASTC_8x5;
765       else if (surf->blk_w == 8 && surf->blk_h == 6)
766          return ADDR_FMT_ASTC_8x6;
767       else if (surf->blk_w == 8 && surf->blk_h == 8)
768          return ADDR_FMT_ASTC_8x8;
769       else if (surf->blk_w == 10 && surf->blk_h == 5)
770          return ADDR_FMT_ASTC_10x5;
771       else if (surf->blk_w == 10 && surf->blk_h == 6)
772          return ADDR_FMT_ASTC_10x6;
773       else if (surf->blk_w == 10 && surf->blk_h == 8)
774          return ADDR_FMT_ASTC_10x8;
775       else if (surf->blk_w == 10 && surf->blk_h == 10)
776          return ADDR_FMT_ASTC_10x10;
777       else if (surf->blk_w == 12 && surf->blk_h == 10)
778          return ADDR_FMT_ASTC_12x10;
779       else if (surf->blk_w == 12 && surf->blk_h == 12)
780          return ADDR_FMT_ASTC_12x12;
781    } else {
782       switch (surf->bpe) {
783       case 1:
784          assert(!(surf->flags & RADEON_SURF_ZBUFFER));
785          return ADDR_FMT_8;
786       case 2:
787          assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
788          return ADDR_FMT_16;
789       case 4:
790          assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
791          return ADDR_FMT_32;
792       case 8:
793          assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
794          return ADDR_FMT_32_32;
795       case 12:
796          assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
797          return ADDR_FMT_32_32_32;
798       case 16:
799          assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
800          return ADDR_FMT_32_32_32_32;
801       default:
802          unreachable("invalid bpe");
803       }
804    }
805    return ADDR_FMT_INVALID;
806 }
807 
/* Pitch alignment, in bytes, that is imposed on addrlib for linear surfaces.
 * The same value is used on every chip so that any two chips can share a
 * linear surface (interop).
 */
#define LINEAR_PITCH_ALIGNMENT 256
812 
/**
 * Compute the layout of one mip level of a legacy-layout surface and append it
 * to \p surf.
 *
 * The level is placed at the current end of the surface (surf->surf_size,
 * aligned to the level's base alignment) and surf->surf_size is advanced past
 * it. Depending on the input flags, DCC info (color) or HTILE info (depth,
 * level 0 only) is computed as well and stored in the surf->meta_* fields.
 *
 * \param addrlib          addrlib handle
 * \param config           surface dimensions and type
 * \param surf             surface being filled in
 * \param is_stencil       write the stencil level/tiling index instead of the
 *                         main one
 * \param level            mip level to compute
 * \param compressed       the format is block-compressed; the base pitch is
 *                         converted from blocks to pixels
 * \param AddrSurfInfoIn   surface query input; per-level fields are set here,
 *                         everything else must be set up by the caller
 * \param AddrSurfInfoOut  surface query output
 * \param AddrDccIn        DCC query input
 * \param AddrDccOut       DCC query output; the values left by the previous
 *                         level decide whether this level can use DCC
 * \param AddrHtileIn      HTILE query input
 * \param AddrHtileOut     HTILE query output
 * \return the addrlib error code if the surface query fails, 0 otherwise.
 *         Failures of the DCC and HTILE queries are not reported; the
 *         corresponding metadata is just not set.
 */
static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config *config,
                              struct radeon_surf *surf, bool is_stencil, unsigned level,
                              bool compressed, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
                              ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
                              ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
                              ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
                              ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
                              ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
{
   struct legacy_surf_level *surf_level;
   struct legacy_surf_dcc_level *dcc_level;
   ADDR_E_RETURNCODE ret;
   bool mode_has_htile = false;

   AddrSurfInfoIn->mipLevel = level;
   AddrSurfInfoIn->width = u_minify(config->info.width, level);
   AddrSurfInfoIn->height = u_minify(config->info.height, level);

   /* Make GFX6 linear surfaces compatible with all chips for multi-GPU interop. */
   if (config->info.levels == 1 && AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
       AddrSurfInfoIn->bpp && util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {
      /* LINEAR_PITCH_ALIGNMENT is in bytes; convert it to elements. */
      unsigned alignment = LINEAR_PITCH_ALIGNMENT / surf->bpe;

      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
   }

   /* addrlib assumes the bytes/pixel is a divisor of 64, which is not
    * true for r32g32b32 formats. */
   if (AddrSurfInfoIn->bpp == 96) {
      assert(config->info.levels == 1);
      assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);

      /* The least common multiple of 64 bytes and 12 bytes/pixel is
       * 192 bytes, or 16 pixels. */
      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);
   }

   if (config->is_3d)
      AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
   else if (config->is_cube)
      AddrSurfInfoIn->numSlices = 6;
   else
      AddrSurfInfoIn->numSlices = config->info.array_size;

   if (level > 0) {
      /* Set the base level pitch. This is needed for calculation
       * of non-zero levels. */
      if (is_stencil)
         AddrSurfInfoIn->basePitch = surf->u.legacy.zs.stencil_level[0].nblk_x;
      else
         AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;

      /* Convert blocks to pixels for compressed formats. */
      if (compressed)
         AddrSurfInfoIn->basePitch *= surf->blk_w;
   }

   ret = AddrComputeSurfaceInfo(addrlib, AddrSurfInfoIn, AddrSurfInfoOut);
   if (ret != ADDR_OK) {
      return ret;
   }

   surf_level = is_stencil ? &surf->u.legacy.zs.stencil_level[level] : &surf->u.legacy.level[level];
   dcc_level = &surf->u.legacy.color.dcc_level[level];
   surf_level->offset_256B = align64(surf->surf_size, AddrSurfInfoOut->baseAlign) / 256;
   surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
   surf_level->nblk_x = AddrSurfInfoOut->pitch;
   surf_level->nblk_y = AddrSurfInfoOut->height;

   switch (AddrSurfInfoOut->tileMode) {
   case ADDR_TM_LINEAR_ALIGNED:
      surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
      break;
   case ADDR_TM_1D_TILED_THIN1:
   case ADDR_TM_1D_TILED_THICK:
   case ADDR_TM_PRT_TILED_THIN1:
      surf_level->mode = RADEON_SURF_MODE_1D;
      break;
   default:
      surf_level->mode = RADEON_SURF_MODE_2D;
      break;
   }

   if (is_stencil)
      surf->u.legacy.zs.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
   else
      surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;

   if (AddrSurfInfoIn->flags.prt) {
      /* The PRT tile dimensions are taken from the base level. */
      if (level == 0) {
         surf->prt_tile_width = AddrSurfInfoOut->pitchAlign;
         surf->prt_tile_height = AddrSurfInfoOut->heightAlign;
         surf->prt_tile_depth = AddrSurfInfoOut->depthAlign;
      }
      if (surf_level->nblk_x >= surf->prt_tile_width &&
          surf_level->nblk_y >= surf->prt_tile_height) {
         /* +1 because the current level is not in the miptail */
         surf->first_mip_tail_level = level + 1;
      }
   }

   surf->surf_size = (uint64_t)surf_level->offset_256B * 256 + AddrSurfInfoOut->surfSize;

   /* Clear DCC fields at the beginning. */
   if (!AddrSurfInfoIn->flags.depth && !AddrSurfInfoIn->flags.stencil)
      dcc_level->dcc_offset = 0;

   /* The previous level's flag tells us if we can use DCC for this level. */
   if (AddrSurfInfoIn->flags.dccCompatible && (level == 0 || AddrDccOut->subLvlCompressible)) {
      /* Read this before AddrDccOut is overwritten by the query below. */
      bool prev_level_clearable = level == 0 || AddrDccOut->dccRamSizeAligned;

      AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
      AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
      AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
      AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
      AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

      ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);

      if (ret == ADDR_OK) {
         dcc_level->dcc_offset = surf->meta_size;
         surf->num_meta_levels = level + 1;
         surf->meta_size = dcc_level->dcc_offset + AddrDccOut->dccRamSize;
         surf->meta_alignment_log2 = MAX2(surf->meta_alignment_log2, util_logbase2(AddrDccOut->dccRamBaseAlign));

         /* If the DCC size of a subresource (1 mip level or 1 slice)
          * is not aligned, the DCC memory layout is not contiguous for
          * that subresource, which means we can't use fast clear.
          *
          * We only do fast clears for whole mipmap levels. If we did
          * per-slice fast clears, the same restriction would apply.
          * (i.e. only compute the slice size and see if it's aligned)
          *
          * The last level can be non-contiguous and still be clearable
          * if it's interleaved with the next level that doesn't exist.
          */
         if (AddrDccOut->dccRamSizeAligned ||
             (prev_level_clearable && level == config->info.levels - 1))
            dcc_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
         else
            dcc_level->dcc_fast_clear_size = 0;

         /* Compute the DCC slice size because addrlib doesn't
          * provide this info. As DCC memory is linear (each
          * slice is the same size) it's easy to compute.
          */
         surf->meta_slice_size = AddrDccOut->dccRamSize / config->info.array_size;

         /* For arrays, we have to compute the DCC info again
          * with one slice size to get a correct fast clear
          * size.
          */
         if (config->info.array_size > 1) {
            AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize;
            AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
            AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
            AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
            AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

            ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);

            /* If the DCC memory isn't properly aligned, the data are
             * interleaved across slices. A failed query is treated the
             * same way, so that the field never keeps whatever value it
             * had before and the comparison below reads a defined value.
             */
            if (ret == ADDR_OK && AddrDccOut->dccRamSizeAligned)
               dcc_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;
            else
               dcc_level->dcc_slice_fast_clear_size = 0;

            /* Drop DCC entirely if contiguous layers were requested but
             * a slice can't be cleared as one contiguous range.
             */
            if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS &&
                surf->meta_slice_size != dcc_level->dcc_slice_fast_clear_size) {
               surf->meta_size = 0;
               surf->num_meta_levels = 0;
               AddrDccOut->subLvlCompressible = false;
            }
         } else {
            dcc_level->dcc_slice_fast_clear_size = dcc_level->dcc_fast_clear_size;
         }
      }
   }

   /* HTILE is used with 2D tiling, and with 1D tiling unless TC-compatible
    * HTILE was requested.
    */
   if (surf_level->mode == RADEON_SURF_MODE_2D)
      mode_has_htile = true;
   else if (surf_level->mode == RADEON_SURF_MODE_1D &&
            !(surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE))
      mode_has_htile = true;

   /* HTILE. */
   if (!is_stencil && AddrSurfInfoIn->flags.depth && mode_has_htile &&
       level == 0 && !(surf->flags & RADEON_SURF_NO_HTILE)) {
      AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible;
      AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
      AddrHtileIn->height = AddrSurfInfoOut->height;
      AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
      AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
      AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
      AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
      AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
      AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

      ret = AddrComputeHtileInfo(addrlib, AddrHtileIn, AddrHtileOut);

      if (ret == ADDR_OK) {
         surf->meta_size = AddrHtileOut->htileBytes;
         surf->meta_slice_size = AddrHtileOut->sliceSize;
         surf->meta_alignment_log2 = util_logbase2(AddrHtileOut->baseAlign);
         surf->meta_pitch = AddrHtileOut->pitch;
         surf->num_meta_levels = level + 1;
      }
   }

   return 0;
}
1028 
gfx6_set_micro_tile_mode(struct radeon_surf * surf,const struct radeon_info * info)1029 static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, const struct radeon_info *info)
1030 {
1031    uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
1032 
1033    if (info->gfx_level >= GFX7)
1034       surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
1035    else
1036       surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
1037 }
1038 
cik_get_macro_tile_index(struct radeon_surf * surf)1039 static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
1040 {
1041    unsigned index, tileb;
1042 
1043    tileb = 8 * 8 * surf->bpe;
1044    tileb = MIN2(surf->u.legacy.tile_split, tileb);
1045 
1046    for (index = 0; tileb > 64; index++)
1047       tileb >>= 1;
1048 
1049    assert(index < 16);
1050    return index;
1051 }
1052 
get_display_flag(const struct ac_surf_config * config,const struct radeon_surf * surf)1053 static bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf)
1054 {
1055    unsigned num_channels = config->info.num_channels;
1056    unsigned bpe = surf->bpe;
1057 
1058    /* With modifiers the kernel is in charge of whether it is displayable.
1059     * We need to ensure at least 32 pixels pitch alignment, but this is
1060     * always the case when the blocksize >= 4K.
1061     */
1062    if (surf->modifier != DRM_FORMAT_MOD_INVALID)
1063       return false;
1064 
1065    if (!config->is_1d && !config->is_3d && !config->is_cube &&
1066        !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
1067        surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 &&
1068        surf->blk_h == 1) {
1069       /* subsampled */
1070       if (surf->blk_w == 2 && surf->blk_h == 1)
1071          return true;
1072 
1073       if (/* RGBA8 or RGBA16F */
1074           (bpe >= 4 && bpe <= 8 && num_channels == 4) ||
1075           /* R5G6B5 or R5G5B5A1 */
1076           (bpe == 2 && num_channels >= 3) ||
1077           /* C8 palette */
1078           (bpe == 1 && num_channels == 1))
1079          return true;
1080    }
1081    return false;
1082 }
1083 
1084 /**
1085  * This must be called after the first level is computed.
1086  *
1087  * Copy surface-global settings like pipe/bank config from level 0 surface
1088  * computation, and compute tile swizzle.
1089  */
gfx6_surface_settings(ADDR_HANDLE addrlib,const struct radeon_info * info,const struct ac_surf_config * config,ADDR_COMPUTE_SURFACE_INFO_OUTPUT * csio,struct radeon_surf * surf)1090 static int gfx6_surface_settings(ADDR_HANDLE addrlib, const struct radeon_info *info,
1091                                  const struct ac_surf_config *config,
1092                                  ADDR_COMPUTE_SURFACE_INFO_OUTPUT *csio, struct radeon_surf *surf)
1093 {
1094    surf->surf_alignment_log2 = util_logbase2(csio->baseAlign);
1095    surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
1096    gfx6_set_micro_tile_mode(surf, info);
1097 
1098    /* For 2D modes only. */
1099    if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
1100       surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
1101       surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
1102       surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
1103       surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
1104       surf->u.legacy.num_banks = csio->pTileInfo->banks;
1105       surf->u.legacy.macro_tile_index = csio->macroModeIndex;
1106    } else {
1107       surf->u.legacy.macro_tile_index = 0;
1108    }
1109 
1110    /* Compute tile swizzle. */
1111    /* TODO: fix tile swizzle with mipmapping for GFX6 */
1112    if ((info->gfx_level >= GFX7 || config->info.levels == 1) && config->info.surf_index &&
1113        surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
1114        !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
1115        !get_display_flag(config, surf)) {
1116       ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
1117       ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
1118 
1119       AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
1120       AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
1121 
1122       AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
1123       AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
1124       AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
1125       AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
1126       AddrBaseSwizzleIn.tileMode = csio->tileMode;
1127 
1128       int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
1129       if (r != ADDR_OK)
1130          return r;
1131 
1132       assert(AddrBaseSwizzleOut.tileSwizzle <=
1133              u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
1134       surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
1135    }
1136    return 0;
1137 }
1138 
ac_compute_cmask(const struct radeon_info * info,const struct ac_surf_config * config,struct radeon_surf * surf)1139 static void ac_compute_cmask(const struct radeon_info *info, const struct ac_surf_config *config,
1140                              struct radeon_surf *surf)
1141 {
1142    unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
1143    unsigned num_pipes = info->num_tile_pipes;
1144    unsigned cl_width, cl_height;
1145 
1146    if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear ||
1147        (config->info.samples >= 2 && !surf->fmask_size))
1148       return;
1149 
1150    assert(info->gfx_level <= GFX8);
1151 
1152    switch (num_pipes) {
1153    case 2:
1154       cl_width = 32;
1155       cl_height = 16;
1156       break;
1157    case 4:
1158       cl_width = 32;
1159       cl_height = 32;
1160       break;
1161    case 8:
1162       cl_width = 64;
1163       cl_height = 32;
1164       break;
1165    case 16: /* Hawaii */
1166       cl_width = 64;
1167       cl_height = 64;
1168       break;
1169    default:
1170       assert(0);
1171       return;
1172    }
1173 
1174    unsigned base_align = num_pipes * pipe_interleave_bytes;
1175 
1176    unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8);
1177    unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8);
1178    unsigned slice_elements = (width * height) / (8 * 8);
1179 
1180    /* Each element of CMASK is a nibble. */
1181    unsigned slice_bytes = slice_elements / 2;
1182 
1183    surf->u.legacy.color.cmask_slice_tile_max = (width * height) / (128 * 128);
1184    if (surf->u.legacy.color.cmask_slice_tile_max)
1185       surf->u.legacy.color.cmask_slice_tile_max -= 1;
1186 
1187    unsigned num_layers;
1188    if (config->is_3d)
1189       num_layers = config->info.depth;
1190    else if (config->is_cube)
1191       num_layers = 6;
1192    else
1193       num_layers = config->info.array_size;
1194 
1195    surf->cmask_alignment_log2 = util_logbase2(MAX2(256, base_align));
1196    surf->cmask_slice_size = align(slice_bytes, base_align);
1197    surf->cmask_size = surf->cmask_slice_size * num_layers;
1198 }
1199 
1200 /**
1201  * Fill in the tiling information in \p surf based on the given surface config.
1202  *
1203  * The following fields of \p surf must be initialized by the caller:
1204  * blk_w, blk_h, bpe, flags.
1205  */
gfx6_compute_surface(ADDR_HANDLE addrlib,const struct radeon_info * info,const struct ac_surf_config * config,enum radeon_surf_mode mode,struct radeon_surf * surf)1206 static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
1207                                 const struct ac_surf_config *config, enum radeon_surf_mode mode,
1208                                 struct radeon_surf *surf)
1209 {
1210    unsigned level;
1211    bool compressed;
1212    ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
1213    ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
1214    ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
1215    ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
1216    ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
1217    ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
1218    ADDR_TILEINFO AddrTileInfoIn = {0};
1219    ADDR_TILEINFO AddrTileInfoOut = {0};
1220    int r;
1221 
1222    AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
1223    AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
1224    AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
1225    AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
1226    AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
1227    AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
1228    AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
1229 
1230    compressed = surf->blk_w == 4 && surf->blk_h == 4;
1231 
1232    /* MSAA requires 2D tiling. */
1233    if (config->info.samples > 1)
1234       mode = RADEON_SURF_MODE_2D;
1235 
1236    /* DB doesn't support linear layouts. */
1237    if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && mode < RADEON_SURF_MODE_1D)
1238       mode = RADEON_SURF_MODE_1D;
1239 
1240    /* Set the requested tiling mode. */
1241    switch (mode) {
1242    case RADEON_SURF_MODE_LINEAR_ALIGNED:
1243       AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
1244       break;
1245    case RADEON_SURF_MODE_1D:
1246       if (surf->flags & RADEON_SURF_PRT)
1247          AddrSurfInfoIn.tileMode = ADDR_TM_PRT_TILED_THIN1;
1248       else if (config->is_3d)
1249          AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THICK;
1250       else
1251          AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
1252       break;
1253    case RADEON_SURF_MODE_2D:
1254       if (surf->flags & RADEON_SURF_PRT) {
1255          if (config->is_3d && surf->bpe < 8) {
1256             AddrSurfInfoIn.tileMode = ADDR_TM_PRT_2D_TILED_THICK;
1257          } else {
1258             AddrSurfInfoIn.tileMode = ADDR_TM_PRT_2D_TILED_THIN1;
1259          }
1260       } else {
1261          if (config->is_3d) {
1262             /* GFX6 doesn't have 3D_TILED_XTHICK. */
1263             if (info->gfx_level >= GFX7)
1264                AddrSurfInfoIn.tileMode = ADDR_TM_3D_TILED_XTHICK;
1265             else
1266                AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_XTHICK;
1267          } else {
1268             AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
1269          }
1270       }
1271       break;
1272    default:
1273       assert(0);
1274    }
1275 
1276    AddrSurfInfoIn.format = bpe_to_format(surf);
1277    if (!compressed)
1278       AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
1279 
1280    /* Setting ADDR_FMT_32_32_32 breaks gfx6-8, while INVALID works. */
1281    if (AddrSurfInfoIn.format == ADDR_FMT_32_32_32)
1282       AddrSurfInfoIn.format = ADDR_FMT_INVALID;
1283 
1284    AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
1285    AddrSurfInfoIn.tileIndex = -1;
1286 
1287    if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
1288       AddrDccIn.numSamples = AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
1289    }
1290 
1291    /* Set the micro tile type. */
1292    if (surf->flags & RADEON_SURF_SCANOUT)
1293       AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
1294    else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
1295       AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
1296    else
1297       AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
1298 
1299    AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
1300    AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
1301    AddrSurfInfoIn.flags.cube = config->is_cube;
1302    AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
1303    AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;
1304    AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
1305    AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;
1306 
1307    /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
1308     * requested, because TC-compatible HTILE requires 2D tiling.
1309     */
1310    AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible && !config->is_3d &&
1311                                     !AddrSurfInfoIn.flags.fmask && config->info.samples <= 1 &&
1312                                     !(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE);
1313 
1314    /* DCC notes:
1315     * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
1316     *   with samples >= 4.
1317     * - Mipmapped array textures have low performance (discovered by a closed
1318     *   driver team).
1319     */
1320    AddrSurfInfoIn.flags.dccCompatible =
1321       info->gfx_level >= GFX8 && info->has_graphics && /* disable DCC on compute-only chips */
1322       !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
1323       !compressed &&
1324       ((config->info.array_size == 1 && config->info.depth == 1) || config->info.levels == 1);
1325 
1326    AddrSurfInfoIn.flags.noStencil =
1327       !(surf->flags & RADEON_SURF_SBUFFER) || (surf->flags & RADEON_SURF_NO_RENDER_TARGET);
1328 
1329    AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
1330 
1331    /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
1332     * for Z and stencil. This can cause a number of problems which we work
1333     * around here:
1334     *
1335     * - a depth part that is incompatible with mipmapped texturing
1336     * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
1337     *   incorrect tiling applied to the stencil part, stencil buffer
1338     *   memory accesses that go out of bounds) even without mipmapping
1339     *
1340     * Some piglit tests that are prone to different types of related
1341     * failures:
1342     *  ./bin/ext_framebuffer_multisample-upsample 2 stencil
1343     *  ./bin/framebuffer-blit-levels {draw,read} stencil
1344     *  ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
1345     *  ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
1346     *  ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
1347     */
1348    int stencil_tile_idx = -1;
1349 
1350    if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
1351        (config->info.levels > 1 || info->family == CHIP_STONEY)) {
1352       /* Compute stencilTileIdx that is compatible with the (depth)
1353        * tileIdx. This degrades the depth surface if necessary to
1354        * ensure that a matching stencilTileIdx exists. */
1355       AddrSurfInfoIn.flags.matchStencilTileCfg = 1;
1356 
1357       /* Keep the depth mip-tail compatible with texturing. */
1358       if (config->info.levels > 1 && !(surf->flags & RADEON_SURF_NO_STENCIL_ADJUST))
1359          AddrSurfInfoIn.flags.noStencil = 1;
1360    }
1361 
1362    /* Set preferred macrotile parameters. This is usually required
1363     * for shared resources. This is for 2D tiling only. */
1364    if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
1365        AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && surf->u.legacy.bankw &&
1366        surf->u.legacy.bankh && surf->u.legacy.mtilea && surf->u.legacy.tile_split) {
1367       /* If any of these parameters are incorrect, the calculation
1368        * will fail. */
1369       AddrTileInfoIn.banks = surf->u.legacy.num_banks;
1370       AddrTileInfoIn.bankWidth = surf->u.legacy.bankw;
1371       AddrTileInfoIn.bankHeight = surf->u.legacy.bankh;
1372       AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea;
1373       AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split;
1374       AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */
1375       AddrSurfInfoIn.flags.opt4Space = 0;
1376       AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
1377 
1378       /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
1379        * the tile index, because we are expected to know it if
1380        * we know the other parameters.
1381        *
1382        * This is something that can easily be fixed in Addrlib.
1383        * For now, just figure it out here.
1384        * Note that only 2D_TILE_THIN1 is handled here.
1385        */
1386       assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
1387       assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
1388 
1389       if (info->gfx_level == GFX6) {
1390          if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
1391             if (surf->bpe == 2)
1392                AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
1393             else
1394                AddrSurfInfoIn.tileIndex = 12; /* 32bpp */
1395          } else {
1396             if (surf->bpe == 1)
1397                AddrSurfInfoIn.tileIndex = 14; /* 8bpp */
1398             else if (surf->bpe == 2)
1399                AddrSurfInfoIn.tileIndex = 15; /* 16bpp */
1400             else if (surf->bpe == 4)
1401                AddrSurfInfoIn.tileIndex = 16; /* 32bpp */
1402             else
1403                AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
1404          }
1405       } else {
1406          /* GFX7 - GFX8 */
1407          if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
1408             AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
1409          else
1410             AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
1411 
1412          /* Addrlib doesn't set this if tileIndex is forced like above. */
1413          AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
1414       }
1415    }
1416 
1417    surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
1418    surf->num_meta_levels = 0;
1419    surf->surf_size = 0;
1420    surf->meta_size = 0;
1421    surf->meta_slice_size = 0;
1422    surf->meta_alignment_log2 = 0;
1423 
1424    const bool only_stencil =
1425       (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);
1426 
1427    /* Calculate texture layout information. */
1428    if (!only_stencil) {
1429       for (level = 0; level < config->info.levels; level++) {
1430          r = gfx6_compute_level(addrlib, config, surf, false, level, compressed, &AddrSurfInfoIn,
1431                                 &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, &AddrHtileIn,
1432                                 &AddrHtileOut);
1433          if (r)
1434             return r;
1435 
1436          if (level > 0)
1437             continue;
1438 
1439          if (!AddrSurfInfoOut.tcCompatible) {
1440             AddrSurfInfoIn.flags.tcCompatible = 0;
1441             surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
1442          }
1443 
1444          if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
1445             AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
1446             AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
1447             stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;
1448 
1449             assert(stencil_tile_idx >= 0);
1450          }
1451 
1452          r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
1453          if (r)
1454             return r;
1455       }
1456    }
1457 
1458    /* Calculate texture layout information for stencil. */
1459    if (surf->flags & RADEON_SURF_SBUFFER) {
1460       AddrSurfInfoIn.tileIndex = stencil_tile_idx;
1461       AddrSurfInfoIn.bpp = 8;
1462       AddrSurfInfoIn.format = ADDR_FMT_8;
1463       AddrSurfInfoIn.flags.depth = 0;
1464       AddrSurfInfoIn.flags.stencil = 1;
1465       AddrSurfInfoIn.flags.tcCompatible = 0;
1466       /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
1467       AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split;
1468 
1469       for (level = 0; level < config->info.levels; level++) {
1470          r = gfx6_compute_level(addrlib, config, surf, true, level, compressed, &AddrSurfInfoIn,
1471                                 &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, NULL, NULL);
1472          if (r)
1473             return r;
1474 
1475          /* DB uses the depth pitch for both stencil and depth. */
1476          if (!only_stencil) {
1477             if (surf->u.legacy.zs.stencil_level[level].nblk_x != surf->u.legacy.level[level].nblk_x)
1478                surf->u.legacy.stencil_adjusted = true;
1479          } else {
1480             surf->u.legacy.level[level].nblk_x = surf->u.legacy.zs.stencil_level[level].nblk_x;
1481          }
1482 
1483          if (level == 0) {
1484             if (only_stencil) {
1485                r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
1486                if (r)
1487                   return r;
1488             }
1489 
1490             /* For 2D modes only. */
1491             if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
1492                surf->u.legacy.stencil_tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
1493             }
1494          }
1495       }
1496    }
1497 
1498    /* Compute FMASK. */
1499    if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color && info->has_graphics &&
1500        !(surf->flags & RADEON_SURF_NO_FMASK)) {
1501       ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0};
1502       ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
1503       ADDR_TILEINFO fmask_tile_info = {0};
1504 
1505       fin.size = sizeof(fin);
1506       fout.size = sizeof(fout);
1507 
1508       fin.tileMode = AddrSurfInfoOut.tileMode;
1509       fin.pitch = AddrSurfInfoOut.pitch;
1510       fin.height = config->info.height;
1511       fin.numSlices = AddrSurfInfoIn.numSlices;
1512       fin.numSamples = AddrSurfInfoIn.numSamples;
1513       fin.numFrags = AddrSurfInfoIn.numFrags;
1514       fin.tileIndex = -1;
1515       fout.pTileInfo = &fmask_tile_info;
1516 
1517       r = AddrComputeFmaskInfo(addrlib, &fin, &fout);
1518       if (r)
1519          return r;
1520 
1521       surf->fmask_size = fout.fmaskBytes;
1522       surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign);
1523       surf->fmask_slice_size = fout.sliceSize;
1524       surf->fmask_tile_swizzle = 0;
1525 
1526       surf->u.legacy.color.fmask.slice_tile_max = (fout.pitch * fout.height) / 64;
1527       if (surf->u.legacy.color.fmask.slice_tile_max)
1528          surf->u.legacy.color.fmask.slice_tile_max -= 1;
1529 
1530       surf->u.legacy.color.fmask.tiling_index = fout.tileIndex;
1531       surf->u.legacy.color.fmask.bankh = fout.pTileInfo->bankHeight;
1532       surf->u.legacy.color.fmask.pitch_in_pixels = fout.pitch;
1533 
1534       /* Compute tile swizzle for FMASK. */
1535       if (config->info.fmask_surf_index && !(surf->flags & RADEON_SURF_SHAREABLE)) {
1536          ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0};
1537          ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0};
1538 
1539          xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
1540          xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
1541 
1542          /* This counter starts from 1 instead of 0. */
1543          xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
1544          xin.tileIndex = fout.tileIndex;
1545          xin.macroModeIndex = fout.macroModeIndex;
1546          xin.pTileInfo = fout.pTileInfo;
1547          xin.tileMode = fin.tileMode;
1548 
1549          int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);
1550          if (r != ADDR_OK)
1551             return r;
1552 
1553          assert(xout.tileSwizzle <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
1554          surf->fmask_tile_swizzle = xout.tileSwizzle;
1555       }
1556    }
1557 
1558    /* Recalculate the whole DCC miptree size including disabled levels.
1559     * This is what addrlib does, but calling addrlib would be a lot more
1560     * complicated.
1561     */
1562    if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) {
1563       /* The smallest miplevels that are never compressed by DCC
1564        * still read the DCC buffer from memory if the base level uses DCC,
1565        * and for some reason the DCC buffer needs to be larger if
1566        * the miptree uses non-zero tile_swizzle. Otherwise there are
1567        * VM faults.
1568        *
1569        * "dcc_alignment * 4" was determined by trial and error.
1570        */
1571       surf->meta_size = align64(surf->surf_size >> 8, (1ull << surf->meta_alignment_log2) * 4);
1572    }
1573 
1574    /* Make sure HTILE covers the whole miptree, because the shader reads
1575     * TC-compatible HTILE even for levels where it's disabled by DB.
1576     */
1577    if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_TC_COMPATIBLE_HTILE) &&
1578        surf->meta_size && config->info.levels > 1) {
1579       /* MSAA can't occur with levels > 1, so ignore the sample count. */
1580       const unsigned total_pixels = surf->surf_size / surf->bpe;
1581       const unsigned htile_block_size = 8 * 8;
1582       const unsigned htile_element_size = 4;
1583 
1584       surf->meta_size = (total_pixels / htile_block_size) * htile_element_size;
1585       surf->meta_size = align(surf->meta_size, 1 << surf->meta_alignment_log2);
1586    } else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && !surf->meta_size) {
1587       /* Unset this if HTILE is not present. */
1588       surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
1589    }
1590 
1591    surf->is_linear = (only_stencil ? surf->u.legacy.zs.stencil_level[0].mode :
1592                                      surf->u.legacy.level[0].mode) == RADEON_SURF_MODE_LINEAR_ALIGNED;
1593 
1594    surf->is_displayable = surf->is_linear || surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
1595                           surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER;
1596 
1597    surf->thick_tiling = AddrSurfInfoOut.tileMode == ADDR_TM_1D_TILED_THICK ||
1598                         AddrSurfInfoOut.tileMode == ADDR_TM_2D_TILED_THICK ||
1599                         AddrSurfInfoOut.tileMode == ADDR_TM_2B_TILED_THICK ||
1600                         AddrSurfInfoOut.tileMode == ADDR_TM_3D_TILED_THICK ||
1601                         AddrSurfInfoOut.tileMode == ADDR_TM_3B_TILED_THICK ||
1602                         AddrSurfInfoOut.tileMode == ADDR_TM_2D_TILED_XTHICK ||
1603                         AddrSurfInfoOut.tileMode == ADDR_TM_3D_TILED_XTHICK ||
1604                         AddrSurfInfoOut.tileMode == ADDR_TM_PRT_TILED_THICK ||
1605                         AddrSurfInfoOut.tileMode == ADDR_TM_PRT_2D_TILED_THICK ||
1606                         AddrSurfInfoOut.tileMode == ADDR_TM_PRT_3D_TILED_THICK ||
1607                         /* Not thick per se, but these also benefit from the 3D access pattern
1608                          * due to pipe rotation between slices.
1609                          */
1610                         AddrSurfInfoOut.tileMode == ADDR_TM_3D_TILED_THIN1 ||
1611                         AddrSurfInfoOut.tileMode == ADDR_TM_PRT_3D_TILED_THIN1;
1612 
1613    /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
1614     * used at the same time. This case is not currently expected to occur
1615     * because we don't use rotated. Enforce this restriction on all chips
1616     * to facilitate testing.
1617     */
1618    if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) {
1619       assert(!"rotate micro tile mode is unsupported");
1620       return ADDR_ERROR;
1621    }
1622 
1623    ac_compute_cmask(info, config, surf);
1624    return 0;
1625 }
1626 
1627 /* This is only called when expecting a tiled layout. */
gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib,const struct radeon_info * info,struct radeon_surf * surf,ADDR2_COMPUTE_SURFACE_INFO_INPUT * in,bool is_fmask,AddrSwizzleMode * swizzle_mode)1628 static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct radeon_info *info,
1629                                            struct radeon_surf *surf,
1630                                            ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, bool is_fmask,
1631                                            AddrSwizzleMode *swizzle_mode)
1632 {
1633    ADDR_E_RETURNCODE ret;
1634    ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
1635    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};
1636 
1637    sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
1638    sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);
1639 
1640    sin.flags = in->flags;
1641    sin.resourceType = in->resourceType;
1642    sin.format = in->format;
1643    sin.resourceLoction = ADDR_RSRC_LOC_INVIS;
1644 
1645    /* TODO: We could allow some of these: */
1646    sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
1647 
1648    if (info->gfx_level >= GFX11) {
1649       /* Disable 256K on APUs because it doesn't work with DAL. */
1650       if (!info->has_dedicated_vram) {
1651          sin.forbiddenBlock.gfx11.thin256KB = 1;
1652          sin.forbiddenBlock.gfx11.thick256KB = 1;
1653       }
1654    } else {
1655       sin.forbiddenBlock.var = 1;   /* don't allow the variable-sized swizzle modes */
1656    }
1657 
1658    sin.bpp = in->bpp;
1659    sin.width = in->width;
1660    sin.height = in->height;
1661    sin.numSlices = in->numSlices;
1662    sin.numMipLevels = in->numMipLevels;
1663    sin.numSamples = in->numSamples;
1664    sin.numFrags = in->numFrags;
1665 
1666    if (is_fmask) {
1667       sin.flags.display = 0;
1668       sin.flags.color = 0;
1669       sin.flags.fmask = 1;
1670    }
1671 
1672    /* With PRT images we want to force 64 KiB block size so that the image
1673     * created is consistent with the format properties returned in Vulkan
1674     * independent of the image. */
1675    if (sin.flags.prt) {
1676       sin.forbiddenBlock.macroThin4KB = 1;
1677       sin.forbiddenBlock.macroThick4KB = 1;
1678       if (info->gfx_level >= GFX11) {
1679          sin.forbiddenBlock.gfx11.thin256KB = 1;
1680          sin.forbiddenBlock.gfx11.thick256KB = 1;
1681       }
1682       sin.forbiddenBlock.linear = 1;
1683    } else if (surf->flags & RADEON_SURF_PREFER_4K_ALIGNMENT) {
1684       sin.forbiddenBlock.macroThin64KB = 1;
1685       sin.forbiddenBlock.macroThick64KB = 1;
1686    }
1687 
1688    if (surf->flags & (RADEON_SURF_PREFER_64K_ALIGNMENT | RADEON_SURF_PREFER_4K_ALIGNMENT)) {
1689       if (info->gfx_level >= GFX11) {
1690          sin.forbiddenBlock.gfx11.thin256KB = 1;
1691          sin.forbiddenBlock.gfx11.thick256KB = 1;
1692       }
1693    }
1694 
1695    if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {
1696       sin.forbiddenBlock.linear = 1;
1697 
1698       if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
1699          sin.preferredSwSet.sw_D = 1;
1700       else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)
1701          sin.preferredSwSet.sw_S = 1;
1702       else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)
1703          sin.preferredSwSet.sw_Z = 1;
1704       else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)
1705          sin.preferredSwSet.sw_R = 1;
1706    }
1707 
1708    if (info->gfx_level >= GFX10 && in->resourceType == ADDR_RSRC_TEX_3D && in->numSlices > 1) {
1709       /* 3D textures should use S swizzle modes for the best performance.
1710        * THe only exception is 3D render targets, which prefer 64KB_D_X.
1711        *
1712        * 3D texture sampler performance with a very large 3D texture:
1713        *   ADDR_SW_64KB_R_X = 19 FPS (DCC on), 26 FPS (DCC off)
1714        *   ADDR_SW_64KB_Z_X = 25 FPS
1715        *   ADDR_SW_64KB_D_X = 53 FPS
1716        *   ADDR_SW_4KB_S    = 53 FPS
1717        *   ADDR_SW_64KB_S   = 53 FPS
1718        *   ADDR_SW_64KB_S_T = 61 FPS
1719        *   ADDR_SW_4KB_S_X  = 63 FPS
1720        *   ADDR_SW_64KB_S_X = 62 FPS
1721        */
1722       sin.preferredSwSet.sw_S = 1;
1723    }
1724 
1725    ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
1726    if (ret != ADDR_OK)
1727       return ret;
1728 
1729    *swizzle_mode = sout.swizzleMode;
1730    return 0;
1731 }
1732 
is_dcc_supported_by_CB(const struct radeon_info * info,unsigned sw_mode)1733 static bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode)
1734 {
1735    switch (info->gfx_level) {
1736    case GFX9:
1737       return sw_mode != ADDR_SW_LINEAR;
1738 
1739    case GFX10:
1740    case GFX10_3:
1741       return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;
1742 
1743    case GFX11:
1744    case GFX11_5:
1745       return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X ||
1746              sw_mode == ADDR_SW_256KB_Z_X || sw_mode == ADDR_SW_256KB_R_X;
1747 
1748    default:
1749       unreachable("invalid gfx_level");
1750    }
1751 }
1752 
is_dcc_supported_by_L2(const struct radeon_info * info,const struct radeon_surf * surf)1753 ASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info,
1754                                             const struct radeon_surf *surf)
1755 {
1756    assert(info->gfx_level < GFX12);
1757 
1758    bool single_indep = surf->u.gfx9.color.dcc.independent_64B_blocks !=
1759                        surf->u.gfx9.color.dcc.independent_128B_blocks;
1760    bool valid_64b = surf->u.gfx9.color.dcc.independent_64B_blocks &&
1761                     surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;
1762    bool valid_128b = surf->u.gfx9.color.dcc.independent_128B_blocks &&
1763                      (surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_128B ||
1764                       (info->gfx_level >= GFX11_5 &&
1765                        surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_256B));
1766 
1767    if (info->gfx_level <= GFX9) {
1768       /* Only independent 64B blocks are supported. */
1769       return single_indep && valid_64b;
1770    }
1771 
1772    if (info->family == CHIP_NAVI10) {
1773       /* Only independent 128B blocks are supported. */
1774       return single_indep && valid_128b;
1775    }
1776 
1777    if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
1778       /* Either 64B or 128B can be used, but the INDEPENDENT_*_BLOCKS setting must match.
1779        * If 64B is used, DCC image stores are unsupported.
1780        */
1781       return single_indep && (valid_64b || valid_128b);
1782    }
1783 
1784    /* Valid settings are the same as NAVI14 + (64B && 128B && max_compressed_block_size == 64B) */
1785    return (single_indep && (valid_64b || valid_128b)) || valid_64b;
1786 }
1787 
gfx10_DCN_requires_independent_64B_blocks(const struct radeon_info * info,const struct ac_surf_config * config)1788 static bool gfx10_DCN_requires_independent_64B_blocks(const struct radeon_info *info,
1789                                                       const struct ac_surf_config *config)
1790 {
1791    assert(info->gfx_level >= GFX10);
1792 
1793    /* Older kernels have buggy DAL. */
1794    if (info->drm_minor <= 43)
1795       return true;
1796 
1797    /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */
1798    return config->info.width > 2560 || config->info.height > 2560;
1799 }
1800 
ac_modifier_max_extent(const struct radeon_info * info,uint64_t modifier,uint32_t * width,uint32_t * height)1801 void ac_modifier_max_extent(const struct radeon_info *info,
1802                             uint64_t modifier, uint32_t *width, uint32_t *height)
1803 {
1804    /* DCC is supported with any size. The maximum width per display pipe is 5760, but multiple
1805     * display pipes can be used to drive the display.
1806     */
1807    *width = 16384;
1808    *height = 16384;
1809 
1810    if (info->gfx_level < GFX12 && ac_modifier_has_dcc(modifier)) {
1811       bool independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
1812 
1813       if (info->gfx_level >= GFX10 && !independent_64B_blocks) {
1814          /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */
1815          *width = 2560;
1816          *height = 2560;
1817       }
1818    }
1819 }
1820 
gfx9_is_dcc_supported_by_DCN(const struct radeon_info * info,const struct ac_surf_config * config,const struct radeon_surf * surf,bool rb_aligned,bool pipe_aligned)1821 static bool gfx9_is_dcc_supported_by_DCN(const struct radeon_info *info,
1822                                          const struct ac_surf_config *config,
1823                                          const struct radeon_surf *surf, bool rb_aligned,
1824                                          bool pipe_aligned)
1825 {
1826    if (!info->use_display_dcc_unaligned && !info->use_display_dcc_with_retile_blit)
1827       return false;
1828 
1829    /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
1830    if (surf->bpe != 4)
1831       return false;
1832 
1833    /* Handle unaligned DCC. */
1834    if (info->use_display_dcc_unaligned && (rb_aligned || pipe_aligned))
1835       return false;
1836 
1837    switch (info->gfx_level) {
1838    case GFX9:
1839       /* There are more constraints, but we always set
1840        * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
1841        * which always works.
1842        */
1843       assert(surf->u.gfx9.color.dcc.independent_64B_blocks &&
1844              surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
1845       return true;
1846    case GFX10:
1847    case GFX10_3:
1848    case GFX11:
1849    case GFX11_5:
1850       /* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */
1851       if (info->gfx_level == GFX10 && surf->u.gfx9.color.dcc.independent_128B_blocks)
1852          return false;
1853 
1854       return (!gfx10_DCN_requires_independent_64B_blocks(info, config) ||
1855               (surf->u.gfx9.color.dcc.independent_64B_blocks &&
1856                surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
1857    default:
1858       unreachable("unhandled chip");
1859       return false;
1860    }
1861 }
1862 
/* Copy the DCC meta block dimensions and the addressing equation from the
 * addrlib output "dcc" into "equation". Must not be called on GFX12+.
 */
static void ac_copy_dcc_equation(const struct radeon_info *info,
                                 ADDR2_COMPUTE_DCCINFO_OUTPUT *dcc,
                                 struct gfx9_meta_equation *equation)
{
   assert(info->gfx_level < GFX12);

   equation->meta_block_width = dcc->metaBlkWidth;
   equation->meta_block_height = dcc->metaBlkHeight;
   equation->meta_block_depth = dcc->metaBlkDepth;

   if (info->gfx_level < GFX10) {
      /* GFX9 layout: copy the dim/ord pair of every coordinate of every bit. */
      assert(dcc->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit));

      equation->u.gfx9.num_bits = dcc->equation.gfx9.num_bits;
      equation->u.gfx9.num_pipe_bits = dcc->equation.gfx9.numPipeBits;

      for (unsigned bit = 0; bit < ARRAY_SIZE(equation->u.gfx9.bit); bit++) {
         for (unsigned k = 0; k < ARRAY_SIZE(equation->u.gfx9.bit[bit].coord); k++) {
            equation->u.gfx9.bit[bit].coord[k].dim = dcc->equation.gfx9.bit[bit].coord[k].dim;
            equation->u.gfx9.bit[bit].coord[k].ord = dcc->equation.gfx9.bit[bit].coord[k].ord;
         }
      }
      return;
   }

   /* GFX10+ layout.
    * gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0.
    */
   for (unsigned head = 0; head < 4; head++)
      assert(dcc->equation.gfx10_bits[head] == 0);

   for (unsigned tail = ARRAY_SIZE(equation->u.gfx10_bits) + 4; tail < 68; tail++)
      assert(dcc->equation.gfx10_bits[tail] == 0);

   /* Copy only the stored part, starting after the 4 skipped leading elements. */
   memcpy(equation->u.gfx10_bits, dcc->equation.gfx10_bits + 4,
          sizeof(equation->u.gfx10_bits));
}
1896 
/* Copy the CMASK meta block dimensions from the addrlib output "cmask" into
 * "equation"; on GFX9 the addressing equation is copied as well.
 * Must not be called on GFX11+.
 */
static void ac_copy_cmask_equation(const struct radeon_info *info,
                                   ADDR2_COMPUTE_CMASK_INFO_OUTPUT *cmask,
                                   struct gfx9_meta_equation *equation)
{
   assert(info->gfx_level < GFX11);

   equation->meta_block_width = cmask->metaBlkWidth;
   equation->meta_block_height = cmask->metaBlkHeight;
   equation->meta_block_depth = 1;

   /* Only GFX9 copies an equation here. */
   if (info->gfx_level != GFX9)
      return;

   assert(cmask->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit));

   equation->u.gfx9.num_bits = cmask->equation.gfx9.num_bits;
   equation->u.gfx9.num_pipe_bits = cmask->equation.gfx9.numPipeBits;

   /* Copy the dim/ord pair of every coordinate of every bit. */
   for (unsigned bit = 0; bit < ARRAY_SIZE(equation->u.gfx9.bit); bit++) {
      for (unsigned k = 0; k < ARRAY_SIZE(equation->u.gfx9.bit[bit].coord); k++) {
         equation->u.gfx9.bit[bit].coord[k].dim = cmask->equation.gfx9.bit[bit].coord[k].dim;
         equation->u.gfx9.bit[bit].coord[k].ord = cmask->equation.gfx9.bit[bit].coord[k].ord;
      }
   }
}
1920 
/* Copy the HTILE meta block width/height and the gfx10_bits addressing
 * equation from the addrlib output "htile" into "equation".
 * Must not be called on GFX12+.
 */
static void ac_copy_htile_equation(const struct radeon_info *info,
                                   ADDR2_COMPUTE_HTILE_INFO_OUTPUT *htile,
                                   struct gfx9_meta_equation *equation)
{
   assert(info->gfx_level < GFX12);

   equation->meta_block_height = htile->metaBlkHeight;
   equation->meta_block_width = htile->metaBlkWidth;

   /* gfx9_meta_equation doesn't store the first 8 and the last 4 elements. They must be 0. */
   for (unsigned head = 0; head < 8; head++)
      assert(htile->equation.gfx10_bits[head] == 0);

   for (unsigned tail = ARRAY_SIZE(equation->u.gfx10_bits) + 8; tail < 72; tail++)
      assert(htile->equation.gfx10_bits[tail] == 0);

   /* Copy only the stored part, starting after the 8 skipped leading elements. */
   memcpy(equation->u.gfx10_bits, htile->equation.gfx10_bits + 8,
          sizeof(equation->u.gfx10_bits));
}
1940 
gfx9_compute_miptree(struct ac_addrlib * addrlib,const struct radeon_info * info,const struct ac_surf_config * config,struct radeon_surf * surf,bool compressed,ADDR2_COMPUTE_SURFACE_INFO_INPUT * in)1941 static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_info *info,
1942                                 const struct ac_surf_config *config, struct radeon_surf *surf,
1943                                 bool compressed, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
1944 {
1945    ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {0};
1946    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
1947    ADDR_E_RETURNCODE ret;
1948 
1949    out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
1950    out.pMipInfo = mip_info;
1951 
1952    ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out);
1953    if (ret != ADDR_OK)
1954       return ret;
1955 
1956    if (in->flags.prt) {
1957       surf->prt_tile_width = out.blockWidth;
1958       surf->prt_tile_height = out.blockHeight;
1959       surf->prt_tile_depth = out.blockSlices;
1960 
1961       surf->first_mip_tail_level = out.firstMipIdInTail;
1962 
1963       for (unsigned i = 0; i < in->numMipLevels; i++) {
1964          surf->u.gfx9.prt_level_offset[i] = mip_info[i].macroBlockOffset + mip_info[i].mipTailOffset;
1965 
1966          if (info->gfx_level >= GFX10)
1967             surf->u.gfx9.prt_level_pitch[i] = mip_info[i].pitch;
1968          else
1969             surf->u.gfx9.prt_level_pitch[i] = out.mipChainPitch;
1970       }
1971    }
1972 
1973    surf->thick_tiling = out.blockSlices > 1; /* should be 0 for depth and stencil */
1974 
1975    if (in->flags.stencil) {
1976       surf->u.gfx9.zs.stencil_swizzle_mode = in->swizzleMode;
1977       surf->u.gfx9.zs.stencil_epitch =
1978          out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1;
1979       surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign));
1980       surf->u.gfx9.zs.stencil_offset = align(surf->surf_size, out.baseAlign);
1981       surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize;
1982       return 0;
1983    }
1984 
1985    surf->u.gfx9.swizzle_mode = in->swizzleMode;
1986    surf->u.gfx9.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1;
1987 
1988    /* CMASK fast clear uses these even if FMASK isn't allocated.
1989     * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.
1990     */
1991    if (!in->flags.depth) {
1992       surf->u.gfx9.color.fmask_swizzle_mode = surf->u.gfx9.swizzle_mode & ~0x3;
1993       surf->u.gfx9.color.fmask_epitch = surf->u.gfx9.epitch;
1994    }
1995 
1996    surf->u.gfx9.surf_slice_size = out.sliceSize;
1997    surf->u.gfx9.surf_pitch = out.pitch;
1998    surf->u.gfx9.surf_height = out.height;
1999    surf->surf_size = out.surfSize;
2000    surf->surf_alignment_log2 = util_logbase2(out.baseAlign);
2001 
2002    const int linear_alignment =
2003       util_next_power_of_two(LINEAR_PITCH_ALIGNMENT / surf->bpe);
2004 
2005    if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
2006        surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR &&
2007        in->numMipLevels == 1) {
2008       /* Divide surf_pitch (= pitch in pixels) by blk_w to get a
2009        * pitch in elements instead because that's what the hardware needs
2010        * in resource descriptors.
2011        * See the comment in si_descriptors.c.
2012        */
2013       surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w,
2014                                       linear_alignment);
2015       surf->u.gfx9.epitch = surf->u.gfx9.surf_pitch - 1;
2016        /* Adjust surf_slice_size and surf_size to reflect the change made to surf_pitch. */
2017       surf->u.gfx9.surf_slice_size = (uint64_t)surf->u.gfx9.surf_pitch * out.height * surf->bpe;
2018       surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;
2019 
2020       for (unsigned i = 0; i < in->numMipLevels; i++) {
2021          surf->u.gfx9.offset[i] = mip_info[i].offset;
2022          /* Adjust pitch like we did for surf_pitch */
2023          surf->u.gfx9.pitch[i] = align(mip_info[i].pitch / surf->blk_w,
2024                                        linear_alignment);
2025       }
2026       surf->u.gfx9.base_mip_width = surf->u.gfx9.surf_pitch;
2027    } else if (in->swizzleMode == ADDR_SW_LINEAR) {
2028       for (unsigned i = 0; i < in->numMipLevels; i++) {
2029          surf->u.gfx9.offset[i] = mip_info[i].offset;
2030          surf->u.gfx9.pitch[i] = mip_info[i].pitch;
2031       }
2032       surf->u.gfx9.base_mip_width = surf->u.gfx9.surf_pitch;
2033    } else {
2034       surf->u.gfx9.base_mip_width = mip_info[0].pitch;
2035    }
2036 
2037    surf->u.gfx9.base_mip_height = mip_info[0].height;
2038 
2039    if (in->flags.depth) {
2040       assert(in->swizzleMode != ADDR_SW_LINEAR);
2041 
2042       if (surf->flags & RADEON_SURF_NO_HTILE)
2043          return 0;
2044 
2045       /* HTILE */
2046       ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
2047       ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
2048       ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
2049 
2050       hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
2051       hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
2052       hout.pMipInfo = meta_mip_info;
2053 
2054       assert(in->flags.metaPipeUnaligned == 0);
2055       assert(in->flags.metaRbUnaligned == 0);
2056 
2057       hin.hTileFlags.pipeAligned = 1;
2058       hin.hTileFlags.rbAligned = 1;
2059       hin.depthFlags = in->flags;
2060       hin.swizzleMode = in->swizzleMode;
2061       hin.unalignedWidth = in->width;
2062       hin.unalignedHeight = in->height;
2063       hin.numSlices = in->numSlices;
2064       hin.numMipLevels = in->numMipLevels;
2065       hin.firstMipIdInTail = out.firstMipIdInTail;
2066 
2067       ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout);
2068       if (ret != ADDR_OK)
2069          return ret;
2070 
2071       surf->meta_size = hout.htileBytes;
2072       surf->meta_slice_size = hout.sliceSize;
2073       surf->meta_alignment_log2 = util_logbase2(hout.baseAlign);
2074       surf->meta_pitch = hout.pitch;
2075       surf->num_meta_levels = in->numMipLevels;
2076 
2077       for (unsigned i = 0; i < in->numMipLevels; i++) {
2078          surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;
2079          surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize;
2080 
2081          if (meta_mip_info[i].inMiptail) {
2082             /* GFX10 can only compress the first level
2083              * in the mip tail.
2084              */
2085             surf->num_meta_levels = i + 1;
2086             break;
2087          }
2088       }
2089 
2090       if (!surf->num_meta_levels)
2091          surf->meta_size = 0;
2092 
2093       if (info->gfx_level >= GFX10)
2094          ac_copy_htile_equation(info, &hout, &surf->u.gfx9.zs.htile_equation);
2095       return 0;
2096    }
2097 
2098    {
2099       /* Compute tile swizzle for the color surface.
2100        * All *_X and *_T modes can use the swizzle.
2101        */
2102       if (config->info.surf_index && in->swizzleMode >= ADDR_SW_64KB_Z_T && !out.mipChainInTail &&
2103           !(surf->flags & RADEON_SURF_SHAREABLE) && !in->flags.display) {
2104          ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
2105          ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
2106 
2107          xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
2108          xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
2109 
2110          xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
2111          xin.flags = in->flags;
2112          xin.swizzleMode = in->swizzleMode;
2113          xin.resourceType = in->resourceType;
2114          xin.format = in->format;
2115          xin.numSamples = in->numSamples;
2116          xin.numFrags = in->numFrags;
2117 
2118          ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
2119          if (ret != ADDR_OK)
2120             return ret;
2121 
2122          assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
2123          surf->tile_swizzle = xout.pipeBankXor;
2124 
2125          /* Gfx11 should shift it by 10 bits instead of 8, and drivers already shift it by 8 bits,
2126           * so shift it by 2 bits here.
2127           */
2128          if (info->gfx_level >= GFX11)
2129             surf->tile_swizzle <<= 2;
2130       }
2131 
2132       bool use_dcc = false;
2133       if (surf->modifier != DRM_FORMAT_MOD_INVALID) {
2134          use_dcc = ac_modifier_has_dcc(surf->modifier);
2135       } else {
2136          use_dcc = info->has_graphics && !(surf->flags & RADEON_SURF_DISABLE_DCC) && !compressed &&
2137                    !config->is_3d &&
2138                    is_dcc_supported_by_CB(info, in->swizzleMode) &&
2139                    (!in->flags.display ||
2140                     gfx9_is_dcc_supported_by_DCN(info, config, surf, !in->flags.metaRbUnaligned,
2141                                                  !in->flags.metaPipeUnaligned));
2142       }
2143 
2144       /* DCC */
2145       if (use_dcc) {
2146          ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
2147          ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
2148          ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
2149 
2150          din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
2151          dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
2152          dout.pMipInfo = meta_mip_info;
2153 
2154          din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
2155          din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
2156          din.resourceType = in->resourceType;
2157          din.swizzleMode = in->swizzleMode;
2158          din.bpp = in->bpp;
2159          din.unalignedWidth = in->width;
2160          din.unalignedHeight = in->height;
2161          din.numSlices = in->numSlices;
2162          din.numFrags = in->numFrags;
2163          din.numMipLevels = in->numMipLevels;
2164          din.dataSurfaceSize = out.surfSize;
2165          din.firstMipIdInTail = out.firstMipIdInTail;
2166 
2167          if (info->gfx_level == GFX9)
2168             simple_mtx_lock(&addrlib->lock);
2169          ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
2170          if (info->gfx_level == GFX9)
2171             simple_mtx_unlock(&addrlib->lock);
2172 
2173          if (ret != ADDR_OK)
2174             return ret;
2175 
2176          surf->u.gfx9.color.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
2177          surf->u.gfx9.color.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
2178          surf->u.gfx9.color.dcc_block_width = dout.compressBlkWidth;
2179          surf->u.gfx9.color.dcc_block_height = dout.compressBlkHeight;
2180          surf->u.gfx9.color.dcc_block_depth = dout.compressBlkDepth;
2181          surf->u.gfx9.color.dcc_pitch_max = dout.pitch - 1;
2182          surf->u.gfx9.color.dcc_height = dout.height;
2183          surf->meta_size = dout.dccRamSize;
2184          surf->meta_slice_size = dout.dccRamSliceSize;
2185          surf->meta_alignment_log2 = util_logbase2(dout.dccRamBaseAlign);
2186          surf->num_meta_levels = in->numMipLevels;
2187 
2188          /* Disable DCC for levels that are in the mip tail.
2189           *
2190           * There are two issues that this is intended to
2191           * address:
2192           *
2193           * 1. Multiple mip levels may share a cache line. This
2194           *    can lead to corruption when switching between
2195           *    rendering to different mip levels because the
2196           *    RBs don't maintain coherency.
2197           *
2198           * 2. Texturing with metadata after rendering sometimes
2199           *    fails with corruption, probably for a similar
2200           *    reason.
2201           *
2202           * Working around these issues for all levels in the
2203           * mip tail may be overly conservative, but it's what
2204           * Vulkan does.
2205           *
2206           * Alternative solutions that also work but are worse:
2207           * - Disable DCC entirely.
2208           * - Flush the L2 cache after rendering.
2209           */
2210          for (unsigned i = 0; i < in->numMipLevels; i++) {
2211             surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;
2212             surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize;
2213 
2214             if (meta_mip_info[i].inMiptail) {
2215                /* GFX10 can only compress the first level
2216                 * in the mip tail.
2217                 *
2218                 * TODO: Try to do the same thing for gfx9
2219                 *       if there are no regressions.
2220                 */
2221                if (info->gfx_level >= GFX10)
2222                   surf->num_meta_levels = i + 1;
2223                else
2224                   surf->num_meta_levels = i;
2225                break;
2226             }
2227          }
2228 
2229          if (!surf->num_meta_levels)
2230             surf->meta_size = 0;
2231 
2232          surf->u.gfx9.color.display_dcc_size = surf->meta_size;
2233          surf->u.gfx9.color.display_dcc_alignment_log2 = surf->meta_alignment_log2;
2234          surf->u.gfx9.color.display_dcc_pitch_max = surf->u.gfx9.color.dcc_pitch_max;
2235          surf->u.gfx9.color.display_dcc_height = surf->u.gfx9.color.dcc_height;
2236 
2237          if (in->resourceType == ADDR_RSRC_TEX_2D)
2238             ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.dcc_equation);
2239 
2240          /* Compute displayable DCC. */
2241          if (((in->flags.display && info->use_display_dcc_with_retile_blit) ||
2242               ac_modifier_has_dcc_retile(surf->modifier)) && surf->num_meta_levels) {
2243             /* Compute displayable DCC info. */
2244             din.dccKeyFlags.pipeAligned = 0;
2245             din.dccKeyFlags.rbAligned = 0;
2246 
2247             assert(din.numSlices == 1);
2248             assert(din.numMipLevels == 1);
2249             assert(din.numFrags == 1);
2250             assert(surf->tile_swizzle == 0);
2251             assert(surf->u.gfx9.color.dcc.pipe_aligned || surf->u.gfx9.color.dcc.rb_aligned);
2252 
2253             if (info->gfx_level == GFX9)
2254                simple_mtx_lock(&addrlib->lock);
2255             ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
2256             if (info->gfx_level == GFX9)
2257                simple_mtx_unlock(&addrlib->lock);
2258 
2259             if (ret != ADDR_OK)
2260                return ret;
2261 
2262             surf->u.gfx9.color.display_dcc_size = dout.dccRamSize;
2263             surf->u.gfx9.color.display_dcc_alignment_log2 = util_logbase2(dout.dccRamBaseAlign);
2264             surf->u.gfx9.color.display_dcc_pitch_max = dout.pitch - 1;
2265             surf->u.gfx9.color.display_dcc_height = dout.height;
2266             assert(surf->u.gfx9.color.display_dcc_size <= surf->meta_size);
2267 
2268             ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.display_dcc_equation);
2269             surf->u.gfx9.color.dcc.display_equation_valid = true;
2270          }
2271       }
2272 
2273       /* FMASK (it doesn't exist on GFX11) */
2274       if (info->gfx_level <= GFX10_3 && info->has_graphics &&
2275           in->numSamples > 1 && !(surf->flags & RADEON_SURF_NO_FMASK)) {
2276          ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0};
2277          ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
2278 
2279          fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT);
2280          fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT);
2281 
2282          ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, in, true, &fin.swizzleMode);
2283          if (ret != ADDR_OK)
2284             return ret;
2285 
2286          fin.unalignedWidth = in->width;
2287          fin.unalignedHeight = in->height;
2288          fin.numSlices = in->numSlices;
2289          fin.numSamples = in->numSamples;
2290          fin.numFrags = in->numFrags;
2291 
2292          ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout);
2293          if (ret != ADDR_OK)
2294             return ret;
2295 
2296          surf->u.gfx9.color.fmask_swizzle_mode = fin.swizzleMode;
2297          surf->u.gfx9.color.fmask_epitch = fout.pitch - 1;
2298          surf->fmask_size = fout.fmaskBytes;
2299          surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign);
2300          surf->fmask_slice_size = fout.sliceSize;
2301 
2302          /* Compute tile swizzle for the FMASK surface. */
2303          if (config->info.fmask_surf_index && fin.swizzleMode >= ADDR_SW_64KB_Z_T &&
2304              !(surf->flags & RADEON_SURF_SHAREABLE)) {
2305             ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
2306             ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
2307 
2308             xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
2309             xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
2310 
2311             /* This counter starts from 1 instead of 0. */
2312             xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
2313             xin.flags = in->flags;
2314             xin.swizzleMode = fin.swizzleMode;
2315             xin.resourceType = in->resourceType;
2316             xin.format = in->format;
2317             xin.numSamples = in->numSamples;
2318             xin.numFrags = in->numFrags;
2319 
2320             ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
2321             if (ret != ADDR_OK)
2322                return ret;
2323 
2324             assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8));
2325             surf->fmask_tile_swizzle = xout.pipeBankXor;
2326          }
2327       }
2328 
2329       /* CMASK -- on GFX10 only for FMASK (and it doesn't exist on GFX11) */
2330       if (info->gfx_level <= GFX10_3 && info->has_graphics &&
2331           in->swizzleMode != ADDR_SW_LINEAR && in->resourceType == ADDR_RSRC_TEX_2D &&
2332           ((info->gfx_level <= GFX9 && in->numSamples == 1 && in->flags.metaPipeUnaligned == 0 &&
2333             in->flags.metaRbUnaligned == 0) ||
2334            (surf->fmask_size && in->numSamples >= 2))) {
2335          ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
2336          ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};
2337          ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
2338 
2339          cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
2340          cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
2341          cout.pMipInfo = meta_mip_info;
2342 
2343          assert(in->flags.metaPipeUnaligned == 0);
2344          assert(in->flags.metaRbUnaligned == 0);
2345 
2346          cin.cMaskFlags.pipeAligned = 1;
2347          cin.cMaskFlags.rbAligned = 1;
2348          cin.resourceType = in->resourceType;
2349          cin.unalignedWidth = in->width;
2350          cin.unalignedHeight = in->height;
2351          cin.numSlices = in->numSlices;
2352          cin.numMipLevels = in->numMipLevels;
2353          cin.firstMipIdInTail = out.firstMipIdInTail;
2354 
2355          if (in->numSamples > 1)
2356             cin.swizzleMode = surf->u.gfx9.color.fmask_swizzle_mode;
2357          else
2358             cin.swizzleMode = in->swizzleMode;
2359 
2360          if (info->gfx_level == GFX9)
2361             simple_mtx_lock(&addrlib->lock);
2362          ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);
2363          if (info->gfx_level == GFX9)
2364             simple_mtx_unlock(&addrlib->lock);
2365 
2366          if (ret != ADDR_OK)
2367             return ret;
2368 
2369          surf->cmask_size = cout.cmaskBytes;
2370          surf->cmask_alignment_log2 = util_logbase2(cout.baseAlign);
2371          surf->cmask_slice_size = cout.sliceSize;
2372          surf->cmask_pitch = cout.pitch;
2373          surf->cmask_height = cout.height;
2374          surf->u.gfx9.color.cmask_level0.offset = meta_mip_info[0].offset;
2375          surf->u.gfx9.color.cmask_level0.size = meta_mip_info[0].sliceSize;
2376 
2377          ac_copy_cmask_equation(info, &cout, &surf->u.gfx9.color.cmask_equation);
2378       }
2379    }
2380 
2381    return 0;
2382 }
2383 
/*
 * Compute the layout of one surface through the Addr2 (ADDR2_*) interface.
 *
 * The function builds an ADDR2_COMPUTE_SURFACE_INFO_INPUT from `config` and
 * `surf`, selects a swizzle mode (from `mode` when surf->modifier is
 * DRM_FORMAT_MOD_INVALID, otherwise from the modifier), clears the size and
 * metadata fields of `surf`, and then calls gfx9_compute_miptree() once for
 * the non-stencil plane (skipped for stencil-only surfaces) and once more for
 * stencil when RADEON_SURF_SBUFFER is set. Afterwards it fills is_linear,
 * is_displayable and micro_tile_mode and runs assert()-only checks on the
 * resulting DCC state.
 *
 * Parameters:
 *   addrlib - its ->handle is passed to the Addr2 call and swizzle helpers.
 *   info    - read for gfx_level, family, has_graphics and the
 *             use_display_dcc_* flags.
 *   config  - dimensions, level/sample counts and the is_1d/is_3d/is_cube kind.
 *   mode    - requested tiling mode; only consulted when no modifier is set.
 *   surf    - in/out: flags, modifier, bpe and blk_w/blk_h are read; layout
 *             fields are written, and RADEON_SURF_TC_COMPATIBLE_HTILE is
 *             cleared from flags when meta_size ends up 0.
 *
 * Returns 0 on success, otherwise the non-zero result of the failing
 * gfx9_get_preferred_swizzle_mode(), gfx9_compute_miptree() or
 * Addr2IsValidDisplaySwizzleMode() call.
 */
gfx9_compute_surface(struct ac_addrlib * addrlib,const struct radeon_info * info,const struct ac_surf_config * config,enum radeon_surf_mode mode,struct radeon_surf * surf)2384 static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
2385                                 const struct ac_surf_config *config, enum radeon_surf_mode mode,
2386                                 struct radeon_surf *surf)
2387 {
2388    bool compressed;
2389    ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
2390    int r;
2391 
2392    AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
2393 
        /* "compressed" here simply means a 4x4 block size. In that case bpp is
         * left at 0 and only the format from bpe_to_format() is set. */
2394    compressed = surf->blk_w == 4 && surf->blk_h == 4;
2395 
2396    AddrSurfInfoIn.format = bpe_to_format(surf);
2397    if (!compressed)
2398       AddrSurfInfoIn.bpp = surf->bpe * 8;
2399 
2400    bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
2401    AddrSurfInfoIn.flags.color = is_color_surface && !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
2402    AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
2403    AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
2404    /* flags.texture currently refers to TC-compatible HTILE */
2405    AddrSurfInfoIn.flags.texture = (is_color_surface && !(surf->flags & RADEON_SURF_NO_TEXTURE)) ||
2406                                   (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
2407    AddrSurfInfoIn.flags.opt4space = 1;
2408    AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;
2409 
2410    AddrSurfInfoIn.numMipLevels = config->info.levels;
2411    AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
2412    AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;
2413 
        /* For non-depth/stencil surfaces the fragment count comes from
         * storage_samples rather than samples. */
2414    if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
2415       AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
2416 
2417    /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
2418     * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
2419     * must sample 1D textures as 2D. */
2420    if (config->is_3d)
2421       AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
2422    else if (info->gfx_level != GFX9 && config->is_1d)
2423       AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
2424    else
2425       AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;
2426 
2427    AddrSurfInfoIn.width = config->info.width;
2428    AddrSurfInfoIn.height = config->info.height;
2429 
        /* Slice count: depth for 3D, a fixed 6 for cubes, array_size otherwise. */
2430    if (config->is_3d)
2431       AddrSurfInfoIn.numSlices = config->info.depth;
2432    else if (config->is_cube)
2433       AddrSurfInfoIn.numSlices = 6;
2434    else
2435       AddrSurfInfoIn.numSlices = config->info.array_size;
2436 
2437    /* This is propagated to DCC. It must be 0 for HTILE and CMASK. */
2438    AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
2439    AddrSurfInfoIn.flags.metaRbUnaligned = 0;
2440 
        /* DCC parameters: taken from the modifier when it has DCC, otherwise
         * chosen per gfx_level below (and left untouched for imported buffers). */
2441    if (ac_modifier_has_dcc(surf->modifier)) {
2442       ac_modifier_fill_dcc_params(surf->modifier, surf, &AddrSurfInfoIn);
2443    } else if (!AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.stencil) {
2444       /* Optimal values for the L2 cache. */
2445       /* Don't change the DCC settings for imported buffers - they might differ. */
2446       if (!(surf->flags & RADEON_SURF_IMPORTED)) {
2447          if (info->gfx_level >= GFX11_5) {
2448             surf->u.gfx9.color.dcc.independent_64B_blocks = 0;
2449             surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
2450             surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
2451          } else if (info->gfx_level >= GFX10) {
2452             surf->u.gfx9.color.dcc.independent_64B_blocks = 0;
2453             surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
2454             surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
2455          } else if (info->gfx_level == GFX9) {
2456             surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
2457             surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
2458             surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
2459          }
2460       }
2461 
2462       if (AddrSurfInfoIn.flags.display) {
2463          /* The display hardware can only read DCC with RB_ALIGNED=0 and
2464           * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
2465           *
2466           * The CB block requires RB_ALIGNED=1 except 1 RB chips.
2467           * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
2468           * after rendering, so PIPE_ALIGNED=1 is recommended.
2469           */
2470          if (info->use_display_dcc_unaligned) {
2471             AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
2472             AddrSurfInfoIn.flags.metaRbUnaligned = 1;
2473          }
2474 
2475          /* Adjust DCC settings to meet DCN requirements. */
2476          /* Don't change the DCC settings for imported buffers - they might differ. */
2477          if (!(surf->flags & RADEON_SURF_IMPORTED) &&
2478              (info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit)) {
2479             /* Only Navi12/14 support independent 64B blocks in L2,
2480              * but without DCC image stores.
2481              */
2482             if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
2483                surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
2484                surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
2485                surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
2486             }
2487 
2488             if ((info->gfx_level >= GFX10_3 && info->family <= CHIP_REMBRANDT) ||
2489                 /* Newer chips will skip this when possible to get better performance.
2490                  * This is also possible for other gfx10.3 chips, but is disabled for
2491                  * interoperability between different Mesa versions.
2492                  */
2493                 (info->family > CHIP_REMBRANDT &&
2494                  gfx10_DCN_requires_independent_64B_blocks(info, config))) {
2495                surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
2496                surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
2497                surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
2498             }
2499          }
2500       }
2501    }
2502 
        /* Swizzle mode selection. Without a modifier, `mode` decides: linear is
         * forced for LINEAR_ALIGNED, while 1D/2D first honor the imported/forced,
         * VRS-rate and video-reference cases and only then ask
         * gfx9_get_preferred_swizzle_mode(). */
2503    if (surf->modifier == DRM_FORMAT_MOD_INVALID) {
2504       switch (mode) {
2505       case RADEON_SURF_MODE_LINEAR_ALIGNED:
2506          assert(config->info.samples <= 1);
2507          assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
2508          AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
2509          break;
2510 
2511       case RADEON_SURF_MODE_1D:
2512       case RADEON_SURF_MODE_2D:
2513          if (surf->flags & RADEON_SURF_IMPORTED ||
2514              (info->gfx_level >= GFX10 && surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
2515             AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode;
2516             break;
2517          }
2518 
2519          /* On GFX11, the only allowed swizzle mode for VRS rate images is
2520           * 64KB_R_X.
2521           */
2522          if (info->gfx_level >= GFX11 && surf->flags & RADEON_SURF_VRS_RATE) {
2523             AddrSurfInfoIn.swizzleMode = ADDR_SW_64KB_R_X;
2524             break;
2525          }
2526 
2527          /* VCN only supports 256B_D. */
2528          if (surf->flags & RADEON_SURF_VIDEO_REFERENCE) {
2529             AddrSurfInfoIn.swizzleMode = ADDR_SW_256B_D;
2530             break;
2531          }
2532 
2533          r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
2534                                              &AddrSurfInfoIn.swizzleMode);
2535          if (r)
2536             return r;
2537          break;
2538 
2539       default:
2540          assert(0);
2541       }
2542    } else {
2543       /* We have a valid and required modifier here. */
2544 
2545       assert(!compressed);
2546       assert(!ac_modifier_has_dcc(surf->modifier) ||
2547              !(surf->flags & RADEON_SURF_DISABLE_DCC));
2548 
2549       AddrSurfInfoIn.swizzleMode = ac_get_modifier_swizzle_mode(info->gfx_level, surf->modifier);
2550    }
2551 
2552    surf->u.gfx9.resource_type = (enum gfx9_resource_type)AddrSurfInfoIn.resourceType;
2553    surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
2554 
        /* Clear the size and metadata outputs before the miptree passes run. */
2555    surf->num_meta_levels = 0;
2556    surf->surf_size = 0;
2557    surf->fmask_size = 0;
2558    surf->meta_size = 0;
2559    surf->meta_slice_size = 0;
2560    surf->u.gfx9.surf_offset = 0;
        /* NOTE(review): within this function flags.stencil is only assigned in
         * the stencil pass further down, so unless
         * ac_modifier_fill_dcc_params() sets it, this condition looks always
         * false at this point - confirm the intent. */
2561    if (AddrSurfInfoIn.flags.stencil)
2562       surf->u.gfx9.zs.stencil_offset = 0;
2563    surf->cmask_size = 0;
2564 
2565    const bool only_stencil =
2566       (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);
2567 
2568    /* Calculate texture layout information. */
2569    if (!only_stencil) {
2570       r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
2571       if (r)
2572          return r;
2573    }
2574 
2575    /* Calculate texture layout information for stencil. */
2576    if (surf->flags & RADEON_SURF_SBUFFER) {
           /* The same input struct is reused for the stencil pass with 8 bpp and
            * ADDR_FMT_8. */
2577       AddrSurfInfoIn.flags.stencil = 1;
2578       AddrSurfInfoIn.bpp = 8;
2579       AddrSurfInfoIn.format = ADDR_FMT_8;
2580 
           /* With flags.depth clear, a swizzle mode is queried again for
            * the 8 bpp input. Otherwise the swizzle mode already in the input is
            * kept and only flags.depth is cleared. */
2581       if (!AddrSurfInfoIn.flags.depth) {
2582          r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
2583                                              &AddrSurfInfoIn.swizzleMode);
2584          if (r)
2585             return r;
2586       } else
2587          AddrSurfInfoIn.flags.depth = 0;
2588 
2589       r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
2590       if (r)
2591          return r;
2592    }
2593 
        /* For stencil-only surfaces, linearity is judged from the stencil swizzle
         * mode instead of the main one. */
2594    surf->is_linear = (only_stencil ? surf->u.gfx9.zs.stencil_swizzle_mode :
2595                                      surf->u.gfx9.swizzle_mode) == ADDR_SW_LINEAR;
2596 
2597    /* Query whether the surface is displayable. */
2598    /* This is only useful for surfaces that are allocated without SCANOUT. */
2599    BOOL_32 displayable = false;
2600    if (!config->is_3d && !config->is_cube) {
2601       r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.swizzle_mode,
2602                                          surf->bpe * 8, &displayable);
2603       if (r)
2604          return r;
2605 
2606       /* Display needs unaligned DCC. */
2607       if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
2608           surf->num_meta_levels &&
2609           (!gfx9_is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
2610                                          surf->u.gfx9.color.dcc.pipe_aligned) ||
2611            /* Don't set is_displayable if displayable DCC is missing. */
2612            (info->use_display_dcc_with_retile_blit && !surf->u.gfx9.color.dcc.display_equation_valid)))
2613          displayable = false;
2614    }
2615    surf->is_displayable = displayable;
2616 
2617    /* Validate that we allocated a displayable surface if requested. */
2618    assert(!AddrSurfInfoIn.flags.display || surf->is_displayable);
2619 
2620    /* Validate that DCC is set up correctly. */
2621    if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->num_meta_levels) {
2622       assert(is_dcc_supported_by_L2(info, surf));
2623       if (AddrSurfInfoIn.flags.color)
2624          assert(is_dcc_supported_by_CB(info, surf->u.gfx9.swizzle_mode));
2625       if (AddrSurfInfoIn.flags.display && surf->modifier == DRM_FORMAT_MOD_INVALID) {
2626          assert(gfx9_is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
2627                                              surf->u.gfx9.color.dcc.pipe_aligned));
2628       }
2629    }
2630 
        /* The block below contains only assert() checks that DCC was enabled
         * (num_meta_levels != 0) for single-level, non-linear, 64KB-or-larger
         * aligned color surfaces; it writes nothing to `surf`. */
2631    if (info->has_graphics && !compressed && !config->is_3d && config->info.levels == 1 &&
2632        AddrSurfInfoIn.flags.color && !surf->is_linear &&
2633        (1 << surf->surf_alignment_log2) >= 64 * 1024 && /* 64KB tiling */
2634        !(surf->flags & (RADEON_SURF_DISABLE_DCC | RADEON_SURF_FORCE_SWIZZLE_MODE |
2635                         RADEON_SURF_FORCE_MICRO_TILE_MODE)) &&
2636        surf->modifier == DRM_FORMAT_MOD_INVALID &&
2637        gfx9_is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
2638                                     surf->u.gfx9.color.dcc.pipe_aligned)) {
2639       /* Validate that DCC is enabled if DCN can do it. */
2640       if ((info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) &&
2641           AddrSurfInfoIn.flags.display && surf->bpe == 4) {
2642          assert(surf->num_meta_levels);
2643       }
2644 
2645       /* Validate that non-scanout DCC is always enabled. */
2646       if (!AddrSurfInfoIn.flags.display)
2647          assert(surf->num_meta_levels);
2648    }
2649 
2650    if (!surf->meta_size) {
2651       /* Unset this if HTILE is not present. */
2652       surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
2653    }
2654 
        /* With a modifier, DCC must be present exactly when the modifier says so. */
2655    if (surf->modifier != DRM_FORMAT_MOD_INVALID) {
2656       assert((surf->num_meta_levels != 0) == ac_modifier_has_dcc(surf->modifier));
2657    }
2658 
        /* Map surf->u.gfx9.swizzle_mode to a RADEON_MICRO_MODE_* value; modes
         * not listed here hit assert(0). */
2659    switch (surf->u.gfx9.swizzle_mode) {
2660    /* S = standard. */
2661    case ADDR_SW_256B_S:
2662    case ADDR_SW_4KB_S:
2663    case ADDR_SW_64KB_S:
2664    case ADDR_SW_64KB_S_T:
2665    case ADDR_SW_4KB_S_X:
2666    case ADDR_SW_64KB_S_X:
2667    case ADDR_SW_256KB_S_X:
2668       surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD;
2669       break;
2670 
2671    /* D = display. */
2672    case ADDR_SW_LINEAR:
2673    case ADDR_SW_256B_D:
2674    case ADDR_SW_4KB_D:
2675    case ADDR_SW_64KB_D:
2676    case ADDR_SW_64KB_D_T:
2677    case ADDR_SW_4KB_D_X:
2678    case ADDR_SW_64KB_D_X:
2679    case ADDR_SW_256KB_D_X:
2680       surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
2681       break;
2682 
2683    /* R = rotated (gfx9), render target (gfx10). */
2684    case ADDR_SW_256B_R:
2685    case ADDR_SW_4KB_R:
2686    case ADDR_SW_64KB_R:
2687    case ADDR_SW_64KB_R_T:
2688    case ADDR_SW_4KB_R_X:
2689    case ADDR_SW_64KB_R_X:
2690    case ADDR_SW_256KB_R_X:
2691       /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
2692        * used at the same time. We currently do not use rotated
2693        * in gfx9.
2694        */
2695       assert(info->gfx_level >= GFX10 || !"rotate micro tile mode is unsupported");
2696       surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;
2697       break;
2698 
2699    /* Z = depth. */
2700    case ADDR_SW_4KB_Z:
2701    case ADDR_SW_64KB_Z:
2702    case ADDR_SW_64KB_Z_T:
2703    case ADDR_SW_4KB_Z_X:
2704    case ADDR_SW_64KB_Z_X:
2705    case ADDR_SW_256KB_Z_X:
2706       surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
2707       break;
2708 
2709    default:
2710       assert(0);
2711    }
2712 
2713    return 0;
2714 }
2715 
gfx12_estimate_size(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * in,const struct radeon_surf * surf,unsigned align_width,unsigned align_height,unsigned align_depth)2716 static uint64_t gfx12_estimate_size(const ADDR3_COMPUTE_SURFACE_INFO_INPUT *in,
2717                                     const struct radeon_surf *surf,
2718                                     unsigned align_width, unsigned align_height,
2719                                     unsigned align_depth)
2720 {
2721    unsigned blk_w = surf ? surf->blk_w : 1;
2722    unsigned blk_h = surf ? surf->blk_h : 1;
2723    unsigned bpe = in->bpp ? in->bpp / 8 : surf->bpe;
2724    unsigned width = align(in->width, align_width * blk_w);
2725    unsigned height = align(in->height, align_height * blk_h);
2726    unsigned depth = align(in->numSlices, align_depth);
2727    unsigned tile_size = align_width * align_height * align_depth *
2728                         in->numSamples * bpe;
2729 
2730    if (in->numMipLevels > 1 && align_height > 1) {
2731       width = util_next_power_of_two(width);
2732       height = util_next_power_of_two(height);
2733    }
2734 
2735    uint64_t size = 0;
2736 
2737    /* Note: This mipmap size computation is inaccurate. */
2738    for (unsigned i = 0; i < in->numMipLevels; i++) {
2739       uint64_t level_size =
2740          (uint64_t)DIV_ROUND_UP(width, blk_w) * DIV_ROUND_UP(height, blk_h) * depth *
2741          in->numSamples * bpe;
2742 
2743       size += level_size;
2744 
2745       if (tile_size >= 4096 && level_size <= tile_size / 2) {
2746          /* We are likely in the mip tail, return. */
2747          assert(size);
2748          return size;
2749       }
2750 
2751       /* Minify the level. */
2752       width = u_minify(width, 1);
2753       height = u_minify(height, 1);
2754       if (in->resourceType == ADDR_RSRC_TEX_3D)
2755          depth = u_minify(depth, 1);
2756    }
2757 
2758    /* TODO: check that this is not too different from the correct value */
2759    assert(size);
2760    return size;
2761 }
2762 
gfx12_select_swizzle_mode(struct ac_addrlib * addrlib,const struct radeon_info * info,const struct radeon_surf * surf,const ADDR3_COMPUTE_SURFACE_INFO_INPUT * in)2763 static unsigned gfx12_select_swizzle_mode(struct ac_addrlib *addrlib,
2764                                           const struct radeon_info *info,
2765                                           const struct radeon_surf *surf,
2766                                           const ADDR3_COMPUTE_SURFACE_INFO_INPUT *in)
2767 {
2768    ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT get_in = {0};
2769    ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT get_out = {0};
2770 
2771    get_in.size = sizeof(ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT);
2772    get_out.size = sizeof(ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT);
2773 
2774    get_in.flags = in->flags;
2775    get_in.resourceType = in->resourceType;
2776    get_in.bpp = in->bpp ? in->bpp : (surf->bpe * 8);
2777    get_in.width = in->width;
2778    get_in.height = in->height;
2779    get_in.numSlices = in->numSlices;
2780    get_in.numMipLevels = in->numMipLevels;
2781    get_in.numSamples = in->numSamples;
2782 
2783    if (surf && surf->flags & RADEON_SURF_PREFER_4K_ALIGNMENT) {
2784       get_in.maxAlign = 4 * 1024;
2785    } else if (surf && surf->flags & RADEON_SURF_PREFER_64K_ALIGNMENT) {
2786       get_in.maxAlign = 64 * 1024;
2787    } else {
2788       get_in.maxAlign = info->has_dedicated_vram ? (256 * 1024) : (64 * 1024);
2789    }
2790 
2791    if (Addr3GetPossibleSwizzleModes(addrlib->handle, &get_in, &get_out) != ADDR_OK) {
2792       assert(!"Addr3GetPossibleSwizzleModes failed");
2793       return ADDR3_MAX_TYPE;
2794    }
2795 
2796    /* TODO: Workaround for SW_LINEAR assertion failures in addrlib. This should be fixed in addrlib. */
2797    if (surf && surf->blk_w == 4)
2798       get_out.validModes.swLinear = 0;
2799 
2800    assert(get_out.validModes.value);
2801 
2802    unsigned bpe = in->bpp ? in->bpp / 8 : surf->bpe;
2803    unsigned log_bpp = util_logbase2(bpe);
2804    unsigned log_samples = util_logbase2(in->numSamples);
2805    uint64_t ideal_size = gfx12_estimate_size(in, surf, 1, 1, 1);
2806 
2807    if (in->resourceType == ADDR_RSRC_TEX_3D) {
2808       static unsigned block3d_size_4K[5][3] = {
2809          {16, 16, 16},
2810          {8, 16, 16},
2811          {8, 16, 8},
2812          {8, 8, 8},
2813          {4, 8, 8},
2814       };
2815       static unsigned block3d_size_64K[5][3] = {
2816          {64, 32, 32},
2817          {32, 32, 32},
2818          {32, 32, 16},
2819          {32, 16, 16},
2820          {16, 16, 16},
2821       };
2822       static unsigned block3d_size_256K[5][3] = {
2823          {64, 64, 64},
2824          {32, 64, 64},
2825          {32, 64, 32},
2826          {32, 32, 32},
2827          {16, 32, 32},
2828       };
2829 
2830       uint64_t size_4K = gfx12_estimate_size(in, surf, block3d_size_4K[log_bpp][0],
2831                                              block3d_size_4K[log_bpp][1],
2832                                              block3d_size_4K[log_bpp][2]);
2833 
2834       uint64_t size_64K = gfx12_estimate_size(in, surf, block3d_size_64K[log_bpp][0],
2835                                               block3d_size_64K[log_bpp][1],
2836                                               block3d_size_64K[log_bpp][2]);
2837 
2838       uint64_t size_256K = gfx12_estimate_size(in, surf, block3d_size_256K[log_bpp][0],
2839                                                block3d_size_256K[log_bpp][1],
2840                                                block3d_size_256K[log_bpp][2]);;
2841 
2842       float max_3d_overalloc_256K = 1.1;
2843       float max_3d_overalloc_64K = 1.2;
2844       float max_3d_overalloc_4K = 2;
2845 
2846       if (get_out.validModes.sw3d256kB &&
2847           (size_256K / (double)ideal_size <= max_3d_overalloc_256K || !get_out.validModes.sw3d64kB))
2848          return ADDR3_256KB_3D;
2849 
2850       if (get_out.validModes.sw3d64kB &&
2851           (size_64K / (double)ideal_size <= max_3d_overalloc_64K || !get_out.validModes.sw3d4kB))
2852          return ADDR3_64KB_3D;
2853 
2854       if (get_out.validModes.sw3d4kB &&
2855           (size_4K / (double)ideal_size <= max_3d_overalloc_4K ||
2856            /* If the image is thick, prefer thick tiling. */
2857            in->numSlices >= block3d_size_4K[log_bpp][2] * 3))
2858          return ADDR3_4KB_3D;
2859 
2860       /* Try to select a 2D (planar) swizzle mode to save memory. */
2861    }
2862 
2863    static unsigned block_size_LINEAR[5] = {
2864       /* 1xAA (MSAA not supported with LINEAR)
2865        *
2866        * The pitch alignment is 128B, but the slice size is computed as if the pitch alignment
2867        * was 256B.
2868        */
2869       256,
2870       128,
2871       64,
2872       32,
2873       16,
2874    };
2875    static unsigned block_size_256B[4][5][2] = {
2876       { /* 1xAA */
2877          {16, 16},
2878          {16, 8},
2879          {8, 8},
2880          {8, 4},
2881          {4, 4},
2882       },
2883       { /* 2xAA */
2884          {16, 8},
2885          {8, 8},
2886          {8, 4},
2887          {4, 4},
2888          {4, 2},
2889       },
2890       { /* 4xAA */
2891          {8, 8},
2892          {8, 4},
2893          {4, 4},
2894          {4, 2},
2895          {2, 2},
2896       },
2897       { /* 8xAA */
2898          {8, 4},
2899          {4, 4},
2900          {4, 2},
2901          {2, 2},
2902          {2, 1},
2903       },
2904    };
2905    static unsigned block_size_4K[4][5][2] = {
2906       { /* 1xAA */
2907          {64, 64},
2908          {64, 32},
2909          {32, 32},
2910          {32, 16},
2911          {16, 16},
2912       },
2913       { /* 2xAA */
2914          {64, 32},
2915          {32, 32},
2916          {32, 16},
2917          {16, 16},
2918          {16, 8},
2919       },
2920       { /* 4xAA */
2921          {32, 32},
2922          {32, 16},
2923          {16, 16},
2924          {16, 8},
2925          {8, 8},
2926       },
2927       { /* 8xAA */
2928          {32, 16},
2929          {16, 16},
2930          {16, 8},
2931          {8, 8},
2932          {8, 4},
2933       },
2934    };
2935    static unsigned block_size_64K[4][5][2] = {
2936       { /* 1xAA */
2937          {256, 256},
2938          {256, 128},
2939          {128, 128},
2940          {128, 64},
2941          {64, 64},
2942       },
2943       { /* 2xAA */
2944          {256, 128},
2945          {128, 128},
2946          {128, 64},
2947          {64, 64},
2948          {64, 32},
2949       },
2950       { /* 4xAA */
2951          {128, 128},
2952          {128, 64},
2953          {64, 64},
2954          {64, 32},
2955          {32, 32},
2956       },
2957       { /* 8xAA */
2958          {128, 64},
2959          {64, 64},
2960          {64, 32},
2961          {32, 32},
2962          {32, 16},
2963       },
2964    };
2965    static unsigned block_size_256K[4][5][2] = {
2966       { /* 1xAA */
2967          {512, 512},
2968          {512, 256},
2969          {256, 256},
2970          {256, 128},
2971          {128, 128},
2972       },
2973       { /* 2xAA */
2974          {512, 256},
2975          {256, 256},
2976          {256, 128},
2977          {128, 128},
2978          {128, 64},
2979       },
2980       { /* 4xAA */
2981          {256, 256},
2982          {256, 128},
2983          {128, 128},
2984          {128, 64},
2985          {64, 64},
2986       },
2987       { /* 8xAA */
2988          {256, 128},
2989          {128, 128},
2990          {128, 64},
2991          {64, 64},
2992          {64, 32},
2993       },
2994    };
2995 
2996    uint64_t size_LINEAR = gfx12_estimate_size(in, surf, block_size_LINEAR[log_bpp], 1, 1);
2997 
2998    uint64_t size_256B = gfx12_estimate_size(in, surf, block_size_256B[log_samples][log_bpp][0],
2999                                             block_size_256B[log_samples][log_bpp][1], 1);
3000 
3001    uint64_t size_4K = gfx12_estimate_size(in, surf, block_size_4K[log_samples][log_bpp][0],
3002                                           block_size_4K[log_samples][log_bpp][1], 1);;
3003 
3004    uint64_t size_64K = gfx12_estimate_size(in, surf, block_size_64K[log_samples][log_bpp][0],
3005                                            block_size_64K[log_samples][log_bpp][1], 1);
3006 
3007    uint64_t size_256K = gfx12_estimate_size(in, surf, block_size_256K[log_samples][log_bpp][0],
3008                                             block_size_256K[log_samples][log_bpp][1], 1);
3009 
3010    float max_2d_overalloc_256K = 1.1;  /* relative to ideal */
3011    float max_2d_overalloc_64K = 1.3;   /* relative to ideal */
3012    float max_2d_overalloc_4K = 2;      /* relative to ideal */
3013    float max_2d_overalloc_256B = 3;    /* relative to LINEAR */
3014 
3015    if (get_out.validModes.sw2d256kB &&
3016        (size_256K / (double)ideal_size <= max_2d_overalloc_256K || !get_out.validModes.sw2d64kB))
3017       return ADDR3_256KB_2D;
3018 
3019    if (get_out.validModes.sw2d64kB &&
3020        (size_64K / (double)ideal_size <= max_2d_overalloc_64K || !get_out.validModes.sw2d4kB))
3021       return ADDR3_64KB_2D;
3022 
3023    if (get_out.validModes.sw2d4kB &&
3024        (size_4K / (double)ideal_size <= max_2d_overalloc_4K ||
3025         (!get_out.validModes.sw2d256B && !get_out.validModes.swLinear)))
3026       return ADDR3_4KB_2D;
3027 
3028    assert(get_out.validModes.sw2d256B || get_out.validModes.swLinear);
3029 
3030    if (get_out.validModes.sw2d256B && get_out.validModes.swLinear)
3031       return size_256B / (double)size_LINEAR <= max_2d_overalloc_256B ? ADDR3_256B_2D : ADDR3_LINEAR;
3032    else if (get_out.validModes.sw2d256B)
3033       return ADDR3_256B_2D;
3034    else
3035       return ADDR3_LINEAR;
3036 }
3037 
gfx12_compute_hiz_his_info(struct ac_addrlib * addrlib,const struct radeon_info * info,struct radeon_surf * surf,struct gfx12_hiz_his_layout * hizs,const ADDR3_COMPUTE_SURFACE_INFO_INPUT * surf_in)3038 static bool gfx12_compute_hiz_his_info(struct ac_addrlib *addrlib, const struct radeon_info *info,
3039                                        struct radeon_surf *surf, struct gfx12_hiz_his_layout *hizs,
3040                                        const ADDR3_COMPUTE_SURFACE_INFO_INPUT *surf_in)
3041 {
3042    assert(surf_in->flags.depth != surf_in->flags.stencil);
3043 
3044    if (surf->flags & RADEON_SURF_NO_HTILE || (info->gfx_level == GFX12 && info->chip_rev == 0))
3045       return true;
3046 
3047    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
3048    out.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_OUTPUT);
3049 
3050    ADDR3_COMPUTE_SURFACE_INFO_INPUT in = *surf_in;
3051    in.flags.depth = 0;
3052    in.flags.stencil = 0;
3053    in.flags.hiZHiS = 1;
3054 
3055    if (surf_in->flags.depth) {
3056       in.format = ADDR_FMT_32;
3057       in.bpp = 32;
3058    } else {
3059       in.format = ADDR_FMT_16;
3060       in.bpp = 16;
3061    }
3062 
3063    /* Compute the HiZ/HiS size. */
3064    in.width = align(DIV_ROUND_UP(surf_in->width, 8), 2);
3065    in.height = align(DIV_ROUND_UP(surf_in->height, 8), 2);
3066    in.swizzleMode = gfx12_select_swizzle_mode(addrlib, info, NULL, &in);
3067 
3068    int ret = Addr3ComputeSurfaceInfo(addrlib->handle, &in, &out);
3069    if (ret != ADDR_OK)
3070       return false;
3071 
3072    hizs->size = out.surfSize;
3073    hizs->width_in_tiles = in.width;
3074    hizs->height_in_tiles = in.height;
3075    hizs->swizzle_mode = in.swizzleMode;
3076    hizs->alignment_log2 = out.baseAlign;
3077    return true;
3078 }
3079 
/**
 * Compute the GFX12 layout of one plane (color/depth, or stencil) and store it in \p surf.
 *
 * When in->flags.stencil is set, the stencil plane is appended after the current
 * surf->surf_size and only the stencil fields (plus HiS on chip_rev >= 2) are written.
 * Otherwise the main surface fields, the PRT info, the per-level offsets/pitches of linear
 * surfaces, HiZ for depth and the tile swizzle for color are filled in.
 *
 * \param compressed  not used by this function
 * \param in          fully initialized addrlib input, including swizzleMode
 * \return false if an addrlib call or the HiZ/HiS computation fails
 */
static bool gfx12_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_info *info,
                                  const struct ac_surf_config *config, struct radeon_surf *surf,
                                  bool compressed, ADDR3_COMPUTE_SURFACE_INFO_INPUT *in)
{
   ADDR3_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {0};
   ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
   ADDR_E_RETURNCODE ret;

   out.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_OUTPUT);
   out.pMipInfo = mip_info;

   ret = Addr3ComputeSurfaceInfo(addrlib->handle, in, &out);
   if (ret != ADDR_OK)
      return false;

   /* TODO: remove this block once addrlib stops giving us 64K pitch for small images, breaking
    * modifiers and X.Org.
    */
   if (in->swizzleMode >= ADDR3_256B_2D && in->swizzleMode <= ADDR3_256KB_2D &&
       in->numMipLevels == 1) {
      /* log2 of the block size in bytes for each 2D swizzle mode. */
      static const unsigned block_bits[ADDR3_MAX_TYPE] = {
         [ADDR3_256B_2D] = 8,
         [ADDR3_4KB_2D] = 12,
         [ADDR3_64KB_2D] = 16,
         [ADDR3_256KB_2D] = 18,
      };
      /* log2 of the number of elements per block; the width gets the larger half. */
      unsigned align_bits = block_bits[in->swizzleMode] - util_logbase2(surf->bpe);
      unsigned w_align = 1 << (align_bits / 2 + align_bits % 2);

      out.pitch = align(in->width, w_align);
   }

   if (in->flags.stencil) {
      surf->u.gfx9.zs.stencil_swizzle_mode = in->swizzleMode;
      /* surf_size is a byte size that is handled as 64-bit elsewhere in this file
       * (see align64 on total_size), so don't squeeze it through the narrower align().
       */
      surf->u.gfx9.zs.stencil_offset = align64(surf->surf_size, out.baseAlign);
      surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign));
      surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize;

      if (info->chip_rev >= 2 &&
          !gfx12_compute_hiz_his_info(addrlib, info, surf, &surf->u.gfx9.zs.his, in))
         return false;

      return true;
   }

   surf->u.gfx9.surf_slice_size = out.sliceSize;
   surf->u.gfx9.surf_pitch = out.pitch;
   surf->u.gfx9.surf_height = out.height;
   surf->surf_size = out.surfSize;
   surf->surf_alignment_log2 = util_logbase2(out.baseAlign);

   if (surf->flags & RADEON_SURF_PRT) {
      surf->prt_tile_width = out.blockExtent.width;
      surf->prt_tile_height = out.blockExtent.height;
      surf->prt_tile_depth = out.blockExtent.depth;
      surf->first_mip_tail_level = out.firstMipIdInTail;

      for (unsigned i = 0; i < in->numMipLevels; i++) {
         surf->u.gfx9.prt_level_offset[i] = mip_info[i].macroBlockOffset + mip_info[i].mipTailOffset;
         surf->u.gfx9.prt_level_pitch[i] = mip_info[i].pitch;
      }
   }

   if (surf->blk_w == 2 && out.pitch == out.pixelPitch &&
       surf->u.gfx9.swizzle_mode == ADDR3_LINEAR) {
      const unsigned linear_byte_alignment = 128;

      /* Adjust surf_pitch to be in elements units not in pixels */
      surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w,
                                       linear_byte_alignment / surf->bpe);
      /* The surface is really a surf->bpe bytes per pixel surface even if we
       * use it as a surf->bpe bytes per element one.
       * Adjust surf_slice_size and surf_size to reflect the change
       * made to surf_pitch.
       */
      surf->u.gfx9.surf_slice_size =
         MAX2(surf->u.gfx9.surf_slice_size,
              (uint64_t)surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w);
      surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;

      int alignment = linear_byte_alignment / surf->bpe;
      for (unsigned i = 0; i < in->numMipLevels; i++) {
         surf->u.gfx9.offset[i] = mip_info[i].offset;
         /* Adjust pitch like we did for surf_pitch */
         surf->u.gfx9.pitch[i] = align(mip_info[i].pitch / surf->blk_w, alignment);
      }
      surf->u.gfx9.base_mip_width = surf->u.gfx9.surf_pitch;
   } else if (in->swizzleMode == ADDR3_LINEAR) {
      for (unsigned i = 0; i < in->numMipLevels; i++) {
         surf->u.gfx9.offset[i] = mip_info[i].offset;
         surf->u.gfx9.pitch[i] = mip_info[i].pitch;
      }
      surf->u.gfx9.base_mip_width = surf->u.gfx9.surf_pitch;
   } else {
      surf->u.gfx9.base_mip_width = mip_info[0].pitch;
   }

   surf->u.gfx9.base_mip_height = mip_info[0].height;

   if (in->flags.depth) {
      assert(in->swizzleMode != ADDR3_LINEAR);

      return gfx12_compute_hiz_his_info(addrlib, info, surf, &surf->u.gfx9.zs.hiz, in);
   }

   /* Compute tile swizzle for the color surface. All swizzle modes >= 4K support it. */
   if (surf->modifier == DRM_FORMAT_MOD_INVALID && config->info.surf_index &&
       in->swizzleMode >= ADDR3_4KB_2D && !out.mipChainInTail &&
       !(surf->flags & RADEON_SURF_SHAREABLE) && !get_display_flag(config, surf)) {
      ADDR3_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
      ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};

      xin.size = sizeof(ADDR3_COMPUTE_PIPEBANKXOR_INPUT);
      xout.size = sizeof(ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT);

      /* Each surface takes the next index from the shared counter. */
      xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
      xin.swizzleMode = in->swizzleMode;

      ret = Addr3ComputePipeBankXor(addrlib->handle, &xin, &xout);
      if (ret != ADDR_OK)
         return false;

      assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8 + 2));
      surf->tile_swizzle = xout.pipeBankXor;
   }

   return true;
}
3208 
gfx12_compute_surface(struct ac_addrlib * addrlib,const struct radeon_info * info,const struct ac_surf_config * config,enum radeon_surf_mode mode,struct radeon_surf * surf)3209 static bool gfx12_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
3210                                   const struct ac_surf_config *config, enum radeon_surf_mode mode,
3211                                   struct radeon_surf *surf)
3212 {
3213    bool compressed = surf->blk_w == 4 && surf->blk_h == 4;
3214    bool stencil_only = (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);
3215    ADDR3_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
3216 
3217    AddrSurfInfoIn.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_INPUT);
3218 
3219    if (stencil_only) {
3220       AddrSurfInfoIn.bpp = 8;
3221       AddrSurfInfoIn.format = ADDR_FMT_8;
3222    } else {
3223       AddrSurfInfoIn.format = bpe_to_format(surf);
3224       if (!compressed)
3225          AddrSurfInfoIn.bpp = surf->bpe * 8;
3226    }
3227 
3228    AddrSurfInfoIn.flags.depth = !!(surf->flags & RADEON_SURF_ZBUFFER);
3229    AddrSurfInfoIn.flags.stencil = stencil_only;
3230    AddrSurfInfoIn.flags.blockCompressed = compressed;
3231    AddrSurfInfoIn.flags.isVrsImage = !!(surf->flags & RADEON_SURF_VRS_RATE);
3232    AddrSurfInfoIn.flags.standardPrt = !!(surf->flags & RADEON_SURF_PRT);
3233 
3234    if (config->is_3d)
3235       AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
3236    else if (config->is_1d)
3237       AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
3238    else
3239       AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;
3240 
3241    AddrSurfInfoIn.width = config->info.width;
3242    AddrSurfInfoIn.height = config->info.height;
3243    AddrSurfInfoIn.numMipLevels = config->info.levels;
3244    AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
3245 
3246    if (config->is_3d)
3247       AddrSurfInfoIn.numSlices = config->info.depth;
3248    else if (config->is_cube)
3249       AddrSurfInfoIn.numSlices = 6;
3250    else
3251       AddrSurfInfoIn.numSlices = config->info.array_size;
3252 
3253    /* Select the swizzle mode. */
3254    if (surf->modifier != DRM_FORMAT_MOD_INVALID) {
3255       assert(!compressed);
3256       assert(!ac_modifier_has_dcc(surf->modifier) || !(surf->flags & RADEON_SURF_DISABLE_DCC));
3257       AddrSurfInfoIn.swizzleMode = ac_get_modifier_swizzle_mode(info->gfx_level, surf->modifier);
3258    } else if (surf->flags & RADEON_SURF_IMPORTED) {
3259       AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode;
3260    } else if (mode == RADEON_SURF_MODE_LINEAR_ALIGNED) {
3261       assert(config->info.samples <= 1 && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
3262       AddrSurfInfoIn.swizzleMode = ADDR3_LINEAR;
3263    } else if (config->is_1d && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
3264       AddrSurfInfoIn.swizzleMode = ADDR3_LINEAR;
3265    } else if (surf->flags & RADEON_SURF_VIDEO_REFERENCE) {
3266       AddrSurfInfoIn.swizzleMode = ADDR3_256B_2D;
3267    } else {
3268       AddrSurfInfoIn.swizzleMode = gfx12_select_swizzle_mode(addrlib, info, surf, &AddrSurfInfoIn);
3269    }
3270 
3271    /* Force the linear pitch from 128B (default) to 256B for multi-GPU interop. This only applies
3272     * to 2D non-MSAA and plain color formats.
3273     */
3274    if (!config->is_1d && !config->is_3d && !config->is_cube && !config->is_array &&
3275        config->info.levels == 1 && config->info.samples <= 1 &&
3276        surf->blk_w == 1 && surf->blk_h == 1 && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
3277        util_is_power_of_two_nonzero(surf->bpe) && AddrSurfInfoIn.swizzleMode == ADDR3_LINEAR) {
3278       AddrSurfInfoIn.pitchInElement = align(config->info.width, LINEAR_PITCH_ALIGNMENT / surf->bpe);
3279       surf->u.gfx9.uses_custom_pitch = true;
3280    }
3281 
3282    surf->u.gfx9.swizzle_mode = AddrSurfInfoIn.swizzleMode;
3283    surf->u.gfx9.resource_type = (enum gfx9_resource_type)AddrSurfInfoIn.resourceType;
3284    surf->u.gfx9.gfx12_enable_dcc = ac_modifier_has_dcc(surf->modifier) ||
3285                                    (surf->modifier == DRM_FORMAT_MOD_INVALID &&
3286                                     !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
3287                                     /* Always enable compression for Z/S and MSAA color by default. */
3288                                     (surf->flags & RADEON_SURF_Z_OR_SBUFFER ||
3289                                      config->info.samples > 1 ||
3290                                      ((info->gfx12_supports_display_dcc || !(surf->flags & RADEON_SURF_SCANOUT)) &&
3291                                       /* This one is not strictly necessary. */
3292                                       surf->u.gfx9.swizzle_mode != ADDR3_LINEAR)));
3293 
3294    surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
3295    surf->is_linear = surf->u.gfx9.swizzle_mode == ADDR3_LINEAR;
3296    surf->is_displayable = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
3297                           surf->u.gfx9.resource_type != RADEON_RESOURCE_3D &&
3298                           (info->gfx12_supports_display_dcc || !surf->u.gfx9.gfx12_enable_dcc);
3299    surf->thick_tiling = surf->u.gfx9.swizzle_mode >= ADDR3_4KB_3D;
3300 
3301    if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) {
3302       surf->u.gfx9.zs.hiz.offset = 0;
3303       surf->u.gfx9.zs.hiz.size = 0;
3304       surf->u.gfx9.zs.his.offset = 0;
3305       surf->u.gfx9.zs.his.size = 0;
3306    }
3307 
3308    if (surf->u.gfx9.gfx12_enable_dcc) {
3309       if (surf->modifier != DRM_FORMAT_MOD_INVALID) {
3310          surf->u.gfx9.color.dcc.max_compressed_block_size =
3311             AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, surf->modifier);
3312       } else if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
3313                  /* Don't change the DCC settings for imported buffers - they might differ. */
3314                  !(surf->flags & RADEON_SURF_IMPORTED)) {
3315          surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
3316       }
3317    }
3318 
3319    /* Calculate texture layout information. */
3320    if (!stencil_only &&
3321        !gfx12_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn))
3322       return false;
3323 
3324    /* Calculate texture layout information for stencil. */
3325    if (surf->flags & RADEON_SURF_SBUFFER) {
3326       if (stencil_only) {
3327          assert(!AddrSurfInfoIn.flags.depth);
3328          assert(AddrSurfInfoIn.flags.stencil);
3329          assert(AddrSurfInfoIn.bpp == 8);
3330          assert(AddrSurfInfoIn.format == ADDR_FMT_8);
3331       } else {
3332          AddrSurfInfoIn.flags.depth = 0;
3333          AddrSurfInfoIn.flags.stencil = 1;
3334          AddrSurfInfoIn.bpp = 8;
3335          AddrSurfInfoIn.format = ADDR_FMT_8;
3336       }
3337 
3338       if (!gfx12_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn))
3339          return false;
3340    }
3341 
3342    return true;
3343 }
3344 
ac_compute_surface(struct ac_addrlib * addrlib,const struct radeon_info * info,const struct ac_surf_config * config,enum radeon_surf_mode mode,struct radeon_surf * surf)3345 int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
3346                        const struct ac_surf_config *config, enum radeon_surf_mode mode,
3347                        struct radeon_surf *surf)
3348 {
3349    int r;
3350 
3351    r = surf_config_sanity(config, surf->flags);
3352    if (r)
3353       return r;
3354 
3355    /* Images are emulated on some CDNA chips. */
3356    if (!info->has_image_opcodes && !(surf->flags & RADEON_SURF_VIDEO_REFERENCE))
3357       mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
3358 
3359    /* 0 offsets mean disabled. */
3360    surf->meta_offset = surf->fmask_offset = surf->cmask_offset = surf->display_dcc_offset = 0;
3361 
3362    if (info->family_id >= FAMILY_GFX12) {
3363       if (!gfx12_compute_surface(addrlib, info, config, mode, surf))
3364          return ADDR_ERROR;
3365 
3366       /* Determine the memory layout of multiple allocations in one buffer. */
3367       surf->total_size = surf->surf_size;
3368       surf->alignment_log2 = surf->surf_alignment_log2;
3369 
3370       if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) {
3371          if (surf->u.gfx9.zs.hiz.size) {
3372             surf->u.gfx9.zs.hiz.offset = align64(surf->total_size,
3373                                                  1ull << surf->u.gfx9.zs.hiz.alignment_log2);
3374             surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2,
3375                                              surf->u.gfx9.zs.hiz.alignment_log2);
3376             surf->total_size = surf->u.gfx9.zs.hiz.offset + surf->u.gfx9.zs.hiz.size;
3377          }
3378 
3379          if (surf->u.gfx9.zs.his.size) {
3380             surf->u.gfx9.zs.his.offset = align64(surf->total_size,
3381                                                  1ull << surf->u.gfx9.zs.his.alignment_log2);
3382             surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2,
3383                                              surf->u.gfx9.zs.his.alignment_log2);
3384             surf->total_size = surf->u.gfx9.zs.his.offset + surf->u.gfx9.zs.his.size;
3385          }
3386       }
3387 
3388       return 0;
3389    }
3390 
3391    /* Gfx6-11. */
3392    if (info->family_id >= FAMILY_AI)
3393       r = gfx9_compute_surface(addrlib, info, config, mode, surf);
3394    else
3395       r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf);
3396 
3397    if (r)
3398       return r;
3399 
3400    /* Determine the memory layout of multiple allocations in one buffer. */
3401    surf->total_size = surf->surf_size;
3402    surf->alignment_log2 = surf->surf_alignment_log2;
3403 
3404    if (surf->fmask_size) {
3405       assert(config->info.samples >= 2);
3406       surf->fmask_offset = align64(surf->total_size, 1ull << surf->fmask_alignment_log2);
3407       surf->total_size = surf->fmask_offset + surf->fmask_size;
3408       surf->alignment_log2 = MAX2(surf->alignment_log2, surf->fmask_alignment_log2);
3409    }
3410 
3411    /* Single-sample CMASK is in a separate buffer. */
3412    if (surf->cmask_size && config->info.samples >= 2) {
3413       surf->cmask_offset = align64(surf->total_size, 1ull << surf->cmask_alignment_log2);
3414       surf->total_size = surf->cmask_offset + surf->cmask_size;
3415       surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
3416    }
3417 
3418    if (surf->is_displayable)
3419       surf->flags |= RADEON_SURF_SCANOUT;
3420 
3421    if (surf->meta_size &&
3422        /* dcc_size is computed on GFX9+ only if it's displayable. */
3423        (info->gfx_level >= GFX9 || !get_display_flag(config, surf))) {
3424       /* It's better when displayable DCC is immediately after
3425        * the image due to hw-specific reasons.
3426        */
3427       if (info->gfx_level >= GFX9 &&
3428           !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
3429           surf->u.gfx9.color.dcc.display_equation_valid) {
3430          /* Add space for the displayable DCC buffer. */
3431          surf->display_dcc_offset = align64(surf->total_size, 1ull << surf->u.gfx9.color.display_dcc_alignment_log2);
3432          surf->total_size = surf->display_dcc_offset + surf->u.gfx9.color.display_dcc_size;
3433       }
3434 
3435       surf->meta_offset = align64(surf->total_size, 1ull << surf->meta_alignment_log2);
3436       surf->total_size = surf->meta_offset + surf->meta_size;
3437       surf->alignment_log2 = MAX2(surf->alignment_log2, surf->meta_alignment_log2);
3438    }
3439 
3440    return 0;
3441 }
3442 
3443 /* This is meant to be used for disabling DCC. */
ac_surface_zero_dcc_fields(struct radeon_surf * surf)3444 void ac_surface_zero_dcc_fields(struct radeon_surf *surf)
3445 {
3446    if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
3447       return;
3448 
3449    surf->meta_offset = 0;
3450    surf->display_dcc_offset = 0;
3451    if (!surf->fmask_offset && !surf->cmask_offset) {
3452       surf->total_size = surf->surf_size;
3453       surf->alignment_log2 = surf->surf_alignment_log2;
3454    }
3455 }
3456 
/* Convert an encoded tile split value (0..6) to its size: 64 << value.
 * Any other value yields 1024, the same as the encoding 4.
 */
static unsigned eg_tile_split(unsigned tile_split)
{
   if (tile_split > 6)
      return 1024;

   return 64u << tile_split;
}
3485 
/* Inverse of eg_tile_split: convert a tile split size (64..4096, powers of two) to its
 * encoded value (0..6). Any other size yields 4, the encoding of 1024.
 */
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   static const unsigned split_sizes[] = {64, 128, 256, 512, 1024, 2048, 4096};

   for (unsigned code = 0; code < sizeof(split_sizes) / sizeof(split_sizes[0]); code++) {
      if (split_sizes[code] == eg_tile_split)
         return code;
   }

   return 4;
}
3506 
3507 #define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
3508 #define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK  0x3
3509 
3510 /* This should be called before ac_compute_surface. */
ac_surface_apply_bo_metadata(enum amd_gfx_level gfx_level,struct radeon_surf * surf,uint64_t tiling_flags,enum radeon_surf_mode * mode)3511 void ac_surface_apply_bo_metadata(enum amd_gfx_level gfx_level, struct radeon_surf *surf,
3512                                   uint64_t tiling_flags, enum radeon_surf_mode *mode)
3513 {
3514    bool scanout;
3515 
3516    if (gfx_level >= GFX12) {
3517       surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, GFX12_SWIZZLE_MODE);
3518       surf->u.gfx9.color.dcc.max_compressed_block_size =
3519          AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
3520       surf->u.gfx9.color.dcc_data_format =
3521          AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
3522       surf->u.gfx9.color.dcc_number_type =
3523          AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
3524       surf->u.gfx9.color.dcc_write_compress_disable =
3525          AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE);
3526       scanout = AMDGPU_TILING_GET(tiling_flags, GFX12_SCANOUT);
3527    } else if (gfx_level >= GFX9) {
3528       surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
3529       surf->u.gfx9.color.dcc.independent_64B_blocks =
3530          AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
3531       surf->u.gfx9.color.dcc.independent_128B_blocks =
3532          AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B);
3533       surf->u.gfx9.color.dcc.max_compressed_block_size =
3534          AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE);
3535       surf->u.gfx9.color.display_dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX);
3536       scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
3537       *mode =
3538          surf->u.gfx9.swizzle_mode > 0 ? RADEON_SURF_MODE_2D : RADEON_SURF_MODE_LINEAR_ALIGNED;
3539    } else {
3540       surf->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
3541       surf->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
3542       surf->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
3543       surf->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
3544       surf->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
3545       surf->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
3546       scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
3547 
3548       if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
3549          *mode = RADEON_SURF_MODE_2D;
3550       else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
3551          *mode = RADEON_SURF_MODE_1D;
3552       else
3553          *mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
3554    }
3555 
3556    if (scanout)
3557       surf->flags |= RADEON_SURF_SCANOUT;
3558    else
3559       surf->flags &= ~RADEON_SURF_SCANOUT;
3560 }
3561 
ac_surface_compute_bo_metadata(const struct radeon_info * info,struct radeon_surf * surf,uint64_t * tiling_flags)3562 void ac_surface_compute_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
3563                                     uint64_t *tiling_flags)
3564 {
3565    *tiling_flags = 0;
3566 
3567    if (info->gfx_level >= GFX12) {
3568       *tiling_flags |= AMDGPU_TILING_SET(GFX12_SWIZZLE_MODE, surf->u.gfx9.swizzle_mode);
3569       *tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_MAX_COMPRESSED_BLOCK,
3570                                          surf->u.gfx9.color.dcc.max_compressed_block_size);
3571       *tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_NUMBER_TYPE, surf->u.gfx9.color.dcc_number_type);
3572       *tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_DATA_FORMAT, surf->u.gfx9.color.dcc_data_format);
3573       *tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_WRITE_COMPRESS_DISABLE, surf->u.gfx9.color.dcc_write_compress_disable);
3574       *tiling_flags |= AMDGPU_TILING_SET(GFX12_SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0);
3575    } else if (info->gfx_level >= GFX9) {
3576       uint64_t dcc_offset = 0;
3577 
3578       if (surf->meta_offset) {
3579          dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset : surf->meta_offset;
3580          assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));
3581       }
3582 
3583       *tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, surf->u.gfx9.swizzle_mode);
3584       *tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, dcc_offset >> 8);
3585       *tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, surf->u.gfx9.color.display_dcc_pitch_max);
3586       *tiling_flags |=
3587          AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, surf->u.gfx9.color.dcc.independent_64B_blocks);
3588       *tiling_flags |=
3589          AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, surf->u.gfx9.color.dcc.independent_128B_blocks);
3590       *tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE,
3591                                          surf->u.gfx9.color.dcc.max_compressed_block_size);
3592       *tiling_flags |= AMDGPU_TILING_SET(SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0);
3593    } else {
3594       if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
3595          *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
3596       else if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
3597          *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
3598       else
3599          *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
3600 
3601       *tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, surf->u.legacy.pipe_config);
3602       *tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(surf->u.legacy.bankw));
3603       *tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(surf->u.legacy.bankh));
3604       if (surf->u.legacy.tile_split)
3605          *tiling_flags |=
3606             AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(surf->u.legacy.tile_split));
3607       *tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(surf->u.legacy.mtilea));
3608       *tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(surf->u.legacy.num_banks) - 1);
3609 
3610       if (surf->flags & RADEON_SURF_SCANOUT)
3611          *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
3612       else
3613          *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
3614    }
3615 }
3616 
ac_get_umd_metadata_word1(const struct radeon_info * info)3617 static uint32_t ac_get_umd_metadata_word1(const struct radeon_info *info)
3618 {
3619    return (ATI_VENDOR_ID << 16) | info->pci_id;
3620 }
3621 
3622 /* This should be called after ac_compute_surface. */
ac_surface_apply_umd_metadata(const struct radeon_info * info,struct radeon_surf * surf,unsigned num_storage_samples,unsigned num_mipmap_levels,unsigned size_metadata,const uint32_t metadata[64])3623 bool ac_surface_apply_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
3624                                    unsigned num_storage_samples, unsigned num_mipmap_levels,
3625                                    unsigned size_metadata, const uint32_t metadata[64])
3626 {
3627    const uint32_t *desc = &metadata[2];
3628    uint64_t offset;
3629 
3630    if (surf->modifier != DRM_FORMAT_MOD_INVALID)
3631       return true;
3632 
3633    if (info->gfx_level >= GFX9)
3634       offset = surf->u.gfx9.surf_offset;
3635    else
3636       offset = (uint64_t)surf->u.legacy.level[0].offset_256B * 256;
3637 
3638    if (offset ||                 /* Non-zero planes ignore metadata. */
3639        size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */
3640        metadata[0] == 0 ||       /* invalid version number (1 and 2 layouts are compatible) */
3641        metadata[1] != ac_get_umd_metadata_word1(info)) /* invalid PCI ID */ {
3642       /* Disable DCC because it might not be enabled. */
3643       ac_surface_zero_dcc_fields(surf);
3644 
3645       /* Don't report an error if the texture comes from an incompatible driver,
3646        * but this might not work.
3647        */
3648       return true;
3649    }
3650 
3651    /* Validate that sample counts and the number of mipmap levels match. */
3652    unsigned desc_last_level = info->gfx_level >= GFX12 ? G_00A00C_LAST_LEVEL_GFX12(desc[3])
3653                                                        : G_008F1C_LAST_LEVEL(desc[3]);
3654    unsigned type = G_008F1C_TYPE(desc[3]);
3655 
3656    if (type == V_008F1C_SQ_RSRC_IMG_2D_MSAA || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
3657       unsigned log_samples = util_logbase2(MAX2(1, num_storage_samples));
3658 
3659       if (desc_last_level != log_samples) {
3660          fprintf(stderr,
3661                  "amdgpu: invalid MSAA texture import, "
3662                  "metadata has log2(samples) = %u, the caller set %u\n",
3663                  desc_last_level, log_samples);
3664          return false;
3665       }
3666    } else {
3667       if (desc_last_level != num_mipmap_levels - 1) {
3668          fprintf(stderr,
3669                  "amdgpu: invalid mipmapped texture import, "
3670                  "metadata has last_level = %u, the caller set %u\n",
3671                  desc_last_level, num_mipmap_levels - 1);
3672          return false;
3673       }
3674    }
3675 
3676    if (info->gfx_level >= GFX8 && info->gfx_level < GFX12 && G_008F28_COMPRESSION_EN(desc[6])) {
3677       /* Read DCC information. */
3678       switch (info->gfx_level) {
3679       case GFX8:
3680          surf->meta_offset = (uint64_t)desc[7] << 8;
3681          break;
3682 
3683       case GFX9:
3684          surf->meta_offset =
3685             ((uint64_t)desc[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40);
3686          surf->u.gfx9.color.dcc.pipe_aligned = G_008F24_META_PIPE_ALIGNED(desc[5]);
3687          surf->u.gfx9.color.dcc.rb_aligned = G_008F24_META_RB_ALIGNED(desc[5]);
3688 
3689          /* If DCC is unaligned, this can only be a displayable image. */
3690          if (!surf->u.gfx9.color.dcc.pipe_aligned && !surf->u.gfx9.color.dcc.rb_aligned)
3691             assert(surf->is_displayable);
3692          break;
3693 
3694       case GFX10:
3695       case GFX10_3:
3696       case GFX11:
3697       case GFX11_5:
3698          surf->meta_offset =
3699             ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
3700          surf->u.gfx9.color.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
3701          break;
3702 
3703       default:
3704          assert(0);
3705          return false;
3706       }
3707    } else {
3708       /* Disable DCC. dcc_offset is always set by texture_from_handle
3709        * and must be cleared here.
3710        */
3711       ac_surface_zero_dcc_fields(surf);
3712    }
3713 
3714    return true;
3715 }
3716 
ac_surface_compute_umd_metadata(const struct radeon_info * info,struct radeon_surf * surf,unsigned num_mipmap_levels,uint32_t desc[8],unsigned * size_metadata,uint32_t metadata[64],bool include_tool_md)3717 void ac_surface_compute_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
3718                                      unsigned num_mipmap_levels, uint32_t desc[8],
3719                                      unsigned *size_metadata, uint32_t metadata[64],
3720                                      bool include_tool_md)
3721 {
3722    /* Clear the base address and set the relative DCC offset. */
3723    desc[0] = 0;
3724    desc[1] &= C_008F14_BASE_ADDRESS_HI;
3725 
3726    switch (info->gfx_level) {
3727    case GFX6:
3728    case GFX7:
3729       break;
3730    case GFX8:
3731       desc[7] = surf->meta_offset >> 8;
3732       break;
3733    case GFX9:
3734       desc[7] = surf->meta_offset >> 8;
3735       desc[5] &= C_008F24_META_DATA_ADDRESS;
3736       desc[5] |= S_008F24_META_DATA_ADDRESS(surf->meta_offset >> 40);
3737       break;
3738    case GFX10:
3739    case GFX10_3:
3740    case GFX11:
3741    case GFX11_5:
3742       desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
3743       desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->meta_offset >> 8);
3744       desc[7] = surf->meta_offset >> 16;
3745       break;
3746    default: /* Gfx12 doesn't have any metadata address */
3747       break;
3748    }
3749 
3750    /* Metadata image format format version 1 and 2. Version 2 uses the same layout as
3751     * version 1 with some additional fields (used if include_tool_md=true).
3752     * [0] = optional flags | metadata_format_identifier
3753     * [1] = (VENDOR_ID << 16) | PCI_ID
3754     * [2:9] = image descriptor for the whole resource
3755     *         [2] is always 0, because the base address is cleared
3756     *         [9] is the DCC offset bits [39:8] from the beginning of
3757     *             the buffer
3758     * gfx8-: [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level (gfx8-)
3759     * ---- Optional data (if version == 2 or version > 2 + AC_SURF_METADATA_FLAG_EXTRA_MD_BIT)
3760     *      AC_SURF_METADATA_FLAG_EXTRA_MD_BIT is set.
3761     *      It shouldn't be used by the driver as it's only present to help
3762     *      tools (eg: umr) that would want to access this buffer.
3763     * gfx9+ if valid modifier: [10:11] = modifier
3764     *                          [12:12+3*nplane] = [offset, stride]
3765     *       else: [10]: stride
3766     * ---- Optional data (if version >= 3 + AC_SURF_METADATA_FLAG_FAMILY_OVERRIDEN_BIT)
3767     *  [last] = fake family id
3768     */
3769 
3770    /* metadata image format version */
3771    metadata[0] = (include_tool_md || info->family_overridden) ? 3 : 1;
3772 
3773    if (include_tool_md)
3774       metadata[0] |= 1u << (16 + AC_SURF_METADATA_FLAG_EXTRA_MD_BIT);
3775    if (info->family_overridden)
3776       metadata[0] |= 1u << (16 + AC_SURF_METADATA_FLAG_FAMILY_OVERRIDEN_BIT);
3777 
3778    /* Tiling modes are ambiguous without a PCI ID. */
3779    metadata[1] = ac_get_umd_metadata_word1(info);
3780 
3781    /* Dwords [2:9] contain the image descriptor. */
3782    memcpy(&metadata[2], desc, 8 * 4);
3783    *size_metadata = 10 * 4;
3784 
3785    /* Dwords [10:..] contain the mipmap level offsets. */
3786    if (info->gfx_level <= GFX8) {
3787       for (unsigned i = 0; i < num_mipmap_levels; i++)
3788          metadata[10 + i] = surf->u.legacy.level[i].offset_256B;
3789 
3790       *size_metadata += num_mipmap_levels * 4;
3791    } else if (include_tool_md) {
3792       if (surf->modifier != DRM_FORMAT_MOD_INVALID) {
3793          /* Modifier */
3794          metadata[10] = surf->modifier;
3795          metadata[11] = surf->modifier >> 32;
3796          /* Num planes */
3797          int nplanes = ac_surface_get_nplanes(surf);
3798          metadata[12] = nplanes;
3799          int ndw = 13;
3800          for (int i = 0; i < nplanes; i++) {
3801             metadata[ndw++] = ac_surface_get_plane_offset(info->gfx_level,
3802                                                           surf, i, 0);
3803             metadata[ndw++] = ac_surface_get_plane_stride(info->gfx_level,
3804                                                           surf, i, 0);
3805          }
3806          *size_metadata = ndw * 4;
3807       } else {
3808          metadata[10] = ac_surface_get_plane_stride(info->gfx_level,
3809                                                     surf, 0, 0);
3810          *size_metadata = 11 * 4;
3811       }
3812    }
3813 
3814    if (info->family_overridden) {
3815       int n_dw = *size_metadata / 4;
3816       assert(n_dw < 64 - 1);
3817       metadata[n_dw] = info->gfx_level;
3818       *size_metadata += 4;
3819    }
3820 }
3821 
ac_surface_get_pitch_align(const struct radeon_info * info,const struct radeon_surf * surf)3822 static uint32_t ac_surface_get_pitch_align(const struct radeon_info *info,
3823                                            const struct radeon_surf *surf)
3824 {
3825    if (surf->is_linear) {
3826       if (info->gfx_level >= GFX12)
3827          return 128 / surf->bpe;
3828       else if (info->gfx_level >= GFX9)
3829          return 256 / surf->bpe;
3830       else
3831          return MAX2(8, 64 / surf->bpe);
3832    }
3833 
3834    if (info->gfx_level >= GFX12) {
3835       if (surf->u.gfx9.resource_type == RADEON_RESOURCE_3D)
3836          return 1u << 31; /* reject 3D textures by returning an impossible alignment */
3837 
3838       unsigned bpe_log2 = util_logbase2(surf->bpe);
3839       unsigned block_size_log2;
3840 
3841       switch (surf->u.gfx9.swizzle_mode) {
3842       case ADDR3_256B_2D:
3843          block_size_log2 = 8;
3844          break;
3845       case ADDR3_4KB_2D:
3846          block_size_log2 = 12;
3847          break;
3848       case ADDR3_64KB_2D:
3849          block_size_log2 = 16;
3850          break;
3851       case ADDR3_256KB_2D:
3852          block_size_log2 = 18;
3853          break;
3854       default:
3855          unreachable("unhandled swizzle mode");
3856       }
3857 
3858       return 1 << ((block_size_log2 >> 1) - (bpe_log2 >> 1));
3859    } else if (info->gfx_level >= GFX9) {
3860       if (surf->u.gfx9.resource_type == RADEON_RESOURCE_3D)
3861          return 1u << 31; /* reject 3D textures by returning an impossible alignment */
3862 
3863       unsigned bpe_log2 = util_logbase2(surf->bpe);
3864       unsigned block_size_log2;
3865 
3866       switch((surf->u.gfx9.swizzle_mode & ~3) + 3) {
3867       case ADDR_SW_256B_R:
3868          block_size_log2 = 8;
3869          break;
3870       case ADDR_SW_4KB_R:
3871       case ADDR_SW_4KB_R_X:
3872          block_size_log2 = 12;
3873          break;
3874       case ADDR_SW_64KB_R:
3875       case ADDR_SW_64KB_R_T:
3876       case ADDR_SW_64KB_R_X:
3877          block_size_log2 = 16;
3878          break;
3879       case ADDR_SW_256KB_R_X:
3880          block_size_log2 = 18;
3881          break;
3882       default:
3883          unreachable("unhandled swizzle mode");
3884       }
3885 
3886       if (info->gfx_level >= GFX10) {
3887          return 1 << (((block_size_log2 - bpe_log2) + 1) / 2);
3888       } else {
3889          static unsigned block_256B_width[] = {16, 16, 8, 8, 4};
3890          return block_256B_width[bpe_log2] << ((block_size_log2 - 8) / 2);
3891       }
3892    } else {
3893       unsigned mode;
3894 
3895       if ((surf->flags & RADEON_SURF_Z_OR_SBUFFER) == RADEON_SURF_SBUFFER)
3896          mode = surf->u.legacy.zs.stencil_level[0].mode;
3897       else
3898          mode = surf->u.legacy.level[0].mode;
3899 
3900       /* Note that display usage requires an alignment of 32 pixels (see AdjustPitchAlignment),
3901        * which is not checked here.
3902        */
3903       switch (mode) {
3904       case RADEON_SURF_MODE_1D:
3905          return 8;
3906       case RADEON_SURF_MODE_2D:
3907          return 8 * surf->u.legacy.bankw * surf->u.legacy.mtilea *
3908                 ac_pipe_config_to_num_pipes(surf->u.legacy.pipe_config);
3909       default:
3910          unreachable("unhandled surf mode");
3911       }
3912    }
3913 }
3914 
ac_surface_override_offset_stride(const struct radeon_info * info,struct radeon_surf * surf,unsigned num_layers,unsigned num_mipmap_levels,uint64_t offset,unsigned pitch)3915 bool ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
3916                                        unsigned num_layers, unsigned num_mipmap_levels,
3917                                        uint64_t offset, unsigned pitch)
3918 {
3919    if ((ac_surface_get_pitch_align(info, surf) - 1) & pitch)
3920       return false;
3921 
3922    /* Require an equal pitch with metadata (DCC), mipmapping, non-linear layout (that could be
3923     * relaxed), or when the chip is GFX10, which is the only generation that can't override
3924     * the pitch.
3925     */
3926    bool require_equal_pitch = surf->surf_size != surf->total_size ||
3927                               num_layers != 1 ||
3928                               num_mipmap_levels != 1 ||
3929                               (info->gfx_level >= GFX9 && !surf->is_linear) ||
3930                               info->gfx_level == GFX10;
3931 
3932    if (info->gfx_level >= GFX9) {
3933       if (pitch) {
3934          if (surf->u.gfx9.surf_pitch != pitch && require_equal_pitch)
3935             return false;
3936 
3937          if (pitch != surf->u.gfx9.surf_pitch) {
3938             unsigned slices = surf->surf_size / surf->u.gfx9.surf_slice_size;
3939 
3940             surf->u.gfx9.uses_custom_pitch = true;
3941             surf->u.gfx9.surf_pitch = pitch;
3942             surf->u.gfx9.epitch = pitch - 1;
3943             surf->u.gfx9.pitch[0] = pitch;
3944             surf->u.gfx9.surf_slice_size = (uint64_t)pitch * surf->u.gfx9.surf_height * surf->bpe;
3945             surf->total_size = surf->surf_size = surf->u.gfx9.surf_slice_size * slices;
3946          }
3947       }
3948 
3949       surf->u.gfx9.surf_offset = offset;
3950       if (surf->has_stencil)
3951          surf->u.gfx9.zs.stencil_offset += offset;
3952    } else {
3953       if (pitch) {
3954          if (surf->u.legacy.level[0].nblk_x != pitch && require_equal_pitch)
3955             return false;
3956 
3957          surf->u.legacy.level[0].nblk_x = pitch;
3958          surf->u.legacy.level[0].slice_size_dw =
3959             ((uint64_t)pitch * surf->u.legacy.level[0].nblk_y * surf->bpe) / 4;
3960       }
3961 
3962       if (offset) {
3963          for (unsigned i = 0; i < ARRAY_SIZE(surf->u.legacy.level); ++i)
3964             surf->u.legacy.level[i].offset_256B += offset / 256;
3965       }
3966    }
3967 
3968    if (offset & ((1 << surf->alignment_log2) - 1) ||
3969        offset >= UINT64_MAX - surf->total_size)
3970       return false;
3971 
3972    if (surf->meta_offset)
3973       surf->meta_offset += offset;
3974    if (surf->fmask_offset)
3975       surf->fmask_offset += offset;
3976    if (surf->cmask_offset)
3977       surf->cmask_offset += offset;
3978    if (surf->display_dcc_offset)
3979       surf->display_dcc_offset += offset;
3980    return true;
3981 }
3982 
ac_surface_get_nplanes(const struct radeon_surf * surf)3983 unsigned ac_surface_get_nplanes(const struct radeon_surf *surf)
3984 {
3985    if (surf->modifier == DRM_FORMAT_MOD_INVALID)
3986       return 1;
3987    else if (surf->display_dcc_offset)
3988       return 3;
3989    else if (surf->meta_offset)
3990       return 2;
3991    else
3992       return 1;
3993 }
3994 
ac_surface_get_plane_offset(enum amd_gfx_level gfx_level,const struct radeon_surf * surf,unsigned plane,unsigned layer)3995 uint64_t ac_surface_get_plane_offset(enum amd_gfx_level gfx_level,
3996                                     const struct radeon_surf *surf,
3997                                     unsigned plane, unsigned layer)
3998 {
3999    switch (plane) {
4000    case 0:
4001       if (gfx_level >= GFX9) {
4002          return surf->u.gfx9.surf_offset +
4003                 layer * surf->u.gfx9.surf_slice_size;
4004       } else {
4005          return (uint64_t)surf->u.legacy.level[0].offset_256B * 256 +
4006                 layer * (uint64_t)surf->u.legacy.level[0].slice_size_dw * 4;
4007       }
4008    case 1:
4009       assert(!layer);
4010       return surf->display_dcc_offset ?
4011              surf->display_dcc_offset : surf->meta_offset;
4012    case 2:
4013       assert(!layer);
4014       return surf->meta_offset;
4015    default:
4016       unreachable("Invalid plane index");
4017    }
4018 }
4019 
ac_surface_get_plane_stride(enum amd_gfx_level gfx_level,const struct radeon_surf * surf,unsigned plane,unsigned level)4020 uint64_t ac_surface_get_plane_stride(enum amd_gfx_level gfx_level,
4021                                     const struct radeon_surf *surf,
4022                                     unsigned plane, unsigned level)
4023 {
4024    switch (plane) {
4025    case 0:
4026       if (gfx_level >= GFX9) {
4027          return (surf->is_linear ? surf->u.gfx9.pitch[level] : surf->u.gfx9.surf_pitch) * surf->bpe;
4028       } else {
4029          return surf->u.legacy.level[level].nblk_x * surf->bpe;
4030       }
4031    case 1:
4032       return 1 + (surf->display_dcc_offset ?
4033              surf->u.gfx9.color.display_dcc_pitch_max : surf->u.gfx9.color.dcc_pitch_max);
4034    case 2:
4035       return surf->u.gfx9.color.dcc_pitch_max + 1;
4036    default:
4037       unreachable("Invalid plane index");
4038    }
4039 }
4040 
ac_surface_get_plane_size(const struct radeon_surf * surf,unsigned plane)4041 uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf,
4042                                    unsigned plane)
4043 {
4044    switch (plane) {
4045    case 0:
4046       return surf->surf_size;
4047    case 1:
4048       return surf->display_dcc_offset ?
4049              surf->u.gfx9.color.display_dcc_size : surf->meta_size;
4050    case 2:
4051       return surf->meta_size;
4052    default:
4053       unreachable("Invalid plane index");
4054    }
4055 }
4056 
4057 uint64_t
ac_surface_addr_from_coord(struct ac_addrlib * addrlib,const struct radeon_info * info,const struct radeon_surf * surf,const struct ac_surf_info * surf_info,unsigned level,unsigned x,unsigned y,unsigned layer,bool is_3d)4058 ac_surface_addr_from_coord(struct ac_addrlib *addrlib, const struct radeon_info *info,
4059                            const struct radeon_surf *surf, const struct ac_surf_info *surf_info,
4060                            unsigned level, unsigned x, unsigned y, unsigned layer, bool is_3d)
4061 {
4062    /* Only implemented for GFX9+ */
4063    assert(info->gfx_level >= GFX9);
4064 
4065    ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT input = {0};
4066    input.size = sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT);
4067    input.slice = layer;
4068    input.mipId = level;
4069    input.unalignedWidth = DIV_ROUND_UP(surf_info->width, surf->blk_w);
4070    input.unalignedHeight = DIV_ROUND_UP(surf_info->height, surf->blk_h);
4071    input.numSlices = is_3d ? surf_info->depth : surf_info->array_size;
4072    input.numMipLevels = surf_info->levels;
4073    input.numSamples = surf_info->samples;
4074    input.numFrags = surf_info->samples;
4075    input.swizzleMode = surf->u.gfx9.swizzle_mode;
4076    input.resourceType = (AddrResourceType)surf->u.gfx9.resource_type;
4077    input.pipeBankXor = surf->tile_swizzle;
4078    input.bpp = surf->bpe * 8;
4079    input.x = x;
4080    input.y = y;
4081 
4082    ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT output = {0};
4083    output.size = sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT);
4084    Addr2ComputeSurfaceAddrFromCoord(addrlib->handle, &input, &output);
4085    return output.addr;
4086 }
4087 
4088 static void
gfx12_surface_compute_nbc_view(struct ac_addrlib * addrlib,const struct radeon_info * info,const struct radeon_surf * surf,const struct ac_surf_info * surf_info,unsigned level,unsigned layer,struct ac_surf_nbc_view * out)4089 gfx12_surface_compute_nbc_view(struct ac_addrlib *addrlib, const struct radeon_info *info,
4090                             const struct radeon_surf *surf, const struct ac_surf_info *surf_info,
4091                             unsigned level, unsigned layer, struct ac_surf_nbc_view *out)
4092 {
4093    ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT input = {0};
4094    input.size = sizeof(ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT);
4095    input.swizzleMode = surf->u.gfx9.swizzle_mode;
4096    input.resourceType = (AddrResourceType)surf->u.gfx9.resource_type;
4097    switch (surf->bpe) {
4098    case 8:
4099       input.format = ADDR_FMT_BC1;
4100       break;
4101    case 16:
4102       input.format = ADDR_FMT_BC3;
4103       break;
4104    default:
4105       assert(0);
4106    }
4107    input.unAlignedDims.width = surf_info->width;
4108    input.unAlignedDims.height = surf_info->height;
4109    input.numMipLevels = surf_info->levels;
4110    input.pipeBankXor = surf->tile_swizzle;
4111    input.slice = layer;
4112    input.mipId = level;
4113 
4114    ADDR_E_RETURNCODE res;
4115    ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT output = {0};
4116    output.size = sizeof(ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT);
4117    res = Addr3ComputeNonBlockCompressedView(addrlib->handle, &input, &output);
4118    if (res == ADDR_OK) {
4119       out->base_address_offset = output.offset;
4120       out->tile_swizzle = output.pipeBankXor;
4121       out->width = output.unAlignedDims.width;
4122       out->height = output.unAlignedDims.height;
4123       out->num_levels = output.numMipLevels;
4124       out->level = output.mipId;
4125       out->valid = true;
4126    } else {
4127       out->valid = false;
4128    }
4129 }
4130 
4131 static void
gfx10_surface_compute_nbc_view(struct ac_addrlib * addrlib,const struct radeon_info * info,const struct radeon_surf * surf,const struct ac_surf_info * surf_info,unsigned level,unsigned layer,struct ac_surf_nbc_view * out)4132 gfx10_surface_compute_nbc_view(struct ac_addrlib *addrlib, const struct radeon_info *info,
4133                             const struct radeon_surf *surf, const struct ac_surf_info *surf_info,
4134                             unsigned level, unsigned layer, struct ac_surf_nbc_view *out)
4135 {
4136    ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT input = {0};
4137    input.size = sizeof(ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT);
4138    input.swizzleMode = surf->u.gfx9.swizzle_mode;
4139    input.resourceType = (AddrResourceType)surf->u.gfx9.resource_type;
4140    switch (surf->bpe) {
4141    case 8:
4142       input.format = ADDR_FMT_BC1;
4143       break;
4144    case 16:
4145       input.format = ADDR_FMT_BC3;
4146       break;
4147    default:
4148       assert(0);
4149    }
4150    input.width = surf_info->width;
4151    input.height = surf_info->height;
4152    input.numSlices = surf_info->array_size;
4153    input.numMipLevels = surf_info->levels;
4154    input.pipeBankXor = surf->tile_swizzle;
4155    input.slice = layer;
4156    input.mipId = level;
4157 
4158    ADDR_E_RETURNCODE res;
4159    ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT output = {0};
4160    output.size = sizeof(ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT);
4161    res = Addr2ComputeNonBlockCompressedView(addrlib->handle, &input, &output);
4162    if (res == ADDR_OK) {
4163       out->base_address_offset = output.offset;
4164       out->tile_swizzle = output.pipeBankXor;
4165       out->width = output.unalignedWidth;
4166       out->height = output.unalignedHeight;
4167       out->num_levels = output.numMipLevels;
4168       out->level = output.mipId;
4169       out->valid = true;
4170    } else {
4171       out->valid = false;
4172    }
4173 }
4174 
4175 void
ac_surface_compute_nbc_view(struct ac_addrlib * addrlib,const struct radeon_info * info,const struct radeon_surf * surf,const struct ac_surf_info * surf_info,unsigned level,unsigned layer,struct ac_surf_nbc_view * out)4176 ac_surface_compute_nbc_view(struct ac_addrlib *addrlib, const struct radeon_info *info,
4177                             const struct radeon_surf *surf, const struct ac_surf_info *surf_info,
4178                             unsigned level, unsigned layer, struct ac_surf_nbc_view *out)
4179 {
4180    /* Only implemented for GFX10+ */
4181    assert(info->gfx_level >= GFX10);
4182 
4183    if (info->gfx_level >= GFX12) {
4184       gfx12_surface_compute_nbc_view(addrlib, info, surf, surf_info, level, layer, out);
4185    } else {
4186       gfx10_surface_compute_nbc_view(addrlib, info, surf, surf_info, level, layer, out);
4187    }
4188 }
4189 
ac_surface_print_info(FILE * out,const struct radeon_info * info,const struct radeon_surf * surf)4190 void ac_surface_print_info(FILE *out, const struct radeon_info *info,
4191                            const struct radeon_surf *surf)
4192 {
4193    if (info->gfx_level >= GFX9) {
4194       fprintf(out,
4195               "    Surf: size=%" PRIu64 ", slice_size=%" PRIu64 ", "
4196               "alignment=%u, swmode=%u, tile_swizzle=%u, epitch=%u, pitch=%u, blk_w=%u, "
4197               "blk_h=%u, bpe=%u, flags=0x%"PRIx64"\n",
4198               surf->surf_size, surf->u.gfx9.surf_slice_size,
4199               1 << surf->surf_alignment_log2, surf->u.gfx9.swizzle_mode, surf->tile_swizzle,
4200               surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch,
4201               surf->blk_w, surf->blk_h, surf->bpe, surf->flags);
4202 
4203       if (surf->fmask_offset)
4204          fprintf(out,
4205                  "    FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
4206                  "alignment=%u, swmode=%u, epitch=%u\n",
4207                  surf->fmask_offset, surf->fmask_size,
4208                  1 << surf->fmask_alignment_log2, surf->u.gfx9.color.fmask_swizzle_mode,
4209                  surf->u.gfx9.color.fmask_epitch);
4210 
4211       if (surf->cmask_offset)
4212          fprintf(out,
4213                  "    CMask: offset=%" PRIu64 ", size=%u, "
4214                  "alignment=%u\n",
4215                  surf->cmask_offset, surf->cmask_size,
4216                  1 << surf->cmask_alignment_log2);
4217 
4218       if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
4219          fprintf(out,
4220                  "    HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
4221                  surf->meta_offset, surf->meta_size,
4222                  1 << surf->meta_alignment_log2);
4223 
4224       if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
4225          fprintf(out,
4226                  "    DCC: offset=%" PRIu64 ", size=%u, "
4227                  "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
4228                  surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2,
4229                  surf->u.gfx9.color.display_dcc_pitch_max, surf->num_meta_levels);
4230 
4231       if (surf->has_stencil)
4232          fprintf(out,
4233                  "    Stencil: offset=%" PRIu64 ", swmode=%u, epitch=%u\n",
4234                  surf->u.gfx9.zs.stencil_offset,
4235                  surf->u.gfx9.zs.stencil_swizzle_mode,
4236                  surf->u.gfx9.zs.stencil_epitch);
4237 
4238       if (info->gfx_level == GFX12) {
4239          if (surf->u.gfx9.zs.hiz.size) {
4240             fprintf(out,
4241                     "    HiZ: offset=%" PRIu64 ", size=%u, swmode=%u, width_in_tiles=%u, height_in_tiles=%u\n",
4242                     surf->u.gfx9.zs.hiz.offset, surf->u.gfx9.zs.hiz.size, surf->u.gfx9.zs.hiz.swizzle_mode,
4243                     surf->u.gfx9.zs.hiz.width_in_tiles, surf->u.gfx9.zs.hiz.height_in_tiles);
4244          }
4245 
4246          if (surf->u.gfx9.zs.his.size) {
4247             fprintf(out,
4248                     "    HiS: offset=%" PRIu64 ", size=%u, swmode=%u, width_in_tiles=%u, height_in_tiles=%u\n",
4249                     surf->u.gfx9.zs.his.offset, surf->u.gfx9.zs.his.size, surf->u.gfx9.zs.his.swizzle_mode,
4250                     surf->u.gfx9.zs.his.width_in_tiles, surf->u.gfx9.zs.his.height_in_tiles);
4251          }
4252       }
4253    } else {
4254       fprintf(out,
4255               "    Surf: size=%" PRIu64 ", alignment=%u, blk_w=%u, blk_h=%u, "
4256               "bpe=%u, flags=0x%"PRIx64"\n",
4257               surf->surf_size, 1 << surf->surf_alignment_log2, surf->blk_w,
4258               surf->blk_h, surf->bpe, surf->flags);
4259 
4260       fprintf(out,
4261               "    Layout: size=%" PRIu64 ", alignment=%u, bankw=%u, bankh=%u, "
4262               "nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
4263               surf->surf_size, 1 << surf->surf_alignment_log2,
4264               surf->u.legacy.bankw, surf->u.legacy.bankh,
4265               surf->u.legacy.num_banks, surf->u.legacy.mtilea,
4266               surf->u.legacy.tile_split, surf->u.legacy.pipe_config,
4267               (surf->flags & RADEON_SURF_SCANOUT) != 0);
4268 
4269       if (surf->fmask_offset)
4270          fprintf(out,
4271                  "    FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
4272                  "alignment=%u, pitch_in_pixels=%u, bankh=%u, "
4273                  "slice_tile_max=%u, tile_mode_index=%u\n",
4274                  surf->fmask_offset, surf->fmask_size,
4275                  1 << surf->fmask_alignment_log2, surf->u.legacy.color.fmask.pitch_in_pixels,
4276                  surf->u.legacy.color.fmask.bankh,
4277                  surf->u.legacy.color.fmask.slice_tile_max,
4278                  surf->u.legacy.color.fmask.tiling_index);
4279 
4280       if (surf->cmask_offset)
4281          fprintf(out,
4282                  "    CMask: offset=%" PRIu64 ", size=%u, alignment=%u, "
4283                  "slice_tile_max=%u\n",
4284                  surf->cmask_offset, surf->cmask_size,
4285                  1 << surf->cmask_alignment_log2, surf->u.legacy.color.cmask_slice_tile_max);
4286 
4287       if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
4288          fprintf(out, "    HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
4289                  surf->meta_offset, surf->meta_size,
4290                  1 << surf->meta_alignment_log2);
4291 
4292       if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
4293          fprintf(out, "    DCC: offset=%" PRIu64 ", size=%u, alignment=%u\n",
4294                  surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2);
4295 
4296       if (surf->has_stencil)
4297          fprintf(out, "    StencilLayout: tilesplit=%u\n",
4298                  surf->u.legacy.stencil_tile_split);
4299    }
4300 }
4301 
/* Emit NIR computing an address inside a metadata surface from a texel
 * coordinate, using the XOR masks stored in equation->u.gfx10_bits.
 *
 * The table is read in groups of four masks per address bit, one each for
 * x, y, z and a fourth slot for which no coordinate is supplied. Every bit
 * set in a mask selects one bit of the matching coordinate; all selected
 * bits are XORed together to form that address bit.
 *
 * blkSizeBias      added to log2(meta_block_width) + log2(meta_block_height)
 *                  to obtain the log2 block size used for the block stride
 *                  and the pipe-xor mask
 * blkStart         first address bit to generate; table entry 0 belongs to it
 * meta_pitch       shifted right by log2(meta_block_width) to get the number
 *                  of blocks per row
 * meta_slice_size  multiplied by z to form the slice term
 * pipe_xor         masked with the pipe mask taken from gb_addr_config,
 *                  shifted by the pipe interleave and limited to the block
 * bit_position     optional; if non-NULL receives (address & 1) << 2
 *
 * Returns slice term + block term + ((address >> 1) ^ pipe xor).
 */
static nir_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                               const struct gfx9_meta_equation *equation,
                                               int blkSizeBias, unsigned blkStart,
                                               nir_def *meta_pitch, nir_def *meta_slice_size,
                                               nir_def *x, nir_def *y, nir_def *z,
                                               nir_def *pipe_xor,
                                               nir_def **bit_position)
{
   nir_def *const imm0 = nir_imm_int(b, 0);
   nir_def *const imm1 = nir_imm_int(b, 1);

   assert(info->gfx_level >= GFX10);

   const unsigned blk_w_log2 = util_logbase2(equation->meta_block_width);
   const unsigned blk_h_log2 = util_logbase2(equation->meta_block_height);
   const unsigned blk_size_log2 = blk_w_log2 + blk_h_log2 + blkSizeBias;

   /* The fourth slot has no coordinate; its masks are expected to be empty. */
   nir_def *const inputs[4] = {x, y, z, NULL};
   nir_def *addr = imm0;

   for (unsigned bit = blkStart; bit < blk_size_log2 + 1; bit++) {
      const unsigned first_entry = (bit - blkStart) * 4;
      nir_def *parity = imm0;

      for (unsigned comp = 0; comp < 4; comp++) {
         unsigned pending = equation->u.gfx10_bits[first_entry + comp];

         /* Fold every selected coordinate bit into the running XOR. */
         while (pending) {
            nir_def *shifted = nir_ushr_imm(b, inputs[comp], u_bit_scan(&pending));
            nir_def *lsb = nir_iand(b, shifted, imm1);

            parity = nir_ixor(b, parity, lsb);
         }
      }

      nir_def *placed = nir_ishl_imm(b, parity, bit);
      addr = nir_ior(b, addr, placed);
   }

   const unsigned in_block_mask = (1 << blk_size_log2) - 1;
   const unsigned pipe_mask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
   const unsigned pipe_interleave_log2 =
      8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);

   /* Linear index of the meta block that contains (x, y). */
   nir_def *blk_x = nir_ushr_imm(b, x, blk_w_log2);
   nir_def *blk_y = nir_ushr_imm(b, y, blk_h_log2);
   nir_def *blks_per_row = nir_ushr_imm(b, meta_pitch, blk_w_log2);
   nir_def *blk_index = nir_iadd(b, nir_imul(b, blk_y, blks_per_row), blk_x);

   nir_def *xor_in_pipes = nir_iand_imm(b, pipe_xor, pipe_mask);
   nir_def *xor_shifted = nir_ishl_imm(b, xor_in_pipes, pipe_interleave_log2);
   nir_def *xor_in_block = nir_iand_imm(b, xor_shifted, in_block_mask);

   if (bit_position) {
      nir_def *low_bit = nir_iand_imm(b, addr, 1);

      *bit_position = nir_ishl_imm(b, low_bit, 2);
   }

   nir_def *slice_term = nir_imul(b, meta_slice_size, z);
   nir_def *blk_stride = nir_ishl_imm(b, imm1, blk_size_log2);
   nir_def *blk_term = nir_imul(b, blk_index, blk_stride);
   nir_def *base = nir_iadd(b, slice_term, blk_term);

   /* The lowest generated bit is dropped from the returned address. */
   nir_def *in_block = nir_ixor(b, nir_ushr(b, addr, imm1), xor_in_block);

   return nir_iadd(b, base, in_block);
}
4356 
/* Emit NIR computing an address inside a metadata surface from a texel
 * coordinate, using the per-bit equation stored in equation->u.gfx9.
 *
 * Each address bit except the last is the XOR of up to five terms. A term
 * names one of {x, y, z, sample, block index} through its "dim" field and a
 * bit of that value through its "ord" field; terms with dim >= 5 are skipped.
 * The last equation bit is different: the block index, shifted right by that
 * bit's coord[0].ord, is ORed in starting at that bit position.
 *
 * meta_pitch    shifted right by log2(meta_block_width) to get the number of
 *               blocks per row
 * meta_height   shifted right by log2(meta_block_height) to get the number of
 *               block rows per slice
 * sample        sample index fed to the equation as coordinate 3
 * pipe_xor      masked to num_pipe_bits bits, shifted by the pipe interleave
 *               and XORed into the result
 * bit_position  optional; if non-NULL receives (address & 1) << 2
 *
 * Returns (address >> 1) ^ (masked pipe_xor << pipe interleave log2).
 */
static nir_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                              const struct gfx9_meta_equation *equation,
                                              nir_def *meta_pitch, nir_def *meta_height,
                                              nir_def *x, nir_def *y, nir_def *z,
                                              nir_def *sample, nir_def *pipe_xor,
                                              nir_def **bit_position)
{
   nir_def *zero = nir_imm_int(b, 0);
   nir_def *one = nir_imm_int(b, 1);

   assert(info->gfx_level >= GFX9);

   unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
   unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
   unsigned meta_block_depth_log2 = util_logbase2(equation->meta_block_depth);

   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
   unsigned numPipeBits = equation->u.gfx9.num_pipe_bits;
   nir_def *pitchInBlock = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
   nir_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, meta_height, meta_block_height_log2),
                                        pitchInBlock);

   nir_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
   nir_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
   nir_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2);

   /* Linear index of the meta block that contains (x, y, z). */
   nir_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock),
                                              nir_imul(b, yb, pitchInBlock)), xb);
   nir_def *coords[] = {x, y, z, sample, blockIndex};

   nir_def *address = zero;
   unsigned num_bits = equation->u.gfx9.num_bits;

   /* "num_bits - 1" is both the loop bound and the index of the last
    * equation bit below. An empty equation would make the unsigned
    * subtraction wrap around, so reject it together with oversized ones.
    */
   assert(num_bits >= 1 && num_bits <= 32);

   /* Compute the address up until the last bit that doesn't use the block index. */
   for (unsigned i = 0; i < num_bits - 1; i++) {
      nir_def *xor = zero;

      for (unsigned c = 0; c < 5; c++) {
         /* Only dims 0..4 index into coords[]; anything else is an unused term. */
         if (equation->u.gfx9.bit[i].coord[c].dim >= 5)
            continue;

         assert(equation->u.gfx9.bit[i].coord[c].ord < 32);
         nir_def *ison =
            nir_iand(b, nir_ushr_imm(b, coords[equation->u.gfx9.bit[i].coord[c].dim],
                                     equation->u.gfx9.bit[i].coord[c].ord), one);

         xor = nir_ixor(b, xor, ison);
      }
      address = nir_ior(b, address, nir_ishl_imm(b, xor, i));
   }

   /* Fill the remaining bits with the block index. */
   unsigned last = num_bits - 1;
   address = nir_ior(b, address,
                     nir_ishl_imm(b, nir_ushr_imm(b, blockIndex,
                                                  equation->u.gfx9.bit[last].coord[0].ord),
                                  last));

   if (bit_position)
      *bit_position = nir_ishl_imm(b, nir_iand_imm(b, address, 1), 2);

   /* The lowest equation bit is dropped from the returned address. */
   nir_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1);
   return nir_ixor(b, nir_ushr(b, address, one),
                   nir_ishl_imm(b, pipeXor, m_pipeInterleaveLog2));
}
4423 
/* Emit NIR computing a DCC address for the given coordinate.
 *
 * On GFX10 and newer this calls the gfx10 equation helper with a block-size
 * bias of log2(bpe) - 8, a start bit of 1, and dcc_pitch / dcc_slice_size;
 * dcc_height and sample are not used on that path. On older chips it calls
 * the gfx9 equation helper with dcc_pitch, dcc_height and sample;
 * dcc_slice_size is not used there.
 *
 * No bit position is requested from either helper.
 */
nir_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                    unsigned bpe, const struct gfx9_meta_equation *equation,
                                    nir_def *dcc_pitch, nir_def *dcc_height,
                                    nir_def *dcc_slice_size,
                                    nir_def *x, nir_def *y, nir_def *z,
                                    nir_def *sample, nir_def *pipe_xor)
{
   if (info->gfx_level >= GFX10) {
      /* Do the subtraction in int: the bias is negative whenever
       * log2(bpe) < 8, and an unsigned subtraction would wrap first and then
       * depend on an out-of-range unsigned-to-int conversion.
       */
      int blk_size_bias = (int)util_logbase2(bpe) - 8;

      return gfx10_nir_meta_addr_from_coord(b, info, equation, blk_size_bias, 1,
                                            dcc_pitch, dcc_slice_size,
                                            x, y, z, pipe_xor, NULL);
   } else {
      return gfx9_nir_meta_addr_from_coord(b, info, equation, dcc_pitch,
                                           dcc_height, x, y, z,
                                           sample, pipe_xor, NULL);
   }
}
4443 
/* Emit NIR computing a CMASK address for the given coordinate.
 *
 * Chips older than GFX10 go through the gfx9 equation helper, which takes
 * cmask_pitch and cmask_height and is given a constant 0 as the sample.
 * GFX10 and newer go through the gfx10 equation helper with a block-size
 * bias of -7, a start bit of 1, and cmask_pitch / cmask_slice_size.
 *
 * bit_position is forwarded unchanged to whichever helper is used.
 */
nir_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                      const struct gfx9_meta_equation *equation,
                                      nir_def *cmask_pitch, nir_def *cmask_height,
                                      nir_def *cmask_slice_size,
                                      nir_def *x, nir_def *y, nir_def *z,
                                      nir_def *pipe_xor,
                                      nir_def **bit_position)
{
   /* Built before the dispatch, so the immediate is emitted on both paths. */
   nir_def *sample0 = nir_imm_int(b, 0);

   if (info->gfx_level < GFX10)
      return gfx9_nir_meta_addr_from_coord(b, info, equation, cmask_pitch,
                                           cmask_height, x, y, z, sample0,
                                           pipe_xor, bit_position);

   return gfx10_nir_meta_addr_from_coord(b, info, equation, -7, 1,
                                         cmask_pitch, cmask_slice_size,
                                         x, y, z, pipe_xor, bit_position);
}
4464 
/* Emit NIR computing an HTILE address for the given coordinate.
 *
 * Always goes through the gfx10 equation helper, with a block-size bias of
 * -4 and a start bit of 2. No bit position is requested.
 */
nir_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                      const struct gfx9_meta_equation *equation,
                                      nir_def *htile_pitch,
                                      nir_def *htile_slice_size,
                                      nir_def *x, nir_def *y, nir_def *z,
                                      nir_def *pipe_xor)
{
   const int blk_size_bias = -4;
   const unsigned first_bit = 2;

   nir_def *addr = gfx10_nir_meta_addr_from_coord(b, info, equation,
                                                  blk_size_bias, first_bit,
                                                  htile_pitch, htile_slice_size,
                                                  x, y, z, pipe_xor, NULL);
   return addr;
}
4476