1 /* 2 * Copyright © 2017 Advanced Micro Devices, Inc. 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 7 #ifndef AC_SURFACE_H 8 #define AC_SURFACE_H 9 10 #include "amd_family.h" 11 #include "util/format/u_format.h" 12 13 /* NIR is optional. Some components don't want to include NIR with ac_surface.h. */ 14 #ifdef AC_SURFACE_INCLUDE_NIR 15 #include "compiler/nir/nir_builder.h" 16 #endif 17 18 #include <stdbool.h> 19 #include <stdint.h> 20 #include <stdio.h> 21 22 #ifdef __cplusplus 23 extern "C" { 24 #endif 25 26 /* Forward declarations. */ 27 struct ac_addrlib; 28 29 struct amdgpu_gpu_info; 30 struct radeon_info; 31 32 #define RADEON_SURF_MAX_LEVELS 17 33 34 enum radeon_surf_mode 35 { 36 RADEON_SURF_MODE_LINEAR_ALIGNED = 1, 37 RADEON_SURF_MODE_1D = 2, 38 RADEON_SURF_MODE_2D = 3, 39 }; 40 41 /* This describes D/S/Z/R swizzle modes. 42 * Defined in the GB_TILE_MODEn.MICRO_TILE_MODE_NEW order. 43 */ 44 enum radeon_micro_mode 45 { 46 RADEON_MICRO_MODE_DISPLAY = 0, 47 RADEON_MICRO_MODE_STANDARD = 1, 48 RADEON_MICRO_MODE_DEPTH = 2, 49 RADEON_MICRO_MODE_RENDER = 3, /* gfx9 and older: rotated */ 50 }; 51 52 /* the first 16 bits are reserved for libdrm_radeon, don't use them */ 53 #define RADEON_SURF_SCANOUT (1 << 16) 54 #define RADEON_SURF_ZBUFFER (1 << 17) 55 #define RADEON_SURF_SBUFFER (1 << 18) 56 #define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER) 57 /* bits 19 and 20 are reserved for libdrm_radeon, don't use them */ 58 #define RADEON_SURF_FMASK (1 << 21) 59 #define RADEON_SURF_DISABLE_DCC (1ull << 22) 60 #define RADEON_SURF_TC_COMPATIBLE_HTILE (1ull << 23) 61 #define RADEON_SURF_IMPORTED (1ull << 24) 62 #define RADEON_SURF_CONTIGUOUS_DCC_LAYERS (1ull << 25) 63 #define RADEON_SURF_SHAREABLE (1ull << 26) 64 #define RADEON_SURF_NO_RENDER_TARGET (1ull << 27) 65 /* Force a swizzle mode (gfx9+) or tile mode (gfx6-8). 66 * If this is not set, optimize for space. 
*/ 67 #define RADEON_SURF_FORCE_SWIZZLE_MODE (1ull << 28) 68 #define RADEON_SURF_NO_FMASK (1ull << 29) 69 /* This disables HTILE on gfx6-11, and HiZ/HiS on gfx12, */ 70 #define RADEON_SURF_NO_HTILE (1ull << 30) 71 #define RADEON_SURF_FORCE_MICRO_TILE_MODE (1ull << 31) 72 #define RADEON_SURF_PRT (1ull << 32) 73 #define RADEON_SURF_VRS_RATE (1ull << 33) 74 /* Block compressed + linear format is not supported in addrlib. These surface can be 75 * used as transfer resource. This flag indicates not to set flags.texture flag for 76 * color surface in gfx9_compute_surface(). */ 77 #define RADEON_SURF_NO_TEXTURE (1ull << 34) 78 #define RADEON_SURF_NO_STENCIL_ADJUST (1ull << 35) 79 #define RADEON_SURF_PREFER_4K_ALIGNMENT (1ull << 36) 80 #define RADEON_SURF_PREFER_64K_ALIGNMENT (1ull << 37) 81 #define RADEON_SURF_VIDEO_REFERENCE (1ull << 38) 82 83 enum radeon_enc_hevc_surface_alignment 84 { 85 RADEON_ENC_HEVC_SURFACE_LOG2_WIDTH_ALIGNMENT = 6, 86 RADEON_ENC_HEVC_SURFACE_LOG2_HEIGHT_ALIGNMENT = 4, 87 }; 88 89 struct legacy_surf_level { 90 uint32_t offset_256B; /* divided by 256, the hw can only do 40-bit addresses */ 91 uint32_t slice_size_dw; /* in dwords; max = 4GB / 4. 
*/ 92 unsigned nblk_x : 15; 93 unsigned nblk_y : 15; 94 enum radeon_surf_mode mode : 2; 95 }; 96 97 struct legacy_surf_dcc_level { 98 uint32_t dcc_offset; /* relative offset within DCC mip tree */ 99 uint32_t dcc_fast_clear_size; 100 uint32_t dcc_slice_fast_clear_size; 101 }; 102 103 struct legacy_surf_fmask { 104 unsigned slice_tile_max; /* max 4M */ 105 uint8_t tiling_index; /* max 31 */ 106 uint8_t bankh; /* max 8 */ 107 uint16_t pitch_in_pixels; 108 }; 109 110 struct legacy_surf_layout { 111 unsigned bankw : 4; /* max 8 */ 112 unsigned bankh : 4; /* max 8 */ 113 unsigned mtilea : 4; /* max 8 */ 114 unsigned tile_split : 13; /* max 4K */ 115 unsigned stencil_tile_split : 13; /* max 4K */ 116 unsigned pipe_config : 5; /* max 17 */ 117 unsigned num_banks : 5; /* max 16 */ 118 unsigned macro_tile_index : 4; /* max 15 */ 119 120 /* Whether the depth miptree or stencil miptree as used by the DB are 121 * adjusted from their TC compatible form to ensure depth/stencil 122 * compatibility. If either is true, the corresponding plane cannot be 123 * sampled from. 124 */ 125 unsigned depth_adjusted : 1; 126 unsigned stencil_adjusted : 1; 127 128 struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS]; 129 uint8_t tiling_index[RADEON_SURF_MAX_LEVELS]; 130 131 union { 132 /* Color layout */ 133 struct { 134 struct legacy_surf_dcc_level dcc_level[RADEON_SURF_MAX_LEVELS]; 135 struct legacy_surf_fmask fmask; 136 unsigned cmask_slice_tile_max; 137 } color; 138 139 /* Z/S layout */ 140 struct { 141 struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS]; 142 uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS]; 143 } zs; 144 }; 145 }; 146 147 /* Same as addrlib - AddrResourceType. 
*/ 148 enum gfx9_resource_type 149 { 150 RADEON_RESOURCE_1D = 0, 151 RADEON_RESOURCE_2D, 152 RADEON_RESOURCE_3D, 153 }; 154 155 struct gfx9_surf_meta_flags { 156 uint8_t rb_aligned : 1; /* optimal for RBs */ 157 uint8_t pipe_aligned : 1; /* optimal for L2 */ 158 uint8_t independent_64B_blocks : 1; 159 uint8_t independent_128B_blocks : 1; 160 uint8_t max_compressed_block_size : 2; 161 uint8_t display_equation_valid : 1; 162 }; 163 164 struct gfx9_surf_meta_level { 165 unsigned offset; 166 unsigned size; /* the size of one level in one layer (the image is an array of layers 167 * where each layer has an array of levels) */ 168 }; 169 170 /** 171 * Meta address equation. 172 * 173 * DCC/HTILE address equation for doing DCC/HTILE address computations in shaders. 174 * 175 * ac_surface_meta_address_test.c contains the reference implementation. 176 * ac_nir_{dcc,htile}_addr_from_coord is the NIR implementation. 177 * 178 * For DCC: 179 * The gfx9 equation doesn't support mipmapping. 180 * The gfx10 equation doesn't support mipmapping and MSAA. 181 * (those are also limitations of Addr2ComputeDccAddrFromCoord) 182 * 183 * For HTILE: 184 * The gfx9 equation isn't implemented. 185 * The gfx10 equation doesn't support mipmapping. 
186 */ 187 struct gfx9_meta_equation { 188 uint16_t meta_block_width; 189 uint16_t meta_block_height; 190 uint16_t meta_block_depth; 191 192 union { 193 /* The gfx9 DCC equation is chip-specific, and it varies with: 194 * - resource type 195 * - swizzle_mode 196 * - bpp 197 * - number of samples 198 * - number of fragments 199 * - pipe_aligned 200 * - rb_aligned 201 */ 202 struct { 203 uint8_t num_bits; 204 uint8_t num_pipe_bits; 205 206 struct { 207 struct { 208 uint8_t dim:3; /* 0..4 */ 209 uint8_t ord:5; /* 0..31 */ 210 } coord[5]; /* 0..num_coords-1 */ 211 } bit[20]; /* 0..num_bits-1 */ 212 } gfx9; 213 214 /* The gfx10 DCC equation is chip-specific, it requires 64KB_R_X, and it varies with: 215 * - bpp 216 * - number of samples 217 * - number of fragments 218 * - pipe_aligned 219 * 220 * The gfx10 HTILE equation is chip-specific, it requires 64KB_Z_X, and it varies with: 221 * - number of samples 222 */ 223 uint16_t gfx10_bits[64]; 224 } u; 225 }; 226 227 struct gfx12_hiz_his_layout { 228 uint64_t offset; 229 uint32_t size; 230 uint16_t width_in_tiles; 231 uint16_t height_in_tiles; 232 uint8_t swizzle_mode; 233 uint8_t alignment_log2; 234 }; 235 236 struct gfx9_surf_layout { 237 uint16_t epitch; /* gfx9 only, not on gfx10 */ 238 uint8_t swizzle_mode; /* color or depth */ 239 bool uses_custom_pitch; /* only used by gfx10.3+ */ 240 bool gfx12_enable_dcc; /* set AMDGPU_GEM_CREATE_GFX12_DCC if the placement is VRAM */ 241 242 enum gfx9_resource_type resource_type:8; /* 1D, 2D or 3D */ 243 uint32_t surf_pitch; /* up to 64K (in blocks) */ 244 uint32_t surf_height; /* up to 64K */ 245 246 uint64_t surf_offset; /* 0 unless imported with an offset */ 247 /* The size of the 2D plane containing all mipmap levels. */ 248 uint64_t surf_slice_size; 249 /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */ 250 uint64_t offset[RADEON_SURF_MAX_LEVELS]; /* up to 64K * 64K * 16 * ~1.33 */ 251 /* Mipmap level pitch in elements. Only valid for LINEAR. 
*/ 252 uint32_t pitch[RADEON_SURF_MAX_LEVELS]; /* up to 64K */ 253 254 uint32_t base_mip_width; /* up to 64K */ 255 uint32_t base_mip_height; /* up to 64K */ 256 257 /* Pitch of level in blocks, only valid for prt images. */ 258 uint32_t prt_level_pitch[RADEON_SURF_MAX_LEVELS]; /* up to 64K */ 259 /* Offset within slice in bytes, only valid for prt images. */ 260 uint64_t prt_level_offset[RADEON_SURF_MAX_LEVELS]; /* up to 64K * 64K * 16 * ~1.33 */ 261 262 /* DCC or HTILE level info */ 263 struct gfx9_surf_meta_level meta_levels[RADEON_SURF_MAX_LEVELS]; 264 265 union { 266 /* Color */ 267 struct { 268 struct gfx9_surf_meta_flags dcc; /* metadata of color */ 269 uint8_t fmask_swizzle_mode; 270 uint16_t fmask_epitch; /* gfx9 only, not on gfx10 */ 271 272 uint16_t dcc_pitch_max; 273 uint16_t dcc_height; 274 275 uint8_t dcc_block_width; 276 uint8_t dcc_block_height; 277 uint8_t dcc_block_depth; 278 279 /* Gfx12 DCC recompression settings used by kernel memory management. 280 * The driver sets these, not ac_compute_surface. 281 */ 282 uint8_t dcc_number_type; /* CB_COLOR0_INFO.NUMBER_TYPE */ 283 uint8_t dcc_data_format; /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ 284 285 /* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0. 286 * The 3D engine doesn't support that layout except for chips with 1 RB. 287 * All other chips must set rb_aligned=1. 288 * A compute shader needs to convert from aligned DCC to unaligned. 289 */ 290 uint8_t display_dcc_alignment_log2; 291 uint32_t display_dcc_size; 292 uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */ 293 uint16_t display_dcc_height; 294 bool dcc_retile_use_uint16; /* if all values fit into uint16_t */ 295 uint32_t dcc_retile_num_elements; 296 void *dcc_retile_map; 297 298 /* CMASK level info (only level 0) */ 299 struct gfx9_surf_meta_level cmask_level0; 300 301 /* For DCC retiling. 
*/ 302 struct gfx9_meta_equation dcc_equation; /* 2D only */ 303 struct gfx9_meta_equation display_dcc_equation; 304 305 /* For FCE compute. */ 306 struct gfx9_meta_equation cmask_equation; /* 2D only */ 307 } color; 308 309 /* Z/S */ 310 struct { 311 uint64_t stencil_offset; /* separate stencil */ 312 uint16_t stencil_epitch; /* gfx9 only, not on gfx10 */ 313 uint8_t stencil_swizzle_mode; 314 315 struct gfx12_hiz_his_layout hiz, his; 316 317 /* For HTILE VRS. (only Gfx103-Gfx11) */ 318 struct gfx9_meta_equation htile_equation; 319 } zs; 320 }; 321 }; 322 323 struct radeon_surf { 324 /* Format properties. */ 325 uint8_t blk_w : 4; 326 uint8_t blk_h : 4; 327 uint8_t bpe : 5; 328 /* Display, standard(thin), depth, render(rotated). AKA D,S,Z,R swizzle modes. */ 329 uint8_t micro_tile_mode : 3; 330 /* Number of mipmap levels where DCC or HTILE is enabled starting from level 0. 331 * Non-zero levels may be disabled due to alignment constraints, but not 332 * the first level. 333 */ 334 uint8_t num_meta_levels : 4; 335 uint8_t is_linear : 1; 336 uint8_t has_stencil : 1; 337 /* This might be true even if micro_tile_mode isn't displayable or rotated. */ 338 uint8_t is_displayable : 1; 339 /* Thick tiling means 3D tiles. Use 3D compute workgroups for blits. (4x4x4 works well) */ 340 uint8_t thick_tiling : 1; 341 uint8_t first_mip_tail_level : 4; 342 343 /* These are return values. Some of them can be set by the caller, but 344 * they will be treated as hints (e.g. bankw, bankh) and might be 345 * changed by the calculator. 346 */ 347 348 /* Not supported yet for depth + stencil. */ 349 uint16_t prt_tile_width; /* up to 256 roughly (for 64KB tiles) */ 350 uint16_t prt_tile_height; /* up to 256 roughly (for 64KB tiles) */ 351 uint16_t prt_tile_depth; /* up to 32 roughly (for 64KB thick tiles) */ 352 353 /* Tile swizzle can be OR'd with low bits of the BASE_256B address. 354 * The value is the same for all mipmap levels. Supported tile modes: 355 * - GFX6: Only macro tiling. 
356 * - GFX9: Only *_X and *_T swizzle modes. Level 0 must not be in the mip 357 * tail. 358 * 359 * Only these surfaces are allowed to set it: 360 * - color (if it doesn't have to be displayable) 361 * - DCC (same tile swizzle as color) 362 * - FMASK 363 * - CMASK if it's TC-compatible or if the gen is GFX9 364 * - depth/stencil if HTILE is not TC-compatible and if the gen is not GFX9 365 */ 366 uint16_t tile_swizzle; /* it has 16 bits because gfx11 shifts it by 2 bits */ 367 uint8_t fmask_tile_swizzle; 368 369 /* Use (1 << log2) to compute the alignment. */ 370 uint8_t surf_alignment_log2; 371 uint8_t fmask_alignment_log2; 372 uint8_t meta_alignment_log2; /* DCC or HTILE */ 373 uint8_t cmask_alignment_log2; 374 uint8_t alignment_log2; 375 376 /* DRM format modifier. Set to DRM_FORMAT_MOD_INVALID to have addrlib 377 * select tiling parameters instead. 378 */ 379 uint64_t modifier; 380 uint64_t flags; 381 382 uint64_t surf_size; 383 uint64_t fmask_size; 384 uint32_t fmask_slice_size; /* max 2^31 (16K * 16K * 8) */ 385 386 /* DCC and HTILE (they are very small) */ 387 uint32_t meta_size; 388 uint32_t meta_slice_size; 389 uint32_t meta_pitch; 390 391 uint32_t cmask_size; 392 uint32_t cmask_slice_size; 393 uint16_t cmask_pitch; /* GFX9+ */ 394 uint16_t cmask_height; /* GFX9+ */ 395 396 /* All buffers combined. */ 397 uint64_t meta_offset; /* DCC (Gfx8-Gfx11) or HTILE (Gfx6-Gfx11) */ 398 uint64_t fmask_offset; /* Gfx6-Gfx10 */ 399 uint64_t cmask_offset; /* Gfx6-Gfx10 */ 400 uint64_t display_dcc_offset; /* Gfx9-Gfx11 */ 401 uint64_t total_size; 402 403 union { 404 /* Gfx3-8 surface info. 405 * 406 * Some of them can be set by the caller if certain parameters are 407 * desirable. The allocator will try to obey them. 408 */ 409 struct legacy_surf_layout legacy; 410 411 /* Gfx9+ surface info. 
*/ 412 struct gfx9_surf_layout gfx9; 413 } u; 414 }; 415 416 struct ac_surf_info { 417 uint32_t width; /* up to 64K */ 418 uint32_t height; /* up to 64K */ 419 uint32_t depth; /* up to 16K */ 420 uint8_t samples; /* For Z/S: samples; For color: FMASK coverage samples */ 421 uint8_t storage_samples; /* For color: allocated samples */ 422 uint8_t levels; 423 uint8_t num_channels; /* heuristic for displayability */ 424 uint16_t array_size; 425 uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */ 426 uint32_t *fmask_surf_index; 427 }; 428 429 struct ac_surf_config { 430 struct ac_surf_info info; 431 unsigned is_1d : 1; 432 unsigned is_3d : 1; 433 unsigned is_cube : 1; 434 unsigned is_array : 1; 435 }; 436 437 /* Output parameters for ac_surface_compute_nbc_view */ 438 struct ac_surf_nbc_view { 439 bool valid; 440 uint32_t width; /* up to 64K */ 441 uint32_t height; /* up to 64K */ 442 uint32_t level; 443 uint32_t num_levels; /* Used for max_mip in the resource descriptor */ 444 uint8_t tile_swizzle; 445 uint64_t base_address_offset; 446 }; 447 448 struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info, uint64_t *max_alignment); 449 void ac_addrlib_destroy(struct ac_addrlib *addrlib); 450 void *ac_addrlib_get_handle(struct ac_addrlib *addrlib); 451 452 int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info, 453 const struct ac_surf_config *config, enum radeon_surf_mode mode, 454 struct radeon_surf *surf); 455 void ac_surface_zero_dcc_fields(struct radeon_surf *surf); 456 unsigned ac_pipe_config_to_num_pipes(unsigned pipe_config); 457 458 #define AC_SURF_METADATA_FLAG_EXTRA_MD_BIT 0 459 #define AC_SURF_METADATA_FLAG_FAMILY_OVERRIDEN_BIT 1 460 void ac_surface_apply_bo_metadata(enum amd_gfx_level gfx_level, struct radeon_surf *surf, 461 uint64_t tiling_flags, enum radeon_surf_mode *mode); 462 void ac_surface_compute_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf, 463 uint64_t *tiling_flags); 
/* UMD metadata import/export. Both take a metadata array declared with 64 dwords;
 * "size_metadata" is passed by value on apply and through a pointer on compute.
 * NOTE(review): the meaning of the bool result is not visible in this header.
 */
bool ac_surface_apply_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                   unsigned num_storage_samples, unsigned num_mipmap_levels,
                                   unsigned size_metadata, const uint32_t metadata[64]);
void ac_surface_compute_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                     unsigned num_mipmap_levels, uint32_t desc[8],
                                     unsigned *size_metadata, uint32_t metadata[64],
                                     bool include_tool_md);

/* Applies an explicit offset and pitch to "surf".
 * NOTE(review): units of "pitch" and failure conditions are defined by the
 * implementation, not by this header.
 */
bool ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
                                       unsigned num_layers, unsigned num_mipmap_levels,
                                       uint64_t offset, unsigned pitch);

/* Options restricting which modifiers the queries below may report. */
struct ac_modifier_options {
   bool dcc;        /* Whether to allow DCC. */
   bool dcc_retile; /* Whether to allow use of a DCC retile map. */
};

/* DRM format modifier queries.
 * NOTE(review): mod_count is a non-const pointer next to the "mods" array,
 * presumably an in/out capacity/count pair - confirm against the implementation.
 */
bool ac_is_modifier_supported(const struct radeon_info *info,
                              const struct ac_modifier_options *options,
                              enum pipe_format format,
                              uint64_t modifier);
bool ac_get_supported_modifiers(const struct radeon_info *info,
                                const struct ac_modifier_options *options,
                                enum pipe_format format,
                                unsigned *mod_count,
                                uint64_t *mods);
bool ac_modifier_has_dcc(uint64_t modifier);
bool ac_modifier_has_dcc_retile(uint64_t modifier);
bool ac_modifier_supports_dcc_image_stores(enum amd_gfx_level gfx_level, uint64_t modifier);
void ac_modifier_max_extent(const struct radeon_info *info,
                            uint64_t modifier, uint32_t *width, uint32_t *height);

/* Per-plane queries on a computed surface. */
unsigned ac_surface_get_nplanes(const struct radeon_surf *surf);
uint64_t ac_surface_get_plane_offset(enum amd_gfx_level gfx_level,
                                     const struct radeon_surf *surf,
                                     unsigned plane, unsigned layer);
uint64_t ac_surface_get_plane_stride(enum amd_gfx_level gfx_level,
                                     const struct radeon_surf *surf,
                                     unsigned plane, unsigned level);
/* Of the whole miplevel, not an individual layer */
uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf,
                                   unsigned plane);

508 uint64_t ac_surface_addr_from_coord(struct ac_addrlib *addrlib, const struct radeon_info *info, 509 const struct radeon_surf *surf, 510 const struct ac_surf_info *surf_info, unsigned level, 511 unsigned x, unsigned y, unsigned layer, bool is_3d); 512 void ac_surface_compute_nbc_view(struct ac_addrlib *addrlib, const struct radeon_info *info, 513 const struct radeon_surf *surf, 514 const struct ac_surf_info *surf_info, unsigned level, 515 unsigned layer, struct ac_surf_nbc_view *out); 516 517 void ac_surface_print_info(FILE *out, const struct radeon_info *info, 518 const struct radeon_surf *surf); 519 520 bool ac_surface_supports_dcc_image_stores(enum amd_gfx_level gfx_level, 521 const struct radeon_surf *surf); 522 523 #ifdef AC_SURFACE_INCLUDE_NIR 524 nir_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info, 525 unsigned bpe, const struct gfx9_meta_equation *equation, 526 nir_def *dcc_pitch, nir_def *dcc_height, 527 nir_def *dcc_slice_size, 528 nir_def *x, nir_def *y, nir_def *z, 529 nir_def *sample, nir_def *pipe_xor); 530 531 nir_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info, 532 const struct gfx9_meta_equation *equation, 533 nir_def *cmask_pitch, nir_def *cmask_height, 534 nir_def *cmask_slice_size, 535 nir_def *x, nir_def *y, nir_def *z, 536 nir_def *pipe_xor, 537 nir_def **bit_position); 538 539 nir_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info, 540 const struct gfx9_meta_equation *equation, 541 nir_def *htile_pitch, 542 nir_def *htile_slice_size, 543 nir_def *x, nir_def *y, nir_def *z, 544 nir_def *pipe_xor); 545 #endif 546 547 #ifdef __cplusplus 548 } 549 #endif 550 551 #endif /* AC_SURFACE_H */ 552