1 /* 2 * Copyright © 2017 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sub license, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 14 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 15 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS 16 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 */ 25 26 #ifndef AC_GPU_INFO_H 27 #define AC_GPU_INFO_H 28 29 #include "amd_family.h" 30 31 #include <stdbool.h> 32 #include <stddef.h> 33 #include <stdint.h> 34 #include <stdio.h> 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 #define AMD_MAX_SE 8 41 #define AMD_MAX_SA_PER_SE 2 42 43 struct amdgpu_gpu_info; 44 45 struct amd_ip_info { 46 uint8_t ver_major; 47 uint8_t ver_minor; 48 uint8_t num_queues; 49 }; 50 51 struct radeon_info { 52 /* Device info. */ 53 const char *name; 54 char lowercase_name[32]; 55 const char *marketing_name; 56 uint32_t num_se; /* only enabled SEs */ 57 uint32_t num_rb; /* only enabled RBs */ 58 uint32_t num_cu; /* only enabled CUs */ 59 uint32_t max_gpu_freq_mhz; /* also known as the shader clock */ 60 uint32_t max_gflops; 61 uint32_t l1_cache_size; 62 uint32_t l2_cache_size; 63 uint32_t l3_cache_size_mb; 64 uint32_t num_tcc_blocks; /* also the number of memory channels */ 65 uint32_t memory_freq_mhz; 66 uint32_t memory_freq_mhz_effective; 67 uint32_t memory_bus_width; 68 uint32_t memory_bandwidth_gbps; 69 uint32_t clock_crystal_freq; 70 struct amd_ip_info ip[AMD_NUM_IP_TYPES]; 71 72 /* Identification. */ 73 /* PCI info: domain:bus:dev:func */ 74 uint32_t pci_domain; 75 uint32_t pci_bus; 76 uint32_t pci_dev; 77 uint32_t pci_func; 78 79 uint32_t pci_id; 80 uint32_t pci_rev_id; 81 enum radeon_family family; 82 enum amd_gfx_level gfx_level; 83 uint32_t family_id; 84 uint32_t chip_external_rev; 85 uint32_t chip_rev; /* 0 = A0, 1 = A1, etc. */ 86 87 /* Flags. */ 88 bool is_pro_graphics; 89 bool has_graphics; /* false if the chip is compute-only */ 90 uint32_t ib_pad_dw_mask[AMD_NUM_IP_TYPES]; 91 bool has_clear_state; 92 bool has_distributed_tess; 93 bool has_dcc_constant_encode; 94 bool has_rbplus; /* if RB+ registers exist */ 95 bool rbplus_allowed; /* if RB+ is allowed */ 96 bool has_load_ctx_reg_pkt; 97 bool has_out_of_order_rast; 98 bool has_packed_math_16bit; 99 bool has_accelerated_dot_product; 100 bool cpdma_prefetch_writes_memory; 101 bool has_gfx9_scissor_bug; 102 bool has_tc_compat_zrange_bug; 103 bool has_msaa_sample_loc_bug; 104 bool has_ls_vgpr_init_bug; 105 bool has_zero_index_buffer_bug; 106 bool has_image_load_dcc_bug; 107 bool has_two_planes_iterate256_bug; 108 bool has_vgt_flush_ngg_legacy_bug; 109 bool has_cs_regalloc_hang_bug; 110 bool has_32bit_predication; 111 bool has_3d_cube_border_color_mipmap; 112 bool never_stop_sq_perf_counters; 113 bool has_sqtt_rb_harvest_bug; 114 bool has_sqtt_auto_flush_mode_bug; 115 bool never_send_perfcounter_stop; 116 bool discardable_allows_big_page; 117 bool has_export_conflict_bug; 118 119 /* Display features. */ 120 /* There are 2 display DCC codepaths, because display expects unaligned DCC. */ 121 /* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */ 122 bool use_display_dcc_unaligned; 123 /* Allocate both aligned and unaligned DCC and use the retile blit. */ 124 bool use_display_dcc_with_retile_blit; 125 126 /* Memory info. */ 127 uint32_t pte_fragment_size; 128 uint32_t gart_page_size; 129 uint32_t gart_size_kb; 130 uint32_t vram_size_kb; 131 uint64_t vram_vis_size_kb; 132 uint32_t vram_type; 133 uint32_t max_heap_size_kb; 134 uint32_t min_alloc_size; 135 uint32_t address32_hi; 136 bool has_dedicated_vram; 137 bool all_vram_visible; 138 bool smart_access_memory; 139 bool has_l2_uncached; 140 bool r600_has_virtual_memory; 141 uint32_t max_tcc_blocks; 142 uint32_t tcc_cache_line_size; 143 bool tcc_rb_non_coherent; /* whether L2 inv is needed for render->texture transitions */ 144 unsigned pc_lines; 145 uint32_t lds_size_per_workgroup; 146 uint32_t lds_alloc_granularity; 147 uint32_t lds_encode_granularity; 148 149 /* CP info. */ 150 bool gfx_ib_pad_with_type2; 151 unsigned ib_alignment; /* both start and size alignment */ 152 uint32_t me_fw_version; 153 uint32_t me_fw_feature; 154 uint32_t mec_fw_version; 155 uint32_t mec_fw_feature; 156 uint32_t pfp_fw_version; 157 uint32_t pfp_fw_feature; 158 159 /* Multimedia info. */ 160 struct { 161 bool vcn_decode; /* TODO: remove */ 162 } has_video_hw; 163 164 uint32_t uvd_fw_version; 165 uint32_t vce_fw_version; 166 uint32_t vce_harvest_config; 167 struct video_caps_info { 168 struct { 169 uint32_t valid; 170 uint32_t max_width; 171 uint32_t max_height; 172 uint32_t max_pixels_per_frame; 173 uint32_t max_level; 174 uint32_t pad; 175 } codec_info[8]; /* the number of available codecs */ 176 } dec_caps, enc_caps; 177 178 /* Kernel & winsys capabilities. */ 179 uint32_t drm_major; /* version */ 180 uint32_t drm_minor; 181 uint32_t drm_patchlevel; 182 bool is_amdgpu; 183 bool has_userptr; 184 bool has_syncobj; 185 bool has_timeline_syncobj; 186 bool has_fence_to_handle; 187 bool has_local_buffers; 188 bool has_bo_metadata; 189 bool has_eqaa_surface_allocator; 190 bool has_sparse_vm_mappings; 191 bool has_scheduled_fence_dependency; 192 bool has_stable_pstate; 193 /* Whether SR-IOV is enabled or amdgpu.mcbp=1 was set on the kernel command line. */ 194 bool mid_command_buffer_preemption_enabled; 195 bool has_tmz_support; 196 bool kernel_has_modifiers; 197 198 /* Shader cores. */ 199 uint32_t cu_mask[AMD_MAX_SE][AMD_MAX_SA_PER_SE]; 200 uint32_t r600_max_quad_pipes; /* wave size / 16 */ 201 uint32_t max_good_cu_per_sa; 202 uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */ 203 uint32_t max_se; /* number of shader engines incl. disabled ones */ 204 uint32_t max_sa_per_se; /* shader arrays per shader engine */ 205 uint32_t max_wave64_per_simd; 206 uint32_t num_physical_sgprs_per_simd; 207 uint32_t num_physical_wave64_vgprs_per_simd; 208 uint32_t num_simd_per_compute_unit; 209 uint32_t min_sgpr_alloc; 210 uint32_t max_sgpr_alloc; 211 uint32_t sgpr_alloc_granularity; 212 uint32_t min_wave64_vgpr_alloc; 213 uint32_t max_vgpr_alloc; 214 uint32_t wave64_vgpr_alloc_granularity; 215 uint32_t max_scratch_waves; 216 217 /* Render backends (color + depth blocks). */ 218 uint32_t r300_num_gb_pipes; 219 uint32_t r300_num_z_pipes; 220 uint32_t r600_gb_backend_map; /* R600 harvest config */ 221 bool r600_gb_backend_map_valid; 222 uint32_t r600_num_banks; 223 uint32_t mc_arb_ramcfg; 224 uint32_t gb_addr_config; 225 uint32_t pa_sc_tile_steering_override; /* CLEAR_STATE also sets this */ 226 uint32_t max_render_backends; /* number of render backends incl. disabled ones */ 227 uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */ 228 uint32_t pipe_interleave_bytes; 229 uint32_t enabled_rb_mask; /* GCN harvest config */ 230 uint64_t max_alignment; /* from addrlib */ 231 uint32_t pbb_max_alloc_count; 232 233 /* Tile modes. */ 234 uint32_t si_tile_mode_array[32]; 235 uint32_t cik_macrotile_mode_array[16]; 236 237 /* AMD_CU_MASK environment variable or ~0. */ 238 bool spi_cu_en_has_effect; 239 uint32_t spi_cu_en; 240 }; 241 242 bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info); 243 244 void ac_compute_driver_uuid(char *uuid, size_t size); 245 246 void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size); 247 void ac_print_gpu_info(struct radeon_info *info, FILE *f); 248 int ac_get_gs_table_depth(enum amd_gfx_level gfx_level, enum radeon_family family); 249 void ac_get_raster_config(struct radeon_info *info, uint32_t *raster_config_p, 250 uint32_t *raster_config_1_p, uint32_t *se_tile_repeat_p); 251 void ac_get_harvested_configs(struct radeon_info *info, unsigned raster_config, 252 unsigned *cik_raster_config_1_p, unsigned *raster_config_se); 253 unsigned ac_get_compute_resource_limits(const struct radeon_info *info, 254 unsigned waves_per_threadgroup, unsigned max_waves_per_sh, 255 unsigned threadgroups_per_cu); 256 257 struct ac_hs_info { 258 uint32_t tess_offchip_block_dw_size; 259 uint32_t max_offchip_buffers; 260 uint32_t hs_offchip_param; 261 uint32_t tess_factor_ring_size; 262 uint32_t tess_offchip_ring_offset; 263 uint32_t tess_offchip_ring_size; 264 }; 265 266 void ac_get_hs_info(struct radeon_info *info, 267 struct ac_hs_info *hs); 268 269 /* Task rings BO layout information. 270 * This BO is shared between GFX and ACE queues so that the ACE and GFX 271 * firmware can cooperate on task->mesh dispatches and is also used to 272 * store the task payload which is passed to mesh shaders. 273 * 274 * The driver only needs to create this BO once, 275 * and it will always be able to accomodate the maximum needed 276 * task payload size. 277 * 278 * The following memory layout is used: 279 * 1. Control buffer: 9 DWORDs, 256 byte aligned 280 * Used by the firmware to maintain the current state. 281 * (padding) 282 * 2. Draw ring: 4 DWORDs per entry, 256 byte aligned 283 * Task shaders store the mesh dispatch size here. 284 * (padding) 285 * 3. Payload ring: 16K bytes per entry, 256 byte aligned. 286 * This is where task payload is stored by task shaders and 287 * read by mesh shaders. 288 * 289 */ 290 struct ac_task_info { 291 uint32_t draw_ring_offset; 292 uint32_t payload_ring_offset; 293 uint32_t bo_size_bytes; 294 uint16_t num_entries; 295 }; 296 297 /* Size of each payload entry in the task payload ring. 298 * Spec requires minimum 16K bytes. 299 */ 300 #define AC_TASK_PAYLOAD_ENTRY_BYTES 16384 301 302 /* Size of each draw entry in the task draw ring. 303 * 4 DWORDs per entry. 304 */ 305 #define AC_TASK_DRAW_ENTRY_BYTES 16 306 307 /* Size of the task control buffer. 9 DWORDs. */ 308 #define AC_TASK_CTRLBUF_BYTES 36 309 310 void ac_get_task_info(struct radeon_info *info, 311 struct ac_task_info *task_info); 312 313 #ifdef __cplusplus 314 } 315 #endif 316 317 #endif /* AC_GPU_INFO_H */ 318