1# Copyright © 2024 Intel Corporation 2 3# Permission is hereby granted, free of charge, to any person obtaining a 4# copy of this software and associated documentation files (the "Software"), 5# to deal in the Software without restriction, including without limitation 6# the rights to use, copy, modify, merge, publish, distribute, sublicense, 7# and/or sell copies of the Software, and to permit persons to whom the 8# Software is furnished to do so, subject to the following conditions: 9 10# The above copyright notice and this permission notice (including the next 11# paragraph) shall be included in all copies or substantial portions of the 12# Software. 13 14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20# IN THE SOFTWARE. 21 22from textwrap import dedent 23 24# TYPES is an ordered list of all declarations in this file. 
TYPES = []

# TYPES_BY_NAME allows the lookup of any declaration
TYPES_BY_NAME = {}


class Define:
    """Specifies a c macro definition.

    Instantiating a Define appends it to TYPES so that it is emitted in
    declaration order.  Defines are not added to TYPES_BY_NAME.

    name    -- macro identifier
    value   -- macro value
    comment -- optional comment text attached to the macro
    """
    def __init__(self, name, value, comment=None):
        self.name = name
        self.value = value
        self.comment = comment
        TYPES.append(self)


class EnumValue:
    """Allows comments and setting of enum values.

    name        -- enumerator identifier
    value       -- optional explicit value for the enumerator
    comment     -- optional comment text attached to the enumerator
    group_begin -- optional group name; marks this value as the first of
                   the named group
    group_end   -- optional group name; marks this value as the last of
                   the named group
    """
    def __init__(self, name, value=None, comment=None,
                 group_begin=None, group_end=None):
        self.name = name
        self.value = value
        self.comment = comment
        self.group_begin = group_begin
        self.group_end = group_end

    def __str__(self):
        return self.name


class Enum:
    """Stores details needed to declare and serialize an enumeration.

    name     -- enumeration type name
    values   -- iterable of enumerators; each item is either an EnumValue
                or a bare name, which is wrapped in an EnumValue
    external -- flag stored on the instance as given

    Instantiating an Enum appends it to TYPES and registers it in
    TYPES_BY_NAME under its name.
    """
    def __init__(self, name, values, external=False):
        self.name = name
        # Normalize so that every entry of self.values is an EnumValue.
        self.values = [v if isinstance(v, EnumValue) else EnumValue(v)
                       for v in values]
        self.external = external
        TYPES.append(self)
        TYPES_BY_NAME[name] = self


class Member:
    """Stores details needed to declare and serialize the member of a struct.

    member_type    -- name of the member's type
    name           -- member identifier
    array          -- optional array length, either an integer or a string
                      expression
    compiler_field -- see the comment in __init__
    comment        -- optional comment text attached to the member
    """
    def __init__(self, member_type, name, array=None, compiler_field=False,
                 comment=None):
        self.member_type = member_type
        self.name = name
        self.array = array
        # indicates whether this field is used by the compiler, and whether it
        # should be included in the shader compiler cache hash function.
        self.compiler_field = compiler_field
        self.comment = comment


class Struct:
    """Stores details needed to declare and serialize a struct.

    name    -- struct type name
    members -- list of Member objects, stored as given

    Instantiating a Struct appends it to TYPES and registers it in
    TYPES_BY_NAME under its name.
    """
    def __init__(self, name, members):
        self.name = name
        self.members = members
        TYPES.append(self)
        TYPES_BY_NAME[name] = self


INT_TYPES = {
    "uint8_t",
    "uint16_t",
    "uint64_t",
    "unsigned",
    "int",
}

FUNDAMENTAL_TYPES = {"char", "bool"} | INT_TYPES

Define("INTEL_DEVICE_MAX_NAME_SIZE", 64)
Define("INTEL_DEVICE_MAX_SLICES", 8)
Define("INTEL_DEVICE_MAX_SUBSLICES", 8, "Maximum on gfx11")
Define("INTEL_DEVICE_MAX_EUS_PER_SUBSLICE", 16, "Maximum on gfx11")
Define("INTEL_DEVICE_MAX_PIXEL_PIPES", 16, "Maximum on DG2")

Enum("intel_platform",
     [EnumValue("INTEL_PLATFORM_GFX3", value=1),
      "INTEL_PLATFORM_I965",
      "INTEL_PLATFORM_ILK",
      "INTEL_PLATFORM_G4X",
      "INTEL_PLATFORM_SNB",
      "INTEL_PLATFORM_IVB",
      "INTEL_PLATFORM_BYT",
      "INTEL_PLATFORM_HSW",
      "INTEL_PLATFORM_BDW",
      "INTEL_PLATFORM_CHV",
      "INTEL_PLATFORM_SKL",
      "INTEL_PLATFORM_BXT",
      "INTEL_PLATFORM_KBL",
      "INTEL_PLATFORM_GLK",
      "INTEL_PLATFORM_CFL",
      "INTEL_PLATFORM_ICL",
      "INTEL_PLATFORM_EHL",
      "INTEL_PLATFORM_TGL",
      "INTEL_PLATFORM_RKL",
      "INTEL_PLATFORM_DG1",
      "INTEL_PLATFORM_ADL",
      "INTEL_PLATFORM_RPL",
      EnumValue("INTEL_PLATFORM_DG2_G10", group_begin="DG2"),
      "INTEL_PLATFORM_DG2_G11",
      EnumValue("INTEL_PLATFORM_DG2_G12", group_end="DG2"),
      EnumValue("INTEL_PLATFORM_ATSM_G10", group_begin="ATSM"),
      EnumValue("INTEL_PLATFORM_ATSM_G11", group_end="ATSM"),
      EnumValue("INTEL_PLATFORM_MTL_U", group_begin="MTL"),
      EnumValue("INTEL_PLATFORM_MTL_H", group_end="MTL"),
      EnumValue("INTEL_PLATFORM_ARL_U", group_begin="ARL"),
      EnumValue("INTEL_PLATFORM_ARL_H", group_end="ARL"),
      "INTEL_PLATFORM_LNL",
      ])

Struct("intel_memory_class_instance",
       [Member("int", "klass",
               comment="Kernel backend specific class value, no translation needed yet"),
        Member("int", "instance")])

Enum("intel_device_info_mmap_mode",
     [EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_UC", value=0),
      "INTEL_DEVICE_INFO_MMAP_MODE_WC",
      "INTEL_DEVICE_INFO_MMAP_MODE_WB",
      ])

Enum("intel_device_info_coherency_mode",
     [EnumValue("INTEL_DEVICE_INFO_COHERENCY_MODE_NONE", value=0),
      EnumValue("INTEL_DEVICE_INFO_COHERENCY_MODE_1WAY",
                comment="CPU caches are snooped by GPU"),
      EnumValue("INTEL_DEVICE_INFO_COHERENCY_MODE_2WAY",
                comment="Fully coherent between GPU and CPU"),
      ])

Struct("intel_device_info_pat_entry",
       [Member("uint8_t", "index"),
        Member("intel_device_info_mmap_mode", "mmap"),
        Member("intel_device_info_coherency_mode", "coherency")])

Enum("intel_cmat_scope",
     [EnumValue("INTEL_CMAT_SCOPE_NONE", value=0),
      "INTEL_CMAT_SCOPE_SUBGROUP"])

Enum("intel_cooperative_matrix_component_type",
     ["INTEL_CMAT_FLOAT16",
      "INTEL_CMAT_FLOAT32",
      "INTEL_CMAT_SINT32",
      "INTEL_CMAT_SINT8",
      "INTEL_CMAT_UINT32",
      "INTEL_CMAT_UINT8"])

Enum("intel_engine_class",
     ["INTEL_ENGINE_CLASS_RENDER",
      "INTEL_ENGINE_CLASS_COPY",
      "INTEL_ENGINE_CLASS_VIDEO",
      "INTEL_ENGINE_CLASS_VIDEO_ENHANCE",
      "INTEL_ENGINE_CLASS_COMPUTE",
      "INTEL_ENGINE_CLASS_INVALID"])

Struct("intel_cooperative_matrix_configuration",
       [Member("intel_cmat_scope", "scope",
               comment=dedent("""\
               Matrix A is MxK.
               Matrix B is KxN.
               Matrix C and Matrix Result are MxN.

               Result = A * B + C;""")),
        Member("uint8_t", "m"),
        Member("uint8_t", "n"),
        Member("uint8_t", "k"),
        Member("intel_cooperative_matrix_component_type", "a"),
        Member("intel_cooperative_matrix_component_type", "b"),
        Member("intel_cooperative_matrix_component_type", "c"),
        Member("intel_cooperative_matrix_component_type", "result")])

Enum("intel_kmd_type",
     ["INTEL_KMD_TYPE_INVALID",
      "INTEL_KMD_TYPE_I915",
      "INTEL_KMD_TYPE_XE",
      "INTEL_KMD_TYPE_STUB",
      "INTEL_KMD_TYPE_LAST",
      ], external=True)

Struct("intel_device_info_mem_region",
       [Member("uint64_t", "size"),
        Member("uint64_t", "free")])

Struct("intel_device_info_ram_desc",
       [Member("intel_memory_class_instance", "mem"),
        Member("intel_device_info_mem_region", "mappable"),
        Member("intel_device_info_mem_region", "unmappable")])

Struct("intel_device_info_mem_desc",
       [Member("bool", "use_class_instance"),
        Member("intel_device_info_ram_desc", "sram"),
        Member("intel_device_info_ram_desc", "vram")])

Struct("intel_device_info_urb_desc",
       [Member("int", "size"),
        Member("int", "min_entries", array=4),
        Member("int", "max_entries", array=4)])

Struct("intel_device_info_pat_desc",
       [Member("intel_device_info_pat_entry", "cached_coherent",
               comment="To be used when CPU access is frequent, WB + 1 or 2 way coherent"),

        Member("intel_device_info_pat_entry", "scanout",
               comment="scanout and external BOs"),

        Member("intel_device_info_pat_entry", "writeback_incoherent",
               comment=("BOs without special needs, can be WB not coherent "
                        "or WC it depends on the platforms and KMD")),

        Member("intel_device_info_pat_entry", "writecombining")])

Struct("intel_device_info",
       [Member("intel_kmd_type", "kmd_type"),

        Member("int", "ver", compiler_field=True,
               comment="Driver internal numbers used to differentiate platforms."),

        Member("int", "verx10", compiler_field=True),
        Member("int", "display_ver"),

        Member("int", "revision", compiler_field=True,
               comment=dedent("""\
               This revision is from ioctl (I915_PARAM_REVISION) unlike
               pci_revision_id from drm device. Its value is not always
               same as the pci_revision_id.""")),

        Member("int", "gt"),
        Member("uint16_t", "pci_domain", comment="PCI info"),
        Member("uint8_t", "pci_bus"),
        Member("uint8_t", "pci_dev"),
        Member("uint8_t", "pci_func"),
        Member("uint16_t", "pci_device_id"),
        Member("uint8_t", "pci_revision_id"),
        Member("intel_platform", "platform", compiler_field=True),
        Member("bool", "has_hiz_and_separate_stencil"),
        Member("bool", "must_use_separate_stencil"),
        Member("bool", "has_sample_with_hiz"),
        Member("bool", "has_bit6_swizzle"),
        Member("bool", "has_llc"),
        Member("bool", "has_pln", compiler_field=True),
        Member("bool", "has_64bit_float", compiler_field=True),
        Member("bool", "has_64bit_float_via_math_pipe", compiler_field=True),
        Member("bool", "has_64bit_int", compiler_field=True),
        Member("bool", "has_integer_dword_mul", compiler_field=True),
        Member("bool", "has_compr4", compiler_field=True),
        Member("bool", "has_surface_tile_offset"),
        Member("bool", "supports_simd16_3src", compiler_field=True),
        Member("bool", "disable_ccs_repack"),

        Member("bool", "has_illegal_ccs_values",
               comment="True if CCS needs to be initialized before use."),

        Member("bool", "has_flat_ccs",
               comment=dedent("""\
               True if CCS uses a flat virtual address translation to a memory
               carve-out, rather than aux map translations, or additional surfaces.""")),

        Member("bool", "has_aux_map"),
        Member("bool", "has_caching_uapi"),
        Member("bool", "has_tiling_uapi"),
        Member("bool", "has_ray_tracing", compiler_field=True),
        Member("bool", "has_ray_query"),
        Member("bool", "has_local_mem"),
        Member("bool", "has_lsc", compiler_field=True),
        Member("bool", "has_mesh_shading"),
        Member("bool", "has_mmap_offset"),
        Member("bool", "has_userptr_probe"),
        Member("bool", "has_context_isolation"),
        Member("bool", "has_set_pat_uapi"),
        Member("bool", "has_indirect_unroll"),
        Member("bool", "has_negative_rhw_bug", compiler_field=True,
               comment="Intel hardware quirks"),

        Member("bool", "has_coarse_pixel_primitive_and_cb", compiler_field=True,
               comment=dedent("""\
               Whether this platform supports fragment shading rate controlled by a
               primitive in geometry shaders and by a control buffer.""")),

        Member("bool", "has_compute_engine",
               comment="Whether this platform has compute engine"),

        Member("bool", "needs_unlit_centroid_workaround", compiler_field=True,
               comment=dedent("""\
               Some versions of Gen hardware don't do centroid interpolation correctly
               on unlit pixels, causing incorrect values for derivatives near triangle
               edges. Enabling this flag causes the fragment shader to use
               non-centroid interpolation for unlit pixels, at the expense of two extra
               fragment shader instructions.""")),

        Member("bool", "is_adl_n",
               comment="We need this for ADL-N specific Wa_14014966230."),

        Member("unsigned", "num_slices",
               comment=dedent("""\
               GPU hardware limits

               In general, you can find shader thread maximums by looking at the "Maximum
               Number of Threads" field in the Intel PRM description of the 3DSTATE_VS,
               3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry
               limits come from the "Number of URB Entries" field in the
               3DSTATE_URB_VS command and friends.

               These fields are used to calculate the scratch space to allocate. The
               amount of scratch space can be larger without being harmful on modern
               GPUs, however, prior to Haswell, programming the maximum number of threads
               to greater than the hardware maximum would cause GPU performance to tank.

               Total number of slices present on the device whether or not they've been
               fused off.

               XXX: CS thread counts are limited by the inability to do cross subslice
               communication. It is the effectively the number of logical threads which
               can be executed in a subslice. Fuse configurations may cause this number
               to change, so we program @max_cs_threads as the lower maximum.""")),

        Member("unsigned", "max_slices", compiler_field=True,
               comment=dedent("""\
               Maximum number of slices present on this device (can be more than
               num_slices if some slices are fused).""")),

        Member("unsigned", "num_subslices", array="INTEL_DEVICE_MAX_SLICES",
               comment="Number of subslices for each slice (used to be uniform until CNL)."),

        Member("unsigned", "max_subslices_per_slice", compiler_field=True,
               comment=dedent("""\
               Maximum number of subslices per slice present on this device (can be
               more than the maximum value in the num_subslices[] array if some
               subslices are fused).

               This is GT_SS_PER_SLICE in SKU.""")),

        Member("unsigned", "ppipe_subslices", array="INTEL_DEVICE_MAX_PIXEL_PIPES",
               comment="Number of subslices on each pixel pipe (ICL)."),

        Member("unsigned", "max_eus_per_subslice", compiler_field=True,
               comment="Maximum number of EUs per subslice (some EUs can be fused off)."),

        Member("unsigned", "num_thread_per_eu", compiler_field=True,
               comment="Number of threads per eu, varies between 4 and 8 between generations."),

        Member("uint8_t", "slice_masks",
               comment="A bit mask of the slices available."),

        Member("uint8_t", "subslice_masks",
               array="INTEL_DEVICE_MAX_SLICES * DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)",
               compiler_field=True,
               comment=dedent("""\
               An array of bit mask of the subslices available, use subslice_slice_stride
               to access this array.""")),

        Member("unsigned", "subslice_total",
               comment=dedent("""\
               The number of enabled subslices (considering fusing). For exactly which
               subslices are enabled, see subslice_masks[].""")),

        Member("uint8_t", "eu_masks",
               array=("INTEL_DEVICE_MAX_SLICES * INTEL_DEVICE_MAX_SUBSLICES * "
                      "DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)"),
               comment=dedent("""\
               An array of bit mask of EUs available, use eu_slice_stride &
               eu_subslice_stride to access this array.""")),

        Member("uint16_t", "subslice_slice_stride", compiler_field=True,
               comment="Stride to access subslice_masks[]."),

        Member("uint16_t", "eu_slice_stride",
               comment="Strides to access eu_masks[]."),

        Member("uint16_t", "eu_subslice_stride"),
        Member("unsigned", "l3_banks"),

        Member("unsigned", "max_vs_threads",
               comment="Maximum Vertex Shader threads"),

        Member("unsigned", "max_tcs_threads",
               comment="Maximum Hull Shader threads"),

        Member("unsigned", "max_tes_threads",
               comment="Maximum Domain Shader threads"),

        Member("unsigned", "max_gs_threads",
               comment="Maximum Geometry Shader threads"),

        Member("unsigned", "max_wm_threads",
               comment=dedent("""\
               Theoretical maximum number of Pixel Shader threads.

               PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will
               automatically scale pixel shader thread count, based on a single value
               programmed into 3DSTATE_PS.

               To calculate the maximum number of threads for Gfx8 beyond (which have
               multiple Pixel Shader Dispatchers):

               - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
               - Usually there's only one PSD per subslice, so use the number of
                 subslices for number of PSDs.
               - For max_wm_threads, the total should be PSD threads * #PSDs.""")),

        Member("unsigned", "max_threads_per_psd"),

        Member("unsigned", "max_cs_threads",
               comment=dedent("""\
               Maximum Compute Shader threads.

               Thread count * number of EUs per subslice""")),

        Member("unsigned", "max_cs_workgroup_threads", compiler_field=True,
               comment=dedent("""\
               Maximum number of threads per workgroup supported by the GPGPU_WALKER or
               COMPUTE_WALKER command.

               This may be smaller than max_cs_threads as it takes into account added
               restrictions on the GPGPU/COMPUTE_WALKER commands. While max_cs_threads
               expresses the total parallelism of the GPU, this expresses the maximum
               number of threads we can dispatch in a single workgroup.""")),

        Member("unsigned", "max_scratch_ids", array="MESA_SHADER_STAGES",
               compiler_field=True,
               comment=dedent("""\
               The maximum number of potential scratch ids. Due to hardware
               implementation details, the range of scratch ids may be larger than the
               number of subslices.""")),

        Member("intel_device_info_urb_desc", "urb"),
        Member("unsigned", "max_constant_urb_size_kb"),
        Member("unsigned", "mesh_max_constant_urb_size_kb"),
        Member("unsigned", "engine_class_prefetch",
               array="INTEL_ENGINE_CLASS_COMPUTE + 1"),
        Member("unsigned", "mem_alignment"),
        Member("uint64_t", "timestamp_frequency"),
        Member("uint64_t", "aperture_bytes"),
        Member("uint64_t", "gtt_size"),
        Member("int", "simulator_id"),
        Member("char", "name", array="INTEL_DEVICE_MAX_NAME_SIZE"),
        Member("bool", "no_hw"),
        Member("bool", "apply_hwconfig"),
        Member("intel_device_info_mem_desc", "mem"),
        Member("intel_device_info_pat_desc", "pat"),
        Member("intel_cooperative_matrix_configuration",
               "cooperative_matrix_configurations", array=4)]
       )