1# Copyright © 2024 Intel Corporation 2 3# Permission is hereby granted, free of charge, to any person obtaining a 4# copy of this software and associated documentation files (the "Software"), 5# to deal in the Software without restriction, including without limitation 6# the rights to use, copy, modify, merge, publish, distribute, sublicense, 7# and/or sell copies of the Software, and to permit persons to whom the 8# Software is furnished to do so, subject to the following conditions: 9 10# The above copyright notice and this permission notice (including the next 11# paragraph) shall be included in all copies or substantial portions of the 12# Software. 13 14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20# IN THE SOFTWARE. 21 22from textwrap import dedent 23 24# TYPES is an ordered list of all declarations in this file. 
TYPES = []

# TYPES_BY_NAME maps a declaration name to its Enum or Struct object.
# Define objects are recorded in TYPES only and are not reachable here.
TYPES_BY_NAME = {}


class Define:
    """A C macro definition: a name, a value and an optional comment.

    Constructing a Define appends it to the module-level TYPES list.
    """

    def __init__(self, name, value, comment=None):
        self.name, self.value, self.comment = name, value, comment
        TYPES.append(self)


class EnumValue:
    """A single enumerator.

    Besides its name, an enumerator may carry an explicit value, a comment
    and group_begin / group_end labels.
    """

    def __init__(self, name, value=None, comment=None,
                 group_begin=None, group_end=None):
        self.name, self.value = name, value
        self.comment = comment
        self.group_begin, self.group_end = group_begin, group_end

    def __str__(self):
        return self.name


class Enum:
    """An enumeration declaration.

    `values` may mix EnumValue objects and other entries (bare names in this
    file); anything that is not already an EnumValue is wrapped in one.
    Constructing an Enum appends it to TYPES and registers it in
    TYPES_BY_NAME under `name`.
    """

    def __init__(self, name, values, external=False):
        self.name = name
        self.external = external
        # Normalise so that every stored entry is an EnumValue.
        self.values = [entry if isinstance(entry, EnumValue) else EnumValue(entry)
                       for entry in values]
        TYPES.append(self)
        TYPES_BY_NAME[name] = self


class Member:
    """One field of a Struct: its type name, field name and optional extras."""

    def __init__(self, member_type, name, array=None,
                 compiler_field=False, ray_tracing_field=False,
                 comment=None):
        self.member_type, self.name = member_type, name
        self.array = array
        self.comment = comment
        # compiler_field indicates whether this field is used by the
        # compiler, and whether it should be included in the shader compiler
        # cache hash function.
        self.compiler_field = compiler_field
        self.ray_tracing_field = ray_tracing_field


class Struct:
    """A struct declaration: a name plus its list of Member objects.

    Constructing a Struct appends it to TYPES and registers it in
    TYPES_BY_NAME under `name`.
    """

    def __init__(self, name, members):
        self.name, self.members = name, members
        TYPES.append(self)
        TYPES_BY_NAME[name] = self


INT_TYPES = {
    "uint8_t",
    "uint16_t",
    "uint32_t",
    "uint64_t",
    "unsigned",
    "int",
}

FUNDAMENTAL_TYPES = {"char", "bool"} | INT_TYPES

Define("INTEL_DEVICE_MAX_NAME_SIZE", 64)
Define("INTEL_DEVICE_MAX_SLICES", 8)
Define("INTEL_DEVICE_MAX_SUBSLICES", 8, comment="Maximum on gfx11")
Define("INTEL_DEVICE_MAX_EUS_PER_SUBSLICE", 16, comment="Maximum on gfx11")
Define("INTEL_DEVICE_MAX_PIXEL_PIPES", 16, comment="Maximum on DG2")

Enum(
    "intel_platform",
    [
        # The first enumerator is explicitly assigned the value 1.
        EnumValue("INTEL_PLATFORM_GFX3", value=1),
        "INTEL_PLATFORM_I965",
        "INTEL_PLATFORM_ILK",
        "INTEL_PLATFORM_G4X",
        "INTEL_PLATFORM_SNB",
        "INTEL_PLATFORM_IVB",
        "INTEL_PLATFORM_BYT",
        "INTEL_PLATFORM_HSW",
        "INTEL_PLATFORM_BDW",
        "INTEL_PLATFORM_CHV",
        "INTEL_PLATFORM_SKL",
        "INTEL_PLATFORM_BXT",
        "INTEL_PLATFORM_KBL",
        "INTEL_PLATFORM_GLK",
        "INTEL_PLATFORM_CFL",
        "INTEL_PLATFORM_ICL",
        "INTEL_PLATFORM_EHL",
        "INTEL_PLATFORM_TGL",
        "INTEL_PLATFORM_RKL",
        "INTEL_PLATFORM_DG1",
        "INTEL_PLATFORM_ADL",
        "INTEL_PLATFORM_RPL",
        # Entries below carry group_begin / group_end labels on the first
        # and last enumerator of each labelled run.
        EnumValue("INTEL_PLATFORM_DG2_G10", group_begin="DG2"),
        "INTEL_PLATFORM_DG2_G11",
        EnumValue("INTEL_PLATFORM_DG2_G12", group_end="DG2"),
        EnumValue("INTEL_PLATFORM_ATSM_G10", group_begin="ATSM"),
        EnumValue("INTEL_PLATFORM_ATSM_G11", group_end="ATSM"),
        EnumValue("INTEL_PLATFORM_MTL_U", group_begin="MTL"),
        EnumValue("INTEL_PLATFORM_MTL_H", group_end="MTL"),
        EnumValue("INTEL_PLATFORM_ARL_U", group_begin="ARL"),
        EnumValue("INTEL_PLATFORM_ARL_H", group_end="ARL"),
        "INTEL_PLATFORM_LNL",
        "INTEL_PLATFORM_BMG",
        "INTEL_PLATFORM_PTL",
    ],
)
# Every Struct(...) / Enum(...) call below registers itself in TYPES (in call
# order) and in TYPES_BY_NAME. Member types that are not fundamental C types
# refer, by name, to declarations made earlier in this file.

# A (klass, instance) pair of ints.
Struct("intel_memory_class_instance",
       [ Member("int", "klass",
                comment = "Kernel backend specific class value, no translation needed yet"),
         Member("int", "instance")])

# CPU mmap caching modes; the first enumerator is explicitly assigned 0.
Enum("intel_device_info_mmap_mode",
      [EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_UC", value=0),
       EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WC"),
       EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WB"),
       EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_XD",
                 comment=dedent("""\
                 Xe2+ only. Only supported in GPU side and used for displayable
                 buffers."""))
      ])

# One PAT entry: an index plus the mmap mode to use for CPU mappings.
Struct("intel_device_info_pat_entry",
       [Member("uint8_t", "index"),
        Member("intel_device_info_mmap_mode", "mmap",
               comment=dedent("""\
               This tells KMD what caching mode the CPU mapping should use.
               It has nothing to do with any PAT cache modes."""))])

# Cooperative-matrix scope; the first enumerator is explicitly assigned 0.
Enum("intel_cmat_scope",
     [EnumValue("INTEL_CMAT_SCOPE_NONE", value=0),
      "INTEL_CMAT_SCOPE_SUBGROUP"])

# Component types usable for the a/b/c/result members of
# intel_cooperative_matrix_configuration.
Enum("intel_cooperative_matrix_component_type",
     ["INTEL_CMAT_FLOAT16",
      "INTEL_CMAT_FLOAT32",
      "INTEL_CMAT_SINT32",
      "INTEL_CMAT_SINT8",
      "INTEL_CMAT_UINT32",
      "INTEL_CMAT_UINT8"])

# Engine classes. The last enumerator, INTEL_ENGINE_CLASS_INVALID, is also
# used below as the array size of the engine_class_* members of
# intel_device_info.
Enum("intel_engine_class",
     ["INTEL_ENGINE_CLASS_RENDER",
      "INTEL_ENGINE_CLASS_COPY",
      "INTEL_ENGINE_CLASS_VIDEO",
      "INTEL_ENGINE_CLASS_VIDEO_ENHANCE",
      "INTEL_ENGINE_CLASS_COMPUTE",
      "INTEL_ENGINE_CLASS_INVALID"])

# One cooperative-matrix configuration: scope, the m/n/k dimensions and the
# component type of each operand. The explanatory text is attached to the
# first member ("scope").
Struct("intel_cooperative_matrix_configuration",
       [Member("intel_cmat_scope", "scope",
               comment=dedent("""\
               Matrix A is MxK.
               Matrix B is KxN.
               Matrix C and Matrix Result are MxN.

               Result = A * B + C;""")),
        Member("uint8_t", "m"),
        Member("uint8_t", "n"),
        Member("uint8_t", "k"),
        Member("intel_cooperative_matrix_component_type", "a"),
        Member("intel_cooperative_matrix_component_type", "b"),
        Member("intel_cooperative_matrix_component_type", "c"),
        Member("intel_cooperative_matrix_component_type", "result")])

# The only declaration in this file created with external=True.
# NOTE(review): presumably this means the enum is defined elsewhere rather
# than emitted by the generator -- confirm against the code that consumes
# Enum.external.
Enum("intel_kmd_type",
     ["INTEL_KMD_TYPE_INVALID",
      "INTEL_KMD_TYPE_I915",
      "INTEL_KMD_TYPE_XE",
      "INTEL_KMD_TYPE_STUB",
      "INTEL_KMD_TYPE_LAST"
      ], external=True)

# Size / free pair for one memory region.
Struct("intel_device_info_mem_region",
       [Member("uint64_t", "size"),
        Member("uint64_t", "free")])

# One RAM description: its memory class instance plus mappable and
# unmappable regions.
Struct("intel_device_info_ram_desc",
       [Member("intel_memory_class_instance", "mem"),
        Member("intel_device_info_mem_region", "mappable"),
        Member("intel_device_info_mem_region", "unmappable")])

# Memory description made of two ram_desc members, sram and vram.
Struct("intel_device_info_mem_desc",
       [Member("bool", "use_class_instance"),
        Member("intel_device_info_ram_desc", "sram"),
        Member("intel_device_info_ram_desc", "vram")])

# URB description: a size and two 4-element int arrays.
Struct("intel_device_info_urb_desc",
       [Member("int", "size"),
        Member("int", "min_entries", array=4),
        Member("int", "max_entries", array=4)])

# A set of named PAT entries, one per usage.
Struct("intel_device_info_pat_desc",
       [Member("intel_device_info_pat_entry", "cached_coherent",
               comment="To be used when CPU access is frequent, WB + 1 or 2 way coherent"),

        Member("intel_device_info_pat_entry", "scanout",
               comment="scanout and external BOs"),

        Member("intel_device_info_pat_entry", "compressed",
               comment="Only supported in Xe2, compressed + WC"),

        Member("intel_device_info_pat_entry", "writeback_incoherent",
               comment=("BOs without special needs, can be WB not coherent "
                        "or WC it depends on the platforms and KMD")),

        Member("intel_device_info_pat_entry", "writecombining")])

# The main device description. Members created with compiler_field=True are
# the ones Member documents as used by the compiler and included in the
# shader compiler cache hash function. Array sizes given as strings name
# Defines or enumerators declared earlier in this file, except
# MESA_SHADER_STAGES and DIV_ROUND_UP, which are not defined here.
Struct("intel_device_info",
       [Member("intel_kmd_type", "kmd_type"),

        Member("int", "ver", compiler_field=True,
               comment="Driver internal numbers used to differentiate platforms."),

        Member("int", "verx10", compiler_field=True),

        Member("uint32_t", "gfx_ip_ver", compiler_field=True,
               comment=dedent("""\
               This is the run-time hardware GFX IP version that may be more specific
               than ver/verx10. ver/verx10 may be more useful for comparing a class
               of devices whereas gfx_ip_ver may be more useful for precisely
               checking for a graphics ip type. GFX_IP_VER(major, minor) should be
               used to compare IP versions.""")),

        Member("int", "revision",
               comment=dedent("""\
               This revision is queried from KMD unlike
               pci_revision_id from drm device. Its value is not always
               same as the pci_revision_id.
               For LNL+ this is the stepping of GT IP/GMD RevId.""")),

        Member("int", "gt"),
        Member("uint16_t", "pci_domain", comment="PCI info"),
        Member("uint8_t", "pci_bus"),
        Member("uint8_t", "pci_dev"),
        Member("uint8_t", "pci_func"),
        Member("uint16_t", "pci_device_id"),
        Member("uint8_t", "pci_revision_id"),
        Member("intel_platform", "platform", compiler_field=True),
        # Boolean feature / quirk flags.
        Member("bool", "has_hiz_and_separate_stencil"),
        Member("bool", "must_use_separate_stencil"),
        Member("bool", "has_sample_with_hiz"),
        Member("bool", "has_bit6_swizzle"),
        Member("bool", "has_llc"),
        Member("bool", "has_pln", compiler_field=True),
        Member("bool", "has_64bit_float", compiler_field=True),
        Member("bool", "has_64bit_float_via_math_pipe", compiler_field=True),
        Member("bool", "has_64bit_int", compiler_field=True),
        Member("bool", "has_integer_dword_mul", compiler_field=True),
        Member("bool", "has_compr4", compiler_field=True),
        Member("bool", "has_surface_tile_offset"),
        Member("bool", "supports_simd16_3src", compiler_field=True),
        Member("bool", "disable_ccs_repack"),

        Member("bool", "has_illegal_ccs_values",
               comment="True if CCS needs to be initialized before use."),

        Member("bool", "has_flat_ccs",
               comment=dedent("""\
               True if CCS uses a flat virtual address translation to a memory
               carve-out, rather than aux map translations, or additional surfaces.""")),

        Member("bool", "has_aux_map"),
        Member("bool", "has_caching_uapi"),
        Member("bool", "has_tiling_uapi"),
        Member("bool", "has_ray_tracing", compiler_field=True),
        Member("bool", "has_ray_query"),
        Member("bool", "has_local_mem"),
        Member("bool", "has_lsc", compiler_field=True),
        Member("bool", "has_mesh_shading"),
        Member("bool", "has_mmap_offset"),
        Member("bool", "has_userptr_probe"),
        Member("bool", "has_context_isolation"),
        Member("bool", "has_set_pat_uapi"),
        Member("bool", "has_indirect_unroll"),
        Member("bool", "has_negative_rhw_bug", compiler_field=True,
               comment="Intel hardware quirks"),

        Member("bool", "has_coarse_pixel_primitive_and_cb", compiler_field=True,
               comment=dedent("""\
               Whether this platform supports fragment shading rate controlled by a
               primitive in geometry shaders and by a control buffer.""")),

        Member("bool", "has_compute_engine", comment="Whether this platform has compute engine"),

        Member("bool", "needs_unlit_centroid_workaround", compiler_field=True,
               comment=dedent("""\
               Some versions of Gen hardware don't do centroid interpolation correctly
               on unlit pixels, causing incorrect values for derivatives near triangle
               edges. Enabling this flag causes the fragment shader to use
               non-centroid interpolation for unlit pixels, at the expense of two extra
               fragment shader instructions.""")),

        Member("bool", "needs_null_push_constant_tbimr_workaround",
               comment=dedent("""\
               Whether the platform needs an undocumented workaround for a hardware bug
               that affects draw calls with a pixel shader that has 0 push constant cycles
               when TBIMR is enabled, which has been seen to lead to hangs. To avoid the
               issue we simply pad the push constant payload to be at least 1 register.""")),

        Member("bool", "is_adl_n", comment="We need this for ADL-N specific Wa_14014966230."),

        # The comment attached to num_slices also serves as the introduction
        # to the hardware-limit members that follow it.
        Member("unsigned", "num_slices",
               comment=dedent("""\
               GPU hardware limits

               In general, you can find shader thread maximums by looking at the "Maximum
               Number of Threads" field in the Intel PRM description of the 3DSTATE_VS,
               3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry
               limits come from the "Number of URB Entries" field in the
               3DSTATE_URB_VS command and friends.

               These fields are used to calculate the scratch space to allocate. The
               amount of scratch space can be larger without being harmful on modern
               GPUs, however, prior to Haswell, programming the maximum number of threads
               to greater than the hardware maximum would cause GPU performance to tank.

               Total number of slices present on the device whether or not they've been
               fused off.

               XXX: CS thread counts are limited by the inability to do cross subslice
               communication. It is the effectively the number of logical threads which
               can be executed in a subslice. Fuse configurations may cause this number
               to change, so we program @max_cs_threads as the lower maximum.""")),

        Member("unsigned", "max_slices", compiler_field=True,
               comment=dedent("""\
               Maximum number of slices present on this device (can be more than
               num_slices if some slices are fused).""")),

        Member("unsigned", "num_subslices", array="INTEL_DEVICE_MAX_SLICES",
               comment="Number of subslices for each slice (used to be uniform until CNL)."),

        Member("unsigned", "max_subslices_per_slice", compiler_field=True,
               comment=dedent("""\
               Maximum number of subslices per slice present on this device (can be
               more than the maximum value in the num_subslices[] array if some
               subslices are fused).

               This is GT_SS_PER_SLICE in SKU.""")),

        Member("unsigned", "ppipe_subslices", array="INTEL_DEVICE_MAX_PIXEL_PIPES",
               comment="Number of subslices on each pixel pipe (ICL)."),

        Member("unsigned", "max_eus_per_subslice", compiler_field=True,
               comment="Maximum number of EUs per subslice (some EUs can be fused off)."),

        Member("unsigned", "num_thread_per_eu", compiler_field=True,
               comment="Number of threads per eu, varies between 4 and 8 between generations."),

        Member("uint8_t", "grf_size",
               comment="Size of a register from the EU GRF file in bytes."),

        Member("uint8_t", "slice_masks",
               comment="A bit mask of the slices available."),

        # The only member in this file created with ray_tracing_field=True.
        Member("uint8_t", "subslice_masks",
               array="INTEL_DEVICE_MAX_SLICES * DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)",
               compiler_field=True,
               ray_tracing_field=True,
               comment=dedent("""\
               An array of bit mask of the subslices available, use subslice_slice_stride
               to access this array.""")),

        Member("unsigned", "subslice_total",
               comment=dedent("""\
               The number of enabled subslices (considering fusing). For exactly which
               subslices are enabled, see subslice_masks[].""")),

        Member("uint8_t", "eu_masks",
               array=("INTEL_DEVICE_MAX_SLICES * INTEL_DEVICE_MAX_SUBSLICES * "
                      "DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)"),
               comment=dedent("""\
               An array of bit mask of EUs available, use eu_slice_stride &
               eu_subslice_stride to access this array.""")),

        Member("uint16_t", "subslice_slice_stride", compiler_field=True,
               comment="Stride to access subslice_masks[]."),

        Member("uint16_t", "eu_slice_stride",
               comment="Strides to access eu_masks[]."),

        Member("uint16_t", "eu_subslice_stride"),
        Member("unsigned", "l3_banks"),

        Member("unsigned", "max_vs_threads",
               comment="Maximum Vertex Shader threads"),

        Member("unsigned", "max_tcs_threads",
               comment="Maximum Hull Shader threads"),

        Member("unsigned", "max_tes_threads",
               comment="Maximum Domain Shader threads"),

        Member("unsigned", "max_gs_threads",
               comment="Maximum Geometry Shader threads"),

        Member("unsigned", "max_wm_threads",
               comment=dedent("""\
               Theoretical maximum number of Pixel Shader threads.

               PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will
               automatically scale pixel shader thread count, based on a single value
               programmed into 3DSTATE_PS.

               To calculate the maximum number of threads for Gfx8 beyond (which have
               multiple Pixel Shader Dispatchers):

               - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
               - Usually there's only one PSD per subslice, so use the number of
                 subslices for number of PSDs.
               - For max_wm_threads, the total should be PSD threads * #PSDs.""")),

        Member("unsigned", "max_threads_per_psd"),

        Member("unsigned", "max_cs_threads",
               comment=dedent("""\
               Maximum Compute Shader threads.

               Thread count * number of EUs per subslice""")),

        Member("unsigned", "max_cs_workgroup_threads", compiler_field=True,
               comment=dedent("""\
               Maximum number of threads per workgroup supported by the GPGPU_WALKER or
               COMPUTE_WALKER command.

               This may be smaller than max_cs_threads as it takes into account added
               restrictions on the GPGPU/COMPUTE_WALKER commands. While max_cs_threads
               expresses the total parallelism of the GPU, this expresses the maximum
               number of threads we can dispatch in a single workgroup.""")),


        Member("unsigned", "max_scratch_ids", array="MESA_SHADER_STAGES", compiler_field=True,
               comment=dedent("""\
               The maximum number of potential scratch ids. Due to hardware
               implementation details, the range of scratch ids may be larger than the
               number of subslices.""")),

        Member("uint32_t", "max_scratch_size_per_thread", compiler_field=True),

        Member("intel_device_info_urb_desc", "urb"),
        Member("unsigned", "max_constant_urb_size_kb"),
        Member("unsigned", "mesh_max_constant_urb_size_kb"),
        # Both arrays are sized by the INTEL_ENGINE_CLASS_INVALID enumerator.
        Member("unsigned", "engine_class_prefetch", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "engine_class_supported_count", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "mem_alignment"),
        Member("uint64_t", "timestamp_frequency"),
        Member("uint64_t", "aperture_bytes"),
        Member("uint64_t", "gtt_size"),
        Member("int", "simulator_id"),
        Member("char", "name", array="INTEL_DEVICE_MAX_NAME_SIZE"),
        Member("bool", "no_hw"),
        Member("bool", "probe_forced", comment="Device needed INTEL_FORCE_PROBE"),
        Member("intel_device_info_mem_desc", "mem"),
        Member("intel_device_info_pat_desc", "pat"),
        Member("intel_cooperative_matrix_configuration",
               "cooperative_matrix_configurations", array=4)]
       )