• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright © 2024 Intel Corporation
2
3# Permission is hereby granted, free of charge, to any person obtaining a
4# copy of this software and associated documentation files (the "Software"),
5# to deal in the Software without restriction, including without limitation
6# the rights to use, copy, modify, merge, publish, distribute, sublicense,
7# and/or sell copies of the Software, and to permit persons to whom the
8# Software is furnished to do so, subject to the following conditions:
9
10# The above copyright notice and this permission notice (including the next
11# paragraph) shall be included in all copies or substantial portions of the
12# Software.
13
14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20# IN THE SOFTWARE.
21
22from textwrap import dedent
23
# TYPES is an ordered list of all declarations in this file.  Define, Enum
# and Struct each append themselves to it from their constructors, so the
# list order is the order in which the declarations are executed.
TYPES = []

# TYPES_BY_NAME allows the lookup of a declaration by its name.  Only Enum and
# Struct register themselves here; Define objects are reachable through TYPES
# only.
TYPES_BY_NAME = {}
29
class Define:
    """A C macro definition.

    Holds the macro ``name``, its ``value`` and an optional ``comment``.
    Creating an instance appends it to the module-level TYPES list.
    """

    def __init__(self, name, value, comment=None):
        self.name, self.value, self.comment = name, value, comment
        # Self-register so that the declaration order is recorded in TYPES.
        TYPES.append(self)
37
class EnumValue:
    """A single enumerator, with optional explicit value, comment and group tags.

    ``group_begin`` / ``group_end`` are stored as given; this class does not
    interpret them.
    """

    def __init__(self, name, value=None, comment=None,
                 group_begin=None, group_end=None):
        self.name, self.value, self.comment = name, value, comment
        self.group_begin, self.group_end = group_begin, group_end

    def __str__(self):
        # The string form of an enumerator is its bare name.
        return self.name
50
class Enum:
    """An enumeration declaration: a name plus its ordered enumerators.

    Entries of ``values`` that are not already EnumValue instances (e.g. plain
    strings) are wrapped in EnumValue.  Creating an instance records it in
    TYPES (ordered) and in TYPES_BY_NAME (keyed by ``name``).
    """

    def __init__(self, name, values, external=False):
        self.name = name
        # Normalize every entry to an EnumValue, preserving the given order.
        self.values = [
            entry if isinstance(entry, EnumValue) else EnumValue(entry)
            for entry in values
        ]
        self.external = external

        TYPES.append(self)
        TYPES_BY_NAME[name] = self
65
class Member:
    """One member of a struct declaration: its type, name and annotations."""

    def __init__(self, member_type, name, array=None,
                 compiler_field=False, ray_tracing_field=False,
                 comment=None):
        self.member_type, self.name = member_type, name
        self.array = array
        self.comment = comment
        # compiler_field marks a field that is used by the compiler and that
        # should be included in the shader compiler cache hash function.
        self.compiler_field = compiler_field
        self.ray_tracing_field = ray_tracing_field
79
class Struct:
    """A struct declaration: a name plus its members.

    Creating an instance records it in TYPES (ordered) and in TYPES_BY_NAME
    (keyed by ``name``).
    """

    def __init__(self, name, members):
        self.name, self.members = name, members
        TYPES.append(self)
        TYPES_BY_NAME[name] = self
87
# Names of the integer C types used by member declarations.
INT_TYPES = {
    "uint8_t",
    "uint16_t",
    "uint32_t",
    "uint64_t",
    "unsigned",
    "int",
}

# The integer types plus "char" and "bool".
FUNDAMENTAL_TYPES = {"char", "bool"} | INT_TYPES
96
# Macro declarations.  Each of these names is referenced inside an array-size
# expression of an intel_device_info member later in this file.
Define("INTEL_DEVICE_MAX_NAME_SIZE", 64)
Define("INTEL_DEVICE_MAX_SLICES", 8)
Define("INTEL_DEVICE_MAX_SUBSLICES", 8, comment="Maximum on gfx11")
Define("INTEL_DEVICE_MAX_EUS_PER_SUBSLICE", 16, comment="Maximum on gfx11")
Define("INTEL_DEVICE_MAX_PIXEL_PIPES", 16, comment="Maximum on DG2")
102
# Platform identifiers, in declaration order.  Only the first entry carries an
# explicit value (1); plain strings are wrapped into EnumValue by Enum.
# group_begin / group_end tag the first and last enumerator of a named family
# (DG2, ATSM, MTL, ARL).
Enum(
    "intel_platform",
    [
        EnumValue("INTEL_PLATFORM_GFX3", value=1),
        "INTEL_PLATFORM_I965",
        "INTEL_PLATFORM_ILK",
        "INTEL_PLATFORM_G4X",
        "INTEL_PLATFORM_SNB",
        "INTEL_PLATFORM_IVB",
        "INTEL_PLATFORM_BYT",
        "INTEL_PLATFORM_HSW",
        "INTEL_PLATFORM_BDW",
        "INTEL_PLATFORM_CHV",
        "INTEL_PLATFORM_SKL",
        "INTEL_PLATFORM_BXT",
        "INTEL_PLATFORM_KBL",
        "INTEL_PLATFORM_GLK",
        "INTEL_PLATFORM_CFL",
        "INTEL_PLATFORM_ICL",
        "INTEL_PLATFORM_EHL",
        "INTEL_PLATFORM_TGL",
        "INTEL_PLATFORM_RKL",
        "INTEL_PLATFORM_DG1",
        "INTEL_PLATFORM_ADL",
        "INTEL_PLATFORM_RPL",
        EnumValue("INTEL_PLATFORM_DG2_G10", group_begin="DG2"),
        "INTEL_PLATFORM_DG2_G11",
        EnumValue("INTEL_PLATFORM_DG2_G12", group_end="DG2"),
        EnumValue("INTEL_PLATFORM_ATSM_G10", group_begin="ATSM"),
        EnumValue("INTEL_PLATFORM_ATSM_G11", group_end="ATSM"),
        EnumValue("INTEL_PLATFORM_MTL_U", group_begin="MTL"),
        EnumValue("INTEL_PLATFORM_MTL_H", group_end="MTL"),
        EnumValue("INTEL_PLATFORM_ARL_U", group_begin="ARL"),
        EnumValue("INTEL_PLATFORM_ARL_H", group_end="ARL"),
        "INTEL_PLATFORM_LNL",
        "INTEL_PLATFORM_BMG",
        "INTEL_PLATFORM_PTL",
    ],
)
139
# Struct with two int members: `klass` and `instance`.
Struct(
    "intel_memory_class_instance",
    [
        Member(
            "int",
            "klass",
            comment="Kernel backend specific class value, no translation needed yet",
        ),
        Member("int", "instance"),
    ],
)
144
# mmap mode enumeration; the first enumerator is pinned to 0.
Enum(
    "intel_device_info_mmap_mode",
    [
        EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_UC", value=0),
        EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WC"),
        EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WB"),
        EnumValue(
            "INTEL_DEVICE_INFO_MMAP_MODE_XD",
            comment=dedent("""\
                Xe2+ only. Only supported in GPU side and used for displayable
                buffers."""),
        ),
    ],
)
154
# A PAT entry: a uint8_t index plus the mmap mode declared by the
# intel_device_info_mmap_mode enum.
Struct(
    "intel_device_info_pat_entry",
    [
        Member("uint8_t", "index"),
        Member(
            "intel_device_info_mmap_mode",
            "mmap",
            comment=dedent("""\
                This tells KMD what caching mode the CPU mapping should use.
                It has nothing to do with any PAT cache modes."""),
        ),
    ],
)
161
# Cooperative matrix scope; the first enumerator is pinned to 0.
Enum(
    "intel_cmat_scope",
    [
        EnumValue("INTEL_CMAT_SCOPE_NONE", value=0),
        "INTEL_CMAT_SCOPE_SUBGROUP",
    ],
)
165
# Component types usable in a cooperative matrix configuration.  No explicit
# values are assigned here.
Enum(
    "intel_cooperative_matrix_component_type",
    [
        "INTEL_CMAT_FLOAT16",
        "INTEL_CMAT_FLOAT32",
        "INTEL_CMAT_SINT32",
        "INTEL_CMAT_SINT8",
        "INTEL_CMAT_UINT32",
        "INTEL_CMAT_UINT8",
    ],
)
173
# Engine classes.  INTEL_ENGINE_CLASS_INVALID is the last enumerator and is
# also used as an array size by intel_device_info members later in this file.
Enum(
    "intel_engine_class",
    [
        "INTEL_ENGINE_CLASS_RENDER",
        "INTEL_ENGINE_CLASS_COPY",
        "INTEL_ENGINE_CLASS_VIDEO",
        "INTEL_ENGINE_CLASS_VIDEO_ENHANCE",
        "INTEL_ENGINE_CLASS_COMPUTE",
        "INTEL_ENGINE_CLASS_INVALID",
    ],
)
181
# One cooperative matrix configuration: scope, the m/n/k dimensions and the
# component type of each of the a, b, c and result matrices.  The comment
# describing the whole configuration is attached to the first member.
Struct(
    "intel_cooperative_matrix_configuration",
    [
        Member(
            "intel_cmat_scope",
            "scope",
            comment=dedent("""\
                Matrix A is MxK.
                Matrix B is KxN.
                Matrix C and Matrix Result are MxN.

                Result = A * B + C;"""),
        ),
        Member("uint8_t", "m"),
        Member("uint8_t", "n"),
        Member("uint8_t", "k"),
        Member("intel_cooperative_matrix_component_type", "a"),
        Member("intel_cooperative_matrix_component_type", "b"),
        Member("intel_cooperative_matrix_component_type", "c"),
        Member("intel_cooperative_matrix_component_type", "result"),
    ],
)
197
# Kernel-mode driver type.  This is the only enum in this file declared with
# external=True.
Enum(
    "intel_kmd_type",
    [
        "INTEL_KMD_TYPE_INVALID",
        "INTEL_KMD_TYPE_I915",
        "INTEL_KMD_TYPE_XE",
        "INTEL_KMD_TYPE_STUB",
        "INTEL_KMD_TYPE_LAST",
    ],
    external=True,
)
205
# A memory region described by two uint64_t members: `size` and `free`.
Struct(
    "intel_device_info_mem_region",
    [
        Member("uint64_t", "size"),
        Member("uint64_t", "free"),
    ],
)
209
# RAM description: a memory class instance plus a mappable and an unmappable
# memory region.
Struct(
    "intel_device_info_ram_desc",
    [
        Member("intel_memory_class_instance", "mem"),
        Member("intel_device_info_mem_region", "mappable"),
        Member("intel_device_info_mem_region", "unmappable"),
    ],
)
214
# Memory description: a use_class_instance flag plus one RAM description each
# for `sram` and `vram`.
Struct(
    "intel_device_info_mem_desc",
    [
        Member("bool", "use_class_instance"),
        Member("intel_device_info_ram_desc", "sram"),
        Member("intel_device_info_ram_desc", "vram"),
    ],
)
219
# URB description: a size plus two 4-element int arrays (min_entries and
# max_entries).
Struct(
    "intel_device_info_urb_desc",
    [
        Member("int", "size"),
        Member("int", "min_entries", array=4),
        Member("int", "max_entries", array=4),
    ],
)
224
# PAT description: one intel_device_info_pat_entry per usage listed below.
Struct(
    "intel_device_info_pat_desc",
    [
        Member(
            "intel_device_info_pat_entry",
            "cached_coherent",
            comment="To be used when CPU access is frequent, WB + 1 or 2 way coherent",
        ),
        Member(
            "intel_device_info_pat_entry",
            "scanout",
            comment="scanout and external BOs",
        ),
        Member(
            "intel_device_info_pat_entry",
            "compressed",
            comment="Only supported in Xe2, compressed + WC",
        ),
        Member(
            "intel_device_info_pat_entry",
            "writeback_incoherent",
            comment=(
                "BOs without special needs, can be WB not coherent "
                "or WC it depends on the platforms and KMD"
            ),
        ),
        Member("intel_device_info_pat_entry", "writecombining"),
    ],
)
240
# The main device description struct.  Members are listed in declaration
# order and the list is stored as-is on the Struct object.
#
# Members passed compiler_field=True are flagged as used by the compiler and
# to be included in the shader compiler cache hash function (see the comment
# in Member.__init__).  ray_tracing_field=True is set on subslice_masks only.
#
# `array` is either an int or a string; the strings reference the Define()
# macro names and enumerator names declared earlier in this file, except
# MESA_SHADER_STAGES and DIV_ROUND_UP, which are not declared here
# (presumably provided by other headers - confirm).
Struct("intel_device_info",
       [Member("intel_kmd_type", "kmd_type"),

        Member("int", "ver", compiler_field=True,
               comment="Driver internal numbers used to differentiate platforms."),

        Member("int", "verx10", compiler_field=True),

        Member("uint32_t", "gfx_ip_ver", compiler_field=True,
               comment=dedent("""\
               This is the run-time hardware GFX IP version that may be more specific
               than ver/verx10. ver/verx10 may be more useful for comparing a class
               of devices whereas gfx_ip_ver may be more useful for precisely
               checking for a graphics ip type. GFX_IP_VER(major, minor) should be
               used to compare IP versions.""")),

        Member("int", "revision",
               comment=dedent("""\
               This revision is queried from KMD unlike
               pci_revision_id from drm device. Its value is not always
               same as the pci_revision_id.
               For LNL+ this is the stepping of GT IP/GMD RevId.""")),

        Member("int", "gt"),
        Member("uint16_t", "pci_domain", comment="PCI info"),
        Member("uint8_t", "pci_bus"),
        Member("uint8_t", "pci_dev"),
        Member("uint8_t", "pci_func"),
        Member("uint16_t", "pci_device_id"),
        Member("uint8_t", "pci_revision_id"),
        Member("intel_platform", "platform", compiler_field=True),
        Member("bool", "has_hiz_and_separate_stencil"),
        Member("bool", "must_use_separate_stencil"),
        Member("bool", "has_sample_with_hiz"),
        Member("bool", "has_bit6_swizzle"),
        Member("bool", "has_llc"),
        Member("bool", "has_pln", compiler_field=True),
        Member("bool", "has_64bit_float", compiler_field=True),
        Member("bool", "has_64bit_float_via_math_pipe", compiler_field=True),
        Member("bool", "has_64bit_int", compiler_field=True),
        Member("bool", "has_integer_dword_mul", compiler_field=True),
        Member("bool", "has_compr4", compiler_field=True),
        Member("bool", "has_surface_tile_offset"),
        Member("bool", "supports_simd16_3src", compiler_field=True),
        Member("bool", "disable_ccs_repack"),

        Member("bool", "has_illegal_ccs_values",
               comment="True if CCS needs to be initialized before use."),

        Member("bool", "has_flat_ccs",
               comment=dedent("""\
               True if CCS uses a flat virtual address translation to a memory
               carve-out, rather than aux map translations, or additional surfaces.""")),

        Member("bool", "has_aux_map"),
        Member("bool", "has_caching_uapi"),
        Member("bool", "has_tiling_uapi"),
        Member("bool", "has_ray_tracing", compiler_field=True),
        Member("bool", "has_ray_query"),
        Member("bool", "has_local_mem"),
        Member("bool", "has_lsc", compiler_field=True),
        Member("bool", "has_mesh_shading"),
        Member("bool", "has_mmap_offset"),
        Member("bool", "has_userptr_probe"),
        Member("bool", "has_context_isolation"),
        Member("bool", "has_set_pat_uapi"),
        Member("bool", "has_indirect_unroll"),
        Member("bool", "has_negative_rhw_bug", compiler_field=True,
               comment="Intel hardware quirks"),

        Member("bool", "has_coarse_pixel_primitive_and_cb", compiler_field=True,
               comment=dedent("""\
               Whether this platform supports fragment shading rate controlled by a
               primitive in geometry shaders and by a control buffer.""")),

        Member("bool", "has_compute_engine", comment="Whether this platform has compute engine"),

        Member("bool", "needs_unlit_centroid_workaround", compiler_field=True,
               comment=dedent("""\
               Some versions of Gen hardware don't do centroid interpolation correctly
               on unlit pixels, causing incorrect values for derivatives near triangle
               edges.  Enabling this flag causes the fragment shader to use
               non-centroid interpolation for unlit pixels, at the expense of two extra
               fragment shader instructions.""")),

        Member("bool", "needs_null_push_constant_tbimr_workaround",
               comment=dedent("""\
               Whether the platform needs an undocumented workaround for a hardware bug
               that affects draw calls with a pixel shader that has 0 push constant cycles
               when TBIMR is enabled, which has been seen to lead to hangs.  To avoid the
               issue we simply pad the push constant payload to be at least 1 register.""")),

        Member("bool", "is_adl_n", comment="We need this for ADL-N specific Wa_14014966230."),

        # The comment on num_slices also carries the general "GPU hardware
        # limits" introduction for the members that follow it.
        Member("unsigned", "num_slices",
               comment=dedent("""\
               GPU hardware limits

               In general, you can find shader thread maximums by looking at the "Maximum
               Number of Threads" field in the Intel PRM description of the 3DSTATE_VS,
               3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry
               limits come from the "Number of URB Entries" field in the
               3DSTATE_URB_VS command and friends.

               These fields are used to calculate the scratch space to allocate.  The
               amount of scratch space can be larger without being harmful on modern
               GPUs, however, prior to Haswell, programming the maximum number of threads
               to greater than the hardware maximum would cause GPU performance to tank.

               Total number of slices present on the device whether or not they've been
               fused off.

               XXX: CS thread counts are limited by the inability to do cross subslice
               communication. It is the effectively the number of logical threads which
               can be executed in a subslice. Fuse configurations may cause this number
               to change, so we program @max_cs_threads as the lower maximum.""")),

        Member("unsigned", "max_slices", compiler_field=True,
               comment=dedent("""\
               Maximum number of slices present on this device (can be more than
               num_slices if some slices are fused).""")),

        Member("unsigned", "num_subslices", array="INTEL_DEVICE_MAX_SLICES",
               comment="Number of subslices for each slice (used to be uniform until CNL)."),

        Member("unsigned", "max_subslices_per_slice", compiler_field=True,
               comment=dedent("""\
               Maximum number of subslices per slice present on this device (can be
               more than the maximum value in the num_subslices[] array if some
               subslices are fused).

               This is GT_SS_PER_SLICE in SKU.""")),

        Member("unsigned", "ppipe_subslices", array="INTEL_DEVICE_MAX_PIXEL_PIPES",
               comment="Number of subslices on each pixel pipe (ICL)."),

        Member("unsigned", "max_eus_per_subslice", compiler_field=True,
               comment="Maximum number of EUs per subslice (some EUs can be fused off)."),

        Member("unsigned", "num_thread_per_eu", compiler_field=True,
               comment="Number of threads per eu, varies between 4 and 8 between generations."),

        Member("uint8_t", "grf_size",
               comment="Size of a register from the EU GRF file in bytes."),

        Member("uint8_t", "slice_masks",
               comment="A bit mask of the slices available."),

        # The only member flagged with both compiler_field and
        # ray_tracing_field.
        Member("uint8_t", "subslice_masks",
               array="INTEL_DEVICE_MAX_SLICES * DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)",
               compiler_field=True,
               ray_tracing_field=True,
               comment=dedent("""\
               An array of bit mask of the subslices available, use subslice_slice_stride
               to access this array.""")),

        Member("unsigned", "subslice_total",
               comment=dedent("""\
               The number of enabled subslices (considering fusing). For exactly which
               subslices are enabled, see subslice_masks[].""")),

        Member("uint8_t", "eu_masks",
               array=("INTEL_DEVICE_MAX_SLICES * INTEL_DEVICE_MAX_SUBSLICES * "
                      "DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)"),
               comment=dedent("""\
               An array of bit mask of EUs available, use eu_slice_stride &
               eu_subslice_stride to access this array.""")),

        Member("uint16_t", "subslice_slice_stride", compiler_field=True,
               comment="Stride to access subslice_masks[]."),

        Member("uint16_t", "eu_slice_stride",
               comment="Strides to access eu_masks[]."),

        Member("uint16_t", "eu_subslice_stride"),
        Member("unsigned", "l3_banks"),

        Member("unsigned", "max_vs_threads",
               comment="Maximum Vertex Shader threads"),

        Member("unsigned", "max_tcs_threads",
               comment="Maximum Hull Shader threads"),

        Member("unsigned", "max_tes_threads",
               comment="Maximum Domain Shader threads"),

        Member("unsigned", "max_gs_threads",
               comment="Maximum Geometry Shader threads"),

        Member("unsigned", "max_wm_threads",
               comment=dedent("""\
               Theoretical maximum number of Pixel Shader threads.

               PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will
               automatically scale pixel shader thread count, based on a single value
               programmed into 3DSTATE_PS.

               To calculate the maximum number of threads for Gfx8 beyond (which have
               multiple Pixel Shader Dispatchers):

               - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
               - Usually there's only one PSD per subslice, so use the number of
                 subslices for number of PSDs.
               - For max_wm_threads, the total should be PSD threads * #PSDs.""")),

        Member("unsigned", "max_threads_per_psd"),

        Member("unsigned", "max_cs_threads",
               comment=dedent("""\
               Maximum Compute Shader threads.

               Thread count * number of EUs per subslice""")),

        Member("unsigned", "max_cs_workgroup_threads", compiler_field=True,
               comment=dedent("""\
               Maximum number of threads per workgroup supported by the GPGPU_WALKER or
               COMPUTE_WALKER command.

               This may be smaller than max_cs_threads as it takes into account added
               restrictions on the GPGPU/COMPUTE_WALKER commands.  While max_cs_threads
               expresses the total parallelism of the GPU, this expresses the maximum
               number of threads we can dispatch in a single workgroup.""")),


        # NOTE(review): MESA_SHADER_STAGES is not declared in this file.
        Member("unsigned", "max_scratch_ids", array="MESA_SHADER_STAGES", compiler_field=True,
               comment=dedent("""\
               The maximum number of potential scratch ids. Due to hardware
               implementation details, the range of scratch ids may be larger than the
               number of subslices.""")),

        Member("uint32_t", "max_scratch_size_per_thread", compiler_field=True),

        Member("intel_device_info_urb_desc", "urb"),
        Member("unsigned", "max_constant_urb_size_kb"),
        Member("unsigned", "mesh_max_constant_urb_size_kb"),
        # Both engine_class_* arrays are sized by INTEL_ENGINE_CLASS_INVALID,
        # the last enumerator of the intel_engine_class enum.
        Member("unsigned", "engine_class_prefetch", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "engine_class_supported_count", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "mem_alignment"),
        Member("uint64_t", "timestamp_frequency"),
        Member("uint64_t", "aperture_bytes"),
        Member("uint64_t", "gtt_size"),
        Member("int", "simulator_id"),
        Member("char", "name", array="INTEL_DEVICE_MAX_NAME_SIZE"),
        Member("bool", "no_hw"),
        Member("bool", "probe_forced", comment="Device needed INTEL_FORCE_PROBE"),
        Member("intel_device_info_mem_desc", "mem"),
        Member("intel_device_info_pat_desc", "pat"),
        Member("intel_cooperative_matrix_configuration",
               "cooperative_matrix_configurations", array=4)]
       )
491