• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright © 2024 Intel Corporation
2
3# Permission is hereby granted, free of charge, to any person obtaining a
4# copy of this software and associated documentation files (the "Software"),
5# to deal in the Software without restriction, including without limitation
6# the rights to use, copy, modify, merge, publish, distribute, sublicense,
7# and/or sell copies of the Software, and to permit persons to whom the
8# Software is furnished to do so, subject to the following conditions:
9
10# The above copyright notice and this permission notice (including the next
11# paragraph) shall be included in all copies or substantial portions of the
12# Software.
13
14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20# IN THE SOFTWARE.
21
22from textwrap import dedent
23
# TYPES is an ordered list of all declarations in this file.  Every Define,
# Enum and Struct appends itself here from its constructor, so the list order
# is the order in which the declarations below are instantiated.
TYPES = []

# TYPES_BY_NAME maps a declaration name to its object.  Only Enum and Struct
# register themselves here; Define objects are reachable through TYPES only.
TYPES_BY_NAME = {}
29
class Define:
    """A C macro definition: a name, its value and an optional comment.

    Creating a Define appends it to the module-level TYPES list.  It is not
    added to TYPES_BY_NAME.
    """
    def __init__(self, name, value, comment=None):
        self.name, self.value, self.comment = name, value, comment
        # Self-register so that declaration order is kept in TYPES.
        TYPES.append(self)
37
class EnumValue:
    """A single enumerator with an optional value, comment and group tags.

    All arguments are stored unchanged on attributes of the same name.
    str() of an EnumValue yields its name.
    """
    def __init__(self, name, value=None, comment=None,
                 group_begin=None, group_end=None):
        self.name = name
        # None means the declaration did not supply a value / comment.
        self.value, self.comment = value, comment
        # Optional group labels attached to this enumerator.
        self.group_begin, self.group_end = group_begin, group_end

    def __str__(self):
        return self.name
50
class Enum:
    """An enumeration declaration: a name plus an ordered list of EnumValue.

    Entries of `values` that are not already EnumValue instances are wrapped
    as EnumValue(entry).  The `external` flag is stored unchanged.  The new
    object is appended to TYPES and registered in TYPES_BY_NAME under `name`.
    """
    def __init__(self, name, values, external=False):
        self.name = name
        # Normalise every entry to an EnumValue, keeping the given order.
        self.values = [entry if isinstance(entry, EnumValue) else EnumValue(entry)
                       for entry in values]
        # NOTE(review): presumably marks an enum whose C declaration lives
        # elsewhere -- confirm against the consumer of TYPES.
        self.external = external

        TYPES.append(self)
        TYPES_BY_NAME[name] = self
65
class Member:
    """One field of a struct declaration.

    Holds the field's type name, its own name, an optional array dimension,
    the compiler_field flag and an optional comment, all stored unchanged.
    """
    def __init__(self, member_type, name, array=None, compiler_field=False, comment=None):
        self.member_type, self.name = member_type, name
        # None when no array dimension was given.
        self.array = array
        # True when the compiler uses this field and it should therefore be
        # part of the shader compiler cache hash function.
        self.compiler_field = compiler_field
        self.comment = comment
76
class Struct:
    """A struct declaration: a name and the list of its members.

    The `members` list is stored as given (no copy).  The new object is
    appended to TYPES and registered in TYPES_BY_NAME under `name`.
    """
    def __init__(self, name, members):
        self.name, self.members = name, members
        TYPES.append(self)
        TYPES_BY_NAME[name] = self
84
# Names of the C integer types known to this module.
# NOTE(review): the consumers of this set are outside this file.
INT_TYPES = {
    "uint8_t",
    "uint16_t",
    "uint64_t",
    "unsigned",
    "int",
}

# The integer types above plus "char" and "bool".
FUNDAMENTAL_TYPES = {"char", "bool"} | INT_TYPES
92
# Size limits, registered as macro definitions.  Struct members further down
# refer to these names inside their array= dimension strings.
Define("INTEL_DEVICE_MAX_NAME_SIZE", 64)
Define("INTEL_DEVICE_MAX_SLICES", 8)
Define("INTEL_DEVICE_MAX_SUBSLICES", 8, "Maximum on gfx11")
Define("INTEL_DEVICE_MAX_EUS_PER_SUBSLICE", 16, "Maximum on gfx11")
Define("INTEL_DEVICE_MAX_PIXEL_PIPES", 16, "Maximum on DG2")
98
# Platform enumeration.  Only the first entry carries an explicit value (1);
# every other entry leaves its value unset.  group_begin/group_end tag the
# first and last variant of the DG2, ATSM, MTL and ARL groups.
# NOTE(review): how the group tags are rendered is decided by the consumer of
# TYPES, which is not part of this file.
Enum("intel_platform",
     [EnumValue("INTEL_PLATFORM_GFX3", value=1),
      "INTEL_PLATFORM_I965",
      "INTEL_PLATFORM_ILK",
      "INTEL_PLATFORM_G4X",
      "INTEL_PLATFORM_SNB",
      "INTEL_PLATFORM_IVB",
      "INTEL_PLATFORM_BYT",
      "INTEL_PLATFORM_HSW",
      "INTEL_PLATFORM_BDW",
      "INTEL_PLATFORM_CHV",
      "INTEL_PLATFORM_SKL",
      "INTEL_PLATFORM_BXT",
      "INTEL_PLATFORM_KBL",
      "INTEL_PLATFORM_GLK",
      "INTEL_PLATFORM_CFL",
      "INTEL_PLATFORM_ICL",
      "INTEL_PLATFORM_EHL",
      "INTEL_PLATFORM_TGL",
      "INTEL_PLATFORM_RKL",
      "INTEL_PLATFORM_DG1",
      "INTEL_PLATFORM_ADL",
      "INTEL_PLATFORM_RPL",
      EnumValue("INTEL_PLATFORM_DG2_G10", group_begin="DG2"),
      "INTEL_PLATFORM_DG2_G11",
      EnumValue("INTEL_PLATFORM_DG2_G12", group_end="DG2"),
      EnumValue("INTEL_PLATFORM_ATSM_G10", group_begin="ATSM"),
      EnumValue("INTEL_PLATFORM_ATSM_G11", group_end="ATSM"),
      EnumValue("INTEL_PLATFORM_MTL_U", group_begin="MTL"),
      EnumValue("INTEL_PLATFORM_MTL_H", group_end="MTL"),
      EnumValue("INTEL_PLATFORM_ARL_U", group_begin="ARL"),
      EnumValue("INTEL_PLATFORM_ARL_H", group_end="ARL"),
      "INTEL_PLATFORM_LNL"
      ])
133
# Two-int struct (klass, instance); used below as the type of the "mem" member
# of intel_device_info_ram_desc.
Struct("intel_memory_class_instance",
       [ Member("int", "klass",
                comment = "Kernel backend specific class value, no translation needed yet"),
         Member("int", "instance")])
138
# mmap mode enum; the first entry is pinned to 0, the others have no explicit
# value.
Enum("intel_device_info_mmap_mode",
      [EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_UC", value=0),
       "INTEL_DEVICE_INFO_MMAP_MODE_WC",
       "INTEL_DEVICE_INFO_MMAP_MODE_WB"
       ])

# Coherency mode enum; the first entry is pinned to 0.
Enum("intel_device_info_coherency_mode",
     [EnumValue("INTEL_DEVICE_INFO_COHERENCY_MODE_NONE", value=0),
      EnumValue("INTEL_DEVICE_INFO_COHERENCY_MODE_1WAY", comment="CPU caches are snooped by GPU"),
      EnumValue("INTEL_DEVICE_INFO_COHERENCY_MODE_2WAY",
                comment="Fully coherent between GPU and CPU")
      ])

# One PAT entry: a uint8_t index plus one value of each of the two enums
# declared just above.
Struct("intel_device_info_pat_entry",
       [Member("uint8_t", "index"),
        Member("intel_device_info_mmap_mode", "mmap"),
        Member("intel_device_info_coherency_mode", "coherency")])
156
# Cooperative-matrix scope enum; the first entry is pinned to 0.
Enum("intel_cmat_scope",
     [EnumValue("INTEL_CMAT_SCOPE_NONE", value=0),
     "INTEL_CMAT_SCOPE_SUBGROUP"])

# Cooperative-matrix component types; no entry has an explicit value.
Enum("intel_cooperative_matrix_component_type",
     ["INTEL_CMAT_FLOAT16",
      "INTEL_CMAT_FLOAT32",
      "INTEL_CMAT_SINT32",
      "INTEL_CMAT_SINT8",
      "INTEL_CMAT_UINT32",
      "INTEL_CMAT_UINT8"])
168
# Engine classes; no entry has an explicit value.
# NOTE(review): a member of intel_device_info sizes an array with the string
# "INTEL_ENGINE_CLASS_COMPUTE + 1", so the position of COMPUTE in this list
# presumably determines that array's length -- keep this in mind before
# reordering or inserting entries.
Enum("intel_engine_class",
     ["INTEL_ENGINE_CLASS_RENDER",
      "INTEL_ENGINE_CLASS_COPY",
      "INTEL_ENGINE_CLASS_VIDEO",
      "INTEL_ENGINE_CLASS_VIDEO_ENHANCE",
      "INTEL_ENGINE_CLASS_COMPUTE",
      "INTEL_ENGINE_CLASS_INVALID"])
176
# One cooperative-matrix configuration: a scope, three uint8_t dimensions
# (m, n, k) and the component type of each of a, b, c and result.  The comment
# string attached to "scope" appears to document the struct as a whole (it
# describes how M, N, K relate to matrices A, B, C and Result).
Struct("intel_cooperative_matrix_configuration",
   [Member("intel_cmat_scope", "scope",
           comment=dedent("""\
           Matrix A is MxK.
           Matrix B is KxN.
           Matrix C and Matrix Result are MxN.

           Result = A * B + C;""")),
    Member("uint8_t", "m"),
    Member("uint8_t", "n"),
    Member("uint8_t", "k"),
    Member("intel_cooperative_matrix_component_type", "a"),
    Member("intel_cooperative_matrix_component_type", "b"),
    Member("intel_cooperative_matrix_component_type", "c"),
    Member("intel_cooperative_matrix_component_type", "result")])
192
# Kernel-mode-driver type enum, declared with external=True.
# NOTE(review): presumably this means the C enum itself is defined outside
# the generated output -- confirm against the consumer of TYPES.
Enum("intel_kmd_type",
     ["INTEL_KMD_TYPE_INVALID",
      "INTEL_KMD_TYPE_I915",
      "INTEL_KMD_TYPE_XE",
      "INTEL_KMD_TYPE_STUB",
      "INTEL_KMD_TYPE_LAST"
      ], external=True)
200
# Memory description structs, declared innermost first so that each nested
# type is registered before the struct that uses it.

# A (size, free) pair of uint64_t values.
Struct("intel_device_info_mem_region",
       [Member("uint64_t", "size"),
        Member("uint64_t", "free")])

# A memory class instance plus a mappable and an unmappable region.
Struct("intel_device_info_ram_desc",
       [Member("intel_memory_class_instance", "mem"),
        Member("intel_device_info_mem_region", "mappable"),
        Member("intel_device_info_mem_region", "unmappable")])

# A use_class_instance flag plus one ram_desc each for "sram" and "vram".
Struct("intel_device_info_mem_desc",
       [Member("bool", "use_class_instance"),
        Member("intel_device_info_ram_desc", "sram"),
        Member("intel_device_info_ram_desc", "vram")])
214
# URB description: a size plus two 4-element int arrays (min/max entries).
# NOTE(review): the meaning of the four array slots is not stated here.
Struct("intel_device_info_urb_desc",
       [Member("int", "size"),
        Member("int", "min_entries", array=4),
        Member("int", "max_entries", array=4)])
219
# Four named PAT entries, each of type intel_device_info_pat_entry.  The
# intended use of each entry is given in its comment string; "writecombining"
# carries no comment.
Struct("intel_device_info_pat_desc",
       [Member("intel_device_info_pat_entry", "cached_coherent",
               comment="To be used when CPU access is frequent, WB + 1 or 2 way coherent"),

        Member("intel_device_info_pat_entry", "scanout",
               comment="scanout and external BOs"),

        Member("intel_device_info_pat_entry", "writeback_incoherent",
               comment=("BOs without special needs, can be WB not coherent "
                        "or WC it depends on the platforms and KMD")),

        Member("intel_device_info_pat_entry", "writecombining")])
232
# The top-level device description struct.  Members appear in declaration
# order.  Members created with compiler_field=True are the ones flagged as
# used by the compiler and to be included in the shader compiler cache hash
# function.  Multi-line comment strings are passed through dedent() so that
# they can be indented to match the surrounding code.  Array dimensions given
# as strings are C expressions built from the macro and enum names declared
# earlier in this file, plus DIV_ROUND_UP and MESA_SHADER_STAGES, which are
# not declared in this file.
Struct("intel_device_info",
       [Member("intel_kmd_type", "kmd_type"),

        Member("int", "ver", compiler_field=True,
               comment="Driver internal numbers used to differentiate platforms."),

        Member("int", "verx10", compiler_field=True),
        Member("int", "display_ver"),

        Member("int", "revision", compiler_field=True,
               comment=dedent("""\
               This revision is from ioctl (I915_PARAM_REVISION) unlike
               pci_revision_id from drm device. Its value is not always
               same as the pci_revision_id.""")),

        Member("int", "gt"),
        Member("uint16_t", "pci_domain", comment="PCI info"),
        Member("uint8_t", "pci_bus"),
        Member("uint8_t", "pci_dev"),
        Member("uint8_t", "pci_func"),
        Member("uint16_t", "pci_device_id"),
        Member("uint8_t", "pci_revision_id"),
        Member("intel_platform", "platform", compiler_field=True),
        Member("bool", "has_hiz_and_separate_stencil"),
        Member("bool", "must_use_separate_stencil"),
        Member("bool", "has_sample_with_hiz"),
        Member("bool", "has_bit6_swizzle"),
        Member("bool", "has_llc"),
        Member("bool", "has_pln", compiler_field=True),
        Member("bool", "has_64bit_float", compiler_field=True),
        Member("bool", "has_64bit_float_via_math_pipe", compiler_field=True),
        Member("bool", "has_64bit_int", compiler_field=True),
        Member("bool", "has_integer_dword_mul", compiler_field=True),
        Member("bool", "has_compr4", compiler_field=True),
        Member("bool", "has_surface_tile_offset"),
        Member("bool", "supports_simd16_3src", compiler_field=True),
        Member("bool", "disable_ccs_repack"),

        Member("bool", "has_illegal_ccs_values",
               comment="True if CCS needs to be initialized before use."),

        Member("bool", "has_flat_ccs",
               comment=dedent("""\
               True if CCS uses a flat virtual address translation to a memory
               carve-out, rather than aux map translations, or additional surfaces.""")),

        Member("bool", "has_aux_map"),
        Member("bool", "has_caching_uapi"),
        Member("bool", "has_tiling_uapi"),
        Member("bool", "has_ray_tracing", compiler_field=True),
        Member("bool", "has_ray_query"),
        Member("bool", "has_local_mem"),
        Member("bool", "has_lsc", compiler_field=True),
        Member("bool", "has_mesh_shading"),
        Member("bool", "has_mmap_offset"),
        Member("bool", "has_userptr_probe"),
        Member("bool", "has_context_isolation"),
        Member("bool", "has_set_pat_uapi"),
        Member("bool", "has_indirect_unroll"),
        Member("bool", "has_negative_rhw_bug", compiler_field=True,
               comment="Intel hardware quirks"),

        Member("bool", "has_coarse_pixel_primitive_and_cb", compiler_field=True,
               comment=dedent("""\
               Whether this platform supports fragment shading rate controlled by a
               primitive in geometry shaders and by a control buffer.""")),

        Member("bool", "has_compute_engine", comment="Whether this platform has compute engine"),

        Member("bool", "needs_unlit_centroid_workaround", compiler_field=True,
               comment=dedent("""\
               Some versions of Gen hardware don't do centroid interpolation correctly
               on unlit pixels, causing incorrect values for derivatives near triangle
               edges.  Enabling this flag causes the fragment shader to use
               non-centroid interpolation for unlit pixels, at the expense of two extra
               fragment shader instructions.""")),

        Member("bool", "is_adl_n", comment="We need this for ADL-N specific Wa_14014966230."),

        Member("unsigned", "num_slices",
               comment=dedent("""\
               GPU hardware limits

               In general, you can find shader thread maximums by looking at the "Maximum
               Number of Threads" field in the Intel PRM description of the 3DSTATE_VS,
               3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry
               limits come from the "Number of URB Entries" field in the
               3DSTATE_URB_VS command and friends.

               These fields are used to calculate the scratch space to allocate.  The
               amount of scratch space can be larger without being harmful on modern
               GPUs, however, prior to Haswell, programming the maximum number of threads
               to greater than the hardware maximum would cause GPU performance to tank.

               Total number of slices present on the device whether or not they've been
               fused off.

               XXX: CS thread counts are limited by the inability to do cross subslice
               communication. It is the effectively the number of logical threads which
               can be executed in a subslice. Fuse configurations may cause this number
               to change, so we program @max_cs_threads as the lower maximum.""")),

        Member("unsigned", "max_slices", compiler_field=True,
               comment=dedent("""\
               Maximum number of slices present on this device (can be more than
               num_slices if some slices are fused).""")),

        Member("unsigned", "num_subslices", array="INTEL_DEVICE_MAX_SLICES",
               comment="Number of subslices for each slice (used to be uniform until CNL)."),

        Member("unsigned", "max_subslices_per_slice", compiler_field=True,
               comment=dedent("""\
               Maximum number of subslices per slice present on this device (can be
               more than the maximum value in the num_subslices[] array if some
               subslices are fused).

               This is GT_SS_PER_SLICE in SKU.""")),

        Member("unsigned", "ppipe_subslices", array="INTEL_DEVICE_MAX_PIXEL_PIPES",
               comment="Number of subslices on each pixel pipe (ICL)."),

        Member("unsigned", "max_eus_per_subslice", compiler_field=True,
               comment="Maximum number of EUs per subslice (some EUs can be fused off)."),

        Member("unsigned", "num_thread_per_eu", compiler_field=True,
               comment="Number of threads per eu, varies between 4 and 8 between generations."),

        Member("uint8_t", "slice_masks",
               comment="A bit mask of the slices available."),

        Member("uint8_t", "subslice_masks",
               array="INTEL_DEVICE_MAX_SLICES * DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)",
               compiler_field=True,
               comment=dedent("""\
               An array of bit mask of the subslices available, use subslice_slice_stride
               to access this array.""")),

        Member("unsigned", "subslice_total",
               comment=dedent("""\
               The number of enabled subslices (considering fusing). For exactly which
               subslices are enabled, see subslice_masks[].""")),

        Member("uint8_t", "eu_masks",
               array=("INTEL_DEVICE_MAX_SLICES * INTEL_DEVICE_MAX_SUBSLICES * "
                      "DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)"),
               comment=dedent("""\
               An array of bit mask of EUs available, use eu_slice_stride &
               eu_subslice_stride to access this array.""")),

        Member("uint16_t", "subslice_slice_stride", compiler_field=True,
               comment="Stride to access subslice_masks[]."),

        Member("uint16_t", "eu_slice_stride",
               comment="Strides to access eu_masks[]."),

        Member("uint16_t", "eu_subslice_stride"),
        Member("unsigned", "l3_banks"),

        Member("unsigned", "max_vs_threads",
               comment="Maximum Vertex Shader threads"),

        Member("unsigned", "max_tcs_threads",
               comment="Maximum Hull Shader threads"),

        Member("unsigned", "max_tes_threads",
               comment="Maximum Domain Shader threads"),

        Member("unsigned", "max_gs_threads",
               comment="Maximum Geometry Shader threads"),

        Member("unsigned", "max_wm_threads",
               comment=dedent("""\
               Theoretical maximum number of Pixel Shader threads.

               PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will
               automatically scale pixel shader thread count, based on a single value
               programmed into 3DSTATE_PS.

               To calculate the maximum number of threads for Gfx8 beyond (which have
               multiple Pixel Shader Dispatchers):

               - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
               - Usually there's only one PSD per subslice, so use the number of
                 subslices for number of PSDs.
               - For max_wm_threads, the total should be PSD threads * #PSDs.""")),

        Member("unsigned", "max_threads_per_psd"),

        Member("unsigned", "max_cs_threads",
               comment=dedent("""\
               Maximum Compute Shader threads.

               Thread count * number of EUs per subslice""")),

        Member("unsigned", "max_cs_workgroup_threads", compiler_field=True,
               comment=dedent("""\
               Maximum number of threads per workgroup supported by the GPGPU_WALKER or
               COMPUTE_WALKER command.

               This may be smaller than max_cs_threads as it takes into account added
               restrictions on the GPGPU/COMPUTE_WALKER commands.  While max_cs_threads
               expresses the total parallelism of the GPU, this expresses the maximum
               number of threads we can dispatch in a single workgroup.""")),


        Member("unsigned", "max_scratch_ids", array="MESA_SHADER_STAGES", compiler_field=True,
               comment=dedent("""\
               The maximum number of potential scratch ids. Due to hardware
               implementation details, the range of scratch ids may be larger than the
               number of subslices.""")),

        Member("intel_device_info_urb_desc", "urb"),
        Member("unsigned", "max_constant_urb_size_kb"),
        Member("unsigned", "mesh_max_constant_urb_size_kb"),
        Member("unsigned", "engine_class_prefetch", array="INTEL_ENGINE_CLASS_COMPUTE + 1"),
        Member("unsigned", "mem_alignment"),
        Member("uint64_t", "timestamp_frequency"),
        Member("uint64_t", "aperture_bytes"),
        Member("uint64_t", "gtt_size"),
        Member("int", "simulator_id"),
        Member("char", "name", array="INTEL_DEVICE_MAX_NAME_SIZE"),
        Member("bool", "no_hw"),
        Member("bool", "apply_hwconfig"),
        Member("intel_device_info_mem_desc", "mem"),
        Member("intel_device_info_pat_desc", "pat"),
        Member("intel_cooperative_matrix_configuration",
               "cooperative_matrix_configurations", array=4)]
       )
461