• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "util/libdrm.h"
32 
33 #include "intel_device_info.h"
34 #include "intel_hwconfig.h"
35 #include "intel_wa.h"
36 #include "i915/intel_device_info.h"
37 #include "xe/intel_device_info.h"
38 
39 #include "common/intel_gem.h"
40 #include "util/u_debug.h"
41 #include "util/log.h"
42 #include "util/macros.h"
43 
/* Lookup table pairing a short device name with a PCI device ID.
 * intel_device_name_to_pci_device_id() scans it linearly and compares
 * names with strcmp(), so the match is exact and case-sensitive.
 */
44 static const struct {
45    const char *name;
46    int pci_id;
47 } name_map[] = {
48    { "lpt", 0x27a2 },
49    { "brw", 0x2a02 },
50    { "g4x", 0x2a42 },
51    { "ilk", 0x0042 },
52    { "snb", 0x0126 },
53    { "ivb", 0x016a },
54    { "hsw", 0x0d2e },
55    { "byt", 0x0f33 },
56    { "bdw", 0x162e },
57    { "chv", 0x22B3 },
58    { "skl", 0x1912 },
59    { "bxt", 0x5A85 },
60    { "kbl", 0x5912 },
61    { "aml", 0x591C },
62    { "glk", 0x3185 },
63    { "cfl", 0x3E9B },
64    { "whl", 0x3EA1 },
65    { "cml", 0x9b41 },
66    { "icl", 0x8a52 },
67    { "ehl", 0x4571 },
68    { "jsl", 0x4E71 },
69    { "tgl", 0x9a49 },
70    { "rkl", 0x4c8a },
71    { "dg1", 0x4905 },
72    { "adl", 0x4680 },
73    { "sg1", 0x4907 },
74    { "rpl", 0xa780 },
75    { "dg2", 0x5690 },
76    { "mtl", 0x7d60 },
77    { "arl", 0x7d67 },
78    { "lnl", 0x64a0 },
79    { "bmg", 0xe202 },
80    { "ptl", 0xb080 },
81 };
82 
83 /**
84  * Get the PCI ID for the device name.
85  *
86  * Returns -1 if the device is not known.
87  */
88 int
intel_device_name_to_pci_device_id(const char * name)89 intel_device_name_to_pci_device_id(const char *name)
90 {
91    for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {
92       if (!strcmp(name_map[i].name, name))
93          return name_map[i].pci_id;
94    }
95 
96    return -1;
97 }
98 
/* Device description for INTEL_PLATFORM_GFX3 (ver 3): 1 slice, 1 subslice,
 * 8 EUs per subslice, 4 threads per EU. Members not listed here are left
 * zero-initialized.
 */
99 static const struct intel_device_info intel_device_info_gfx3 = {
100    .ver = 3,
101    .platform = INTEL_PLATFORM_GFX3,
102    .simulator_id = -1,
103    .num_slices = 1,
104    .num_subslices = { 1, },
105    .max_eus_per_subslice = 8,
106    .num_thread_per_eu = 4,
107    .grf_size = 32,
108    .timestamp_frequency = 12500000,
109 };
110 
/* Device description for INTEL_PLATFORM_I965 (ver 4): 1 slice, 1 subslice,
 * 8 EUs per subslice, 4 threads per EU. max_wm_threads is written as 8 * 4,
 * i.e. the product of the EU count and threads-per-EU given above.
 */
111 static const struct intel_device_info intel_device_info_i965 = {
112    .ver = 4,
113    .platform = INTEL_PLATFORM_I965,
114    .has_negative_rhw_bug = true,
115    .num_slices = 1,
116    .num_subslices = { 1, },
117    .max_eus_per_subslice = 8,
118    .num_thread_per_eu = 4,
119    .grf_size = 32,
120    .max_vs_threads = 16,
121    .max_gs_threads = 2,
122    .max_wm_threads = 8 * 4,
123    .urb = {
124       .size = 256,
125    },
126    .timestamp_frequency = 12500000,
127    .simulator_id = -1,
128 };
129 
/* Device description for INTEL_PLATFORM_G4X (ver 4, verx10 45): 1 slice,
 * 1 subslice, 10 EUs per subslice, 5 threads per EU. max_wm_threads is
 * written as 10 * 5, the product of those two values.
 */
130 static const struct intel_device_info intel_device_info_g4x = {
131    .ver = 4,
132    .verx10 = 45,
133    .has_pln = true,
134    .has_compr4 = true,
135    .has_surface_tile_offset = true,
136    .platform = INTEL_PLATFORM_G4X,
137    .num_slices = 1,
138    .num_subslices = { 1, },
139    .max_eus_per_subslice = 10,
140    .num_thread_per_eu = 5,
141    .grf_size = 32,
142    .max_vs_threads = 32,
143    .max_gs_threads = 2,
144    .max_wm_threads = 10 * 5,
145    .urb = {
146       .size = 384,
147    },
148    .timestamp_frequency = 12500000,
149    .simulator_id = -1,
150 };
151 
/* Device description for INTEL_PLATFORM_ILK (ver 5): 1 slice, 1 subslice,
 * 12 EUs per subslice, 6 threads per EU. max_wm_threads is written as
 * 12 * 6, the product of those two values.
 */
152 static const struct intel_device_info intel_device_info_ilk = {
153    .ver = 5,
154    .platform = INTEL_PLATFORM_ILK,
155    .has_pln = true,
156    .has_compr4 = true,
157    .has_surface_tile_offset = true,
158    .num_slices = 1,
159    .num_subslices = { 1, },
160    .max_eus_per_subslice = 12,
161    .num_thread_per_eu = 6,
162    .grf_size = 32,
163    .max_vs_threads = 72,
164    .max_gs_threads = 32,
165    .max_wm_threads = 12 * 6,
166    .urb = {
167       .size = 1024,
168    },
169    .timestamp_frequency = 12500000,
170    .simulator_id = -1,
171 };
172 
/* Device description for INTEL_PLATFORM_SNB, gt = 1 (ver 6): 1 slice,
 * 1 subslice, 6 EUs per subslice. Only the vertex stage has a minimum URB
 * entry count; maximums are given for the vertex and geometry stages.
 */
173 static const struct intel_device_info intel_device_info_snb_gt1 = {
174    .ver = 6,
175    .gt = 1,
176    .platform = INTEL_PLATFORM_SNB,
177    .has_hiz_and_separate_stencil = true,
178    .has_llc = true,
179    .has_pln = true,
180    .has_surface_tile_offset = true,
181    .needs_unlit_centroid_workaround = true,
182    .num_slices = 1,
183    .num_subslices = { 1, },
184    .max_eus_per_subslice = 6,
185    .num_thread_per_eu = 6, /* Not confirmed */
186    .grf_size = 32,
187    .max_vs_threads = 24,
188    .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
189    .max_wm_threads = 40,
190    .urb = {
191       .size = 32,
192       .min_entries = {
193          [MESA_SHADER_VERTEX]   = 24,
194       },
195       .max_entries = {
196          [MESA_SHADER_VERTEX]   = 256,
197          [MESA_SHADER_GEOMETRY] = 256,
198       },
199    },
200    .timestamp_frequency = 12500000,
201    .simulator_id = -1,
202 };
203 
/* Device description for INTEL_PLATFORM_SNB, gt = 2 (ver 6): 1 slice,
 * 1 subslice, 12 EUs per subslice. Compared with the gt = 1 table it has
 * higher thread limits and urb.size = 64 instead of 32.
 */
204 static const struct intel_device_info intel_device_info_snb_gt2 = {
205    .ver = 6,
206    .gt = 2,
207    .platform = INTEL_PLATFORM_SNB,
208    .has_hiz_and_separate_stencil = true,
209    .has_llc = true,
210    .has_pln = true,
211    .has_surface_tile_offset = true,
212    .needs_unlit_centroid_workaround = true,
213    .num_slices = 1,
214    .num_subslices = { 1, },
215    .max_eus_per_subslice = 12,
216    .num_thread_per_eu = 6, /* Not confirmed */
217    .grf_size = 32,
218    .max_vs_threads = 60,
219    .max_gs_threads = 60,
220    .max_wm_threads = 80,
221    .urb = {
222       .size = 64,
223       .min_entries = {
224          [MESA_SHADER_VERTEX]   = 24,
225       },
226       .max_entries = {
227          [MESA_SHADER_VERTEX]   = 256,
228          [MESA_SHADER_GEOMETRY] = 256,
229       },
230    },
231    .timestamp_frequency = 12500000,
232    .simulator_id = -1,
233 };
234 
/* Designated initializers shared by the ver 7 device tables (IVB, BYT and,
 * through HSW_FEATURES, HSW). The expansion has no trailing comma; the user
 * supplies it. An initializer for the same member placed after the macro
 * overrides the value given here (e.g. BYT sets .has_llc = false).
 */
235 #define GFX7_FEATURES                               \
236    .ver = 7,                                        \
237    .has_hiz_and_separate_stencil = true,            \
238    .must_use_separate_stencil = true,               \
239    .has_llc = true,                                 \
240    .has_pln = true,                                 \
241    .has_64bit_float = true,                         \
242    .has_surface_tile_offset = true,                 \
243    .grf_size = 32,                                  \
244    .timestamp_frequency = 12500000,                 \
245    .max_constant_urb_size_kb = 16
246 
/* Device description for INTEL_PLATFORM_IVB, gt = 1, built on GFX7_FEATURES:
 * 1 slice, 1 subslice, 6 EUs per subslice, 6 threads per EU, 2 L3 banks.
 */
247 static const struct intel_device_info intel_device_info_ivb_gt1 = {
248    GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 1,
249    .num_slices = 1,
250    .num_subslices = { 1, },
251    .max_eus_per_subslice = 6,
252    .num_thread_per_eu = 6,
253    .l3_banks = 2,
254    .max_vs_threads = 36,
255    .max_tcs_threads = 36,
256    .max_tes_threads = 36,
257    .max_gs_threads = 36,
258    .max_wm_threads = 48,
259    .max_cs_threads = 36,
260    .urb = {
261       .min_entries = {
262          [MESA_SHADER_VERTEX]    = 32,
263          [MESA_SHADER_TESS_EVAL] = 10,
264       },
265       .max_entries = {
266          [MESA_SHADER_VERTEX]    = 512,
267          [MESA_SHADER_TESS_CTRL] = 32,
268          [MESA_SHADER_TESS_EVAL] = 288,
269          [MESA_SHADER_GEOMETRY]  = 192,
270       },
271    },
272    .simulator_id = 7,
273 };
274 
/* Device description for INTEL_PLATFORM_IVB, gt = 2, built on GFX7_FEATURES:
 * 1 slice, 1 subslice, 12 EUs per subslice, 8 threads per EU, 4 L3 banks.
 */
275 static const struct intel_device_info intel_device_info_ivb_gt2 = {
276    GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 2,
277    .num_slices = 1,
278    .num_subslices = { 1, },
279    .max_eus_per_subslice = 12,
280    .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
281                             * @max_wm_threads ... */
282    .l3_banks = 4,
283    .max_vs_threads = 128,
284    .max_tcs_threads = 128,
285    .max_tes_threads = 128,
286    .max_gs_threads = 128,
287    .max_wm_threads = 172,
288    .max_cs_threads = 64,
289    .urb = {
290       .min_entries = {
291          [MESA_SHADER_VERTEX]    = 32,
292          [MESA_SHADER_TESS_EVAL] = 10,
293       },
294       .max_entries = {
295          [MESA_SHADER_VERTEX]    = 704,
296          [MESA_SHADER_TESS_CTRL] = 64,
297          [MESA_SHADER_TESS_EVAL] = 448,
298          [MESA_SHADER_GEOMETRY]  = 320,
299       },
300    },
301    .simulator_id = 7,
302 };
303 
/* Device description for INTEL_PLATFORM_BYT, gt = 1, built on GFX7_FEATURES:
 * 1 slice, 1 subslice, 4 EUs per subslice, 8 threads per EU, 1 L3 bank.
 * The later .has_llc = false overrides the true set by GFX7_FEATURES.
 */
304 static const struct intel_device_info intel_device_info_byt = {
305    GFX7_FEATURES, .platform = INTEL_PLATFORM_BYT, .gt = 1,
306    .num_slices = 1,
307    .num_subslices = { 1, },
308    .max_eus_per_subslice = 4,
309    .num_thread_per_eu = 8,
310    .l3_banks = 1,
311    .has_llc = false,
312    .max_vs_threads = 36,
313    .max_tcs_threads = 36,
314    .max_tes_threads = 36,
315    .max_gs_threads = 36,
316    .max_wm_threads = 48,
317    .max_cs_threads = 32,
318    .urb = {
319       .min_entries = {
320          [MESA_SHADER_VERTEX]    = 32,
321          [MESA_SHADER_TESS_EVAL] = 10,
322       },
323       .max_entries = {
324          [MESA_SHADER_VERTEX]    = 512,
325          [MESA_SHADER_TESS_CTRL] = 32,
326          [MESA_SHADER_TESS_EVAL] = 288,
327          [MESA_SHADER_GEOMETRY]  = 192,
328       },
329    },
330    .simulator_id = 10,
331 };
332 
/* Initializers shared by the HSW device tables: everything from
 * GFX7_FEATURES plus the HSW platform id, verx10 = 75 and
 * supports_simd16_3src. No trailing comma in the expansion.
 */
333 #define HSW_FEATURES \
334    GFX7_FEATURES, \
335    .platform = INTEL_PLATFORM_HSW, \
336    .verx10 = 75, \
337    .supports_simd16_3src = true
338 
/* Device description for HSW, gt = 1, built on HSW_FEATURES: 1 slice,
 * 1 subslice, 10 EUs per subslice, 7 threads per EU, 2 L3 banks.
 */
339 static const struct intel_device_info intel_device_info_hsw_gt1 = {
340    HSW_FEATURES, .gt = 1,
341    .num_slices = 1,
342    .num_subslices = { 1, },
343    .max_eus_per_subslice = 10,
344    .num_thread_per_eu = 7,
345    .l3_banks = 2,
346    .max_vs_threads = 70,
347    .max_tcs_threads = 70,
348    .max_tes_threads = 70,
349    .max_gs_threads = 70,
350    .max_wm_threads = 102,
351    .max_cs_threads = 70,
352    .urb = {
353       .min_entries = {
354          [MESA_SHADER_VERTEX]    = 32,
355          [MESA_SHADER_TESS_EVAL] = 10,
356       },
357       .max_entries = {
358          [MESA_SHADER_VERTEX]    = 640,
359          [MESA_SHADER_TESS_CTRL] = 64,
360          [MESA_SHADER_TESS_EVAL] = 384,
361          [MESA_SHADER_GEOMETRY]  = 256,
362       },
363    },
364    .simulator_id = 9,
365 };
366 
/* Device description for HSW, gt = 2, built on HSW_FEATURES: 1 slice,
 * 2 subslices, 10 EUs per subslice, 7 threads per EU, 4 L3 banks.
 */
367 static const struct intel_device_info intel_device_info_hsw_gt2 = {
368    HSW_FEATURES, .gt = 2,
369    .num_slices = 1,
370    .num_subslices = { 2, },
371    .max_eus_per_subslice = 10,
372    .num_thread_per_eu = 7,
373    .l3_banks = 4,
374    .max_vs_threads = 280,
375    .max_tcs_threads = 256,
376    .max_tes_threads = 280,
377    .max_gs_threads = 256,
378    .max_wm_threads = 204,
379    .max_cs_threads = 70,
380    .urb = {
381       .min_entries = {
382          [MESA_SHADER_VERTEX]    = 64,
383          [MESA_SHADER_TESS_EVAL] = 10,
384       },
385       .max_entries = {
386          [MESA_SHADER_VERTEX]    = 1664,
387          [MESA_SHADER_TESS_CTRL] = 128,
388          [MESA_SHADER_TESS_EVAL] = 960,
389          [MESA_SHADER_GEOMETRY]  = 640,
390       },
391    },
392    .simulator_id = 9,
393 };
394 
/* Device description for HSW, gt = 3, built on HSW_FEATURES: 2 slices with
 * 2 subslices each, 10 EUs per subslice, 7 threads per EU, 8 L3 banks.
 * .max_constant_urb_size_kb = 32 overrides the 16 from GFX7_FEATURES.
 */
395 static const struct intel_device_info intel_device_info_hsw_gt3 = {
396    HSW_FEATURES, .gt = 3,
397    .num_slices = 2,
398    .num_subslices = { 2, 2, },
399    .max_eus_per_subslice = 10,
400    .num_thread_per_eu = 7,
401    .l3_banks = 8,
402    .max_vs_threads = 280,
403    .max_tcs_threads = 256,
404    .max_tes_threads = 280,
405    .max_gs_threads = 256,
406    .max_wm_threads = 408,
407    .max_cs_threads = 70,
408    .urb = {
409       .min_entries = {
410          [MESA_SHADER_VERTEX]    = 64,
411          [MESA_SHADER_TESS_EVAL] = 10,
412       },
413       .max_entries = {
414          [MESA_SHADER_VERTEX]    = 1664,
415          [MESA_SHADER_TESS_CTRL] = 128,
416          [MESA_SHADER_TESS_EVAL] = 960,
417          [MESA_SHADER_GEOMETRY]  = 640,
418       },
419    },
420    .max_constant_urb_size_kb = 32,
421    .simulator_id = 9,
422 };
423 
/* Designated initializers shared by the ver 8 device tables (BDW, CHV).
 * The GFX9 and GFX11 feature macros in this file also start from this
 * macro and then override individual members (including .ver) with later
 * initializers. No trailing comma in the expansion.
 */
424 /* It's unclear how well supported sampling from the hiz buffer is on GFX8,
425  * so keep things conservative for now and set has_sample_with_hiz = false.
426  */
427 #define GFX8_FEATURES                               \
428    .ver = 8,                                        \
429    .has_hiz_and_separate_stencil = true,            \
430    .must_use_separate_stencil = true,               \
431    .has_llc = true,                                 \
432    .has_sample_with_hiz = false,                    \
433    .has_pln = true,                                 \
434    .has_integer_dword_mul = true,                   \
435    .has_64bit_float = true,                         \
436    .has_64bit_int = true,                           \
437    .supports_simd16_3src = true,                    \
438    .has_surface_tile_offset = true,                 \
439    .num_thread_per_eu = 7,                          \
440    .grf_size = 32,                                  \
441    .max_vs_threads = 504,                           \
442    .max_tcs_threads = 504,                          \
443    .max_tes_threads = 504,                          \
444    .max_gs_threads = 504,                           \
445    .max_wm_threads = 384,                           \
446    .max_threads_per_psd = 64,                       \
447    .timestamp_frequency = 12500000,                 \
448    .max_constant_urb_size_kb = 32
449 
/* Device description for INTEL_PLATFORM_BDW, gt = 1, built on GFX8_FEATURES:
 * 1 slice, 2 subslices, 6 EUs per subslice, 2 L3 banks. The geometry-stage
 * URB maximum is lower than in the gt = 2 and gt = 3 tables (see the inline
 * comment).
 */
450 static const struct intel_device_info intel_device_info_bdw_gt1 = {
451    GFX8_FEATURES, .gt = 1,
452    .platform = INTEL_PLATFORM_BDW,
453    .num_slices = 1,
454    .num_subslices = { 2, },
455    .max_eus_per_subslice = 6,
456    .l3_banks = 2,
457    .max_cs_threads = 42,
458    .urb = {
459       .min_entries = {
460          [MESA_SHADER_VERTEX]    = 64,
461          [MESA_SHADER_TESS_EVAL] = 34,
462       },
463       .max_entries = {
464          [MESA_SHADER_VERTEX]    = 2560,
465          [MESA_SHADER_TESS_CTRL] = 504,
466          [MESA_SHADER_TESS_EVAL] = 1536,
467          /* Reduced from 960, seems to be similar to the bug on Gfx9 GT1. */
468          [MESA_SHADER_GEOMETRY]  = 690,
469       },
470    },
471    .simulator_id = 11,
472 };
473 
/* Device description for INTEL_PLATFORM_BDW, gt = 2, built on GFX8_FEATURES:
 * 1 slice, 3 subslices, 8 EUs per subslice, 4 L3 banks.
 */
474 static const struct intel_device_info intel_device_info_bdw_gt2 = {
475    GFX8_FEATURES, .gt = 2,
476    .platform = INTEL_PLATFORM_BDW,
477    .num_slices = 1,
478    .num_subslices = { 3, },
479    .max_eus_per_subslice = 8,
480    .l3_banks = 4,
481    .max_cs_threads = 56,
482    .urb = {
483       .min_entries = {
484          [MESA_SHADER_VERTEX]    = 64,
485          [MESA_SHADER_TESS_EVAL] = 34,
486       },
487       .max_entries = {
488          [MESA_SHADER_VERTEX]    = 2560,
489          [MESA_SHADER_TESS_CTRL] = 504,
490          [MESA_SHADER_TESS_EVAL] = 1536,
491          [MESA_SHADER_GEOMETRY]  = 960,
492       },
493    },
494    .simulator_id = 11,
495 };
496 
/* Device description for INTEL_PLATFORM_BDW, gt = 3, built on GFX8_FEATURES:
 * 2 slices with 3 subslices each, 8 EUs per subslice, 8 L3 banks.
 */
497 static const struct intel_device_info intel_device_info_bdw_gt3 = {
498    GFX8_FEATURES, .gt = 3,
499    .platform = INTEL_PLATFORM_BDW,
500    .num_slices = 2,
501    .num_subslices = { 3, 3, },
502    .max_eus_per_subslice = 8,
503    .l3_banks = 8,
504    .max_cs_threads = 56,
505    .urb = {
506       .min_entries = {
507          [MESA_SHADER_VERTEX]    = 64,
508          [MESA_SHADER_TESS_EVAL] = 34,
509       },
510       .max_entries = {
511          [MESA_SHADER_VERTEX]    = 2560,
512          [MESA_SHADER_TESS_CTRL] = 504,
513          [MESA_SHADER_TESS_EVAL] = 1536,
514          [MESA_SHADER_GEOMETRY]  = 960,
515       },
516    },
517    .simulator_id = 11,
518 };
519 
/* Device description for INTEL_PLATFORM_CHV, gt = 1, built on GFX8_FEATURES:
 * 1 slice, 2 subslices, 8 EUs per subslice, 2 L3 banks. The initializers
 * after the macro override its values for has_llc, has_integer_dword_mul
 * and the per-stage thread limits.
 */
520 static const struct intel_device_info intel_device_info_chv = {
521    GFX8_FEATURES, .platform = INTEL_PLATFORM_CHV, .gt = 1,
522    .has_llc = false,
523    .has_integer_dword_mul = false,
524    .num_slices = 1,
525    .num_subslices = { 2, },
526    .max_eus_per_subslice = 8,
527    .l3_banks = 2,
528    .max_vs_threads = 80,
529    .max_tcs_threads = 80,
530    .max_tes_threads = 80,
531    .max_gs_threads = 80,
532    .max_wm_threads = 128,
533    .max_cs_threads = 6 * 7,
534    .urb = {
535       .min_entries = {
536          [MESA_SHADER_VERTEX]    = 34,
537          [MESA_SHADER_TESS_EVAL] = 34,
538       },
539       .max_entries = {
540          [MESA_SHADER_VERTEX]    = 640,
541          [MESA_SHADER_TESS_CTRL] = 80,
542          [MESA_SHADER_TESS_EVAL] = 384,
543          [MESA_SHADER_GEOMETRY]  = 256,
544       },
545    },
546    .simulator_id = 13,
547 };
548 
/* ver 9 hardware limits: thread counts, timestamp frequency and URB entry
 * bounds. GFX9_FEATURES and GFX9_LP_FEATURES place it right after
 * GFX8_FEATURES, so the members set here replace the ver 8 values.
 * No trailing comma in the expansion.
 */
549 #define GFX9_HW_INFO                                \
550    .ver = 9,                                        \
551    .max_vs_threads = 336,                           \
552    .max_gs_threads = 336,                           \
553    .max_tcs_threads = 336,                          \
554    .max_tes_threads = 336,                          \
555    .max_threads_per_psd = 64,                       \
556    .max_cs_threads = 56,                            \
557    .timestamp_frequency = 12000000,                 \
558    .urb = {                                         \
559       .min_entries = {                              \
560          [MESA_SHADER_VERTEX]    = 64,              \
561          [MESA_SHADER_TESS_EVAL] = 34,              \
562       },                                            \
563       .max_entries = {                              \
564          [MESA_SHADER_VERTEX]    = 1856,            \
565          [MESA_SHADER_TESS_CTRL] = 672,             \
566          [MESA_SHADER_TESS_EVAL] = 1120,            \
567          [MESA_SHADER_GEOMETRY]  = 640,             \
568       },                                            \
569    }
570 
/* Base initializers for the BXT and GLK tables (via the _3X6 / _2X6
 * variants). Starts from GFX8_FEATURES and GFX9_HW_INFO, then overrides
 * them: no LLC, no integer dword mul, 6 threads per EU, lower thread
 * limits, a 19200000 timestamp frequency and a whole new .urb initializer.
 * No trailing comma in the expansion.
 */
571 #define GFX9_LP_FEATURES                           \
572    GFX8_FEATURES,                                  \
573    GFX9_HW_INFO,                                   \
574    .has_integer_dword_mul = false,                 \
575    .gt = 1,                                        \
576    .has_llc = false,                               \
577    .has_sample_with_hiz = true,                    \
578    .has_illegal_ccs_values = true,                 \
579    .num_slices = 1,                                \
580    .num_thread_per_eu = 6,                         \
581    .max_vs_threads = 112,                          \
582    .max_tcs_threads = 112,                         \
583    .max_tes_threads = 112,                         \
584    .max_gs_threads = 112,                          \
585    .max_cs_threads = 6 * 6,                        \
586    .timestamp_frequency = 19200000,                \
587    .urb = {                                        \
588       .min_entries = {                             \
589          [MESA_SHADER_VERTEX]    = 34,             \
590          [MESA_SHADER_TESS_EVAL] = 34,             \
591       },                                           \
592       .max_entries = {                             \
593          [MESA_SHADER_VERTEX]    = 704,            \
594          [MESA_SHADER_TESS_CTRL] = 256,            \
595          [MESA_SHADER_TESS_EVAL] = 416,            \
596          [MESA_SHADER_GEOMETRY]  = 256,            \
597       },                                           \
598    }
599 
/* GFX9_LP_FEATURES with 3 subslices of 6 EUs each. */
600 #define GFX9_LP_FEATURES_3X6                       \
601    GFX9_LP_FEATURES,                               \
602    .num_subslices = { 3, },                        \
603    .max_eus_per_subslice = 6
604 
/* GFX9_LP_FEATURES with 2 subslices of 6 EUs each. It also overrides the
 * VS/TCS/TES/GS thread limits (56 instead of 112) and supplies a new .urb
 * initializer whose max_entries are half of those in GFX9_LP_FEATURES.
 */
605 #define GFX9_LP_FEATURES_2X6                       \
606    GFX9_LP_FEATURES,                               \
607    .num_subslices = { 2, },                        \
608    .max_eus_per_subslice = 6,                       \
609    .max_vs_threads = 56,                           \
610    .max_tcs_threads = 56,                          \
611    .max_tes_threads = 56,                          \
612    .max_gs_threads = 56,                           \
613    .max_cs_threads = 6 * 6,                        \
614    .urb = {                                        \
615       .min_entries = {                             \
616          [MESA_SHADER_VERTEX]    = 34,             \
617          [MESA_SHADER_TESS_EVAL] = 34,             \
618       },                                           \
619       .max_entries = {                             \
620          [MESA_SHADER_VERTEX]    = 352,            \
621          [MESA_SHADER_TESS_CTRL] = 128,            \
622          [MESA_SHADER_TESS_EVAL] = 208,            \
623          [MESA_SHADER_GEOMETRY]  = 128,            \
624       },                                           \
625    }
626 
/* Base initializers for the SKL, KBL and CFL tables: GFX8_FEATURES, then
 * GFX9_HW_INFO, then has_sample_with_hiz / has_illegal_ccs_values enabled
 * and three cooperative-matrix configurations. No trailing comma in the
 * expansion.
 */
627 #define GFX9_FEATURES                               \
628    GFX8_FEATURES,                                   \
629    GFX9_HW_INFO,                                    \
630    .has_sample_with_hiz = true,                     \
631    .has_illegal_ccs_values = true,                                    \
632    .cooperative_matrix_configurations = {                             \
633     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
634     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
635     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
636    }
637 
/* Device description for INTEL_PLATFORM_SKL, gt = 1, built on GFX9_FEATURES:
 * 1 slice, 2 subslices, 6 EUs per subslice, 2 L3 banks. The nested
 * designator below replaces only the vertex-stage URB maximum that
 * GFX9_HW_INFO set to 1856.
 */
638 static const struct intel_device_info intel_device_info_skl_gt1 = {
639    GFX9_FEATURES, .gt = 1,
640    .platform = INTEL_PLATFORM_SKL,
641    .num_slices = 1,
642    .num_subslices = { 2, },
643    .max_eus_per_subslice = 6,
644    .l3_banks = 2,
645    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
646     * leading to some vertices to go missing if we use too much URB.
647     */
648    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
649    .simulator_id = 12,
650 };
651 
/* Device description for INTEL_PLATFORM_SKL, gt = 2, built on GFX9_FEATURES:
 * 1 slice, 3 subslices, 8 EUs per subslice, 4 L3 banks.
 */
652 static const struct intel_device_info intel_device_info_skl_gt2 = {
653    GFX9_FEATURES, .gt = 2,
654    .platform = INTEL_PLATFORM_SKL,
655    .num_slices = 1,
656    .num_subslices = { 3, },
657    .max_eus_per_subslice = 8,
658    .l3_banks = 4,
659    .simulator_id = 12,
660 };
661 
/* Device description for INTEL_PLATFORM_SKL, gt = 3, built on GFX9_FEATURES:
 * 2 slices with 3 subslices each, 8 EUs per subslice, 8 L3 banks.
 */
662 static const struct intel_device_info intel_device_info_skl_gt3 = {
663    GFX9_FEATURES, .gt = 3,
664    .platform = INTEL_PLATFORM_SKL,
665    .num_slices = 2,
666    .num_subslices = { 3, 3, },
667    .max_eus_per_subslice = 8,
668    .l3_banks = 8,
669    .simulator_id = 12,
670 };
671 
/* Device description for INTEL_PLATFORM_SKL, gt = 4, built on GFX9_FEATURES:
 * 3 slices with 3 subslices each, 8 EUs per subslice, 12 L3 banks.
 * NOTE(review): the quoted URB-limit text below is not followed by any
 * URB-size initializer in this table; presumably the limit is applied
 * elsewhere - confirm.
 */
672 static const struct intel_device_info intel_device_info_skl_gt4 = {
673    GFX9_FEATURES, .gt = 4,
674    .platform = INTEL_PLATFORM_SKL,
675    .num_slices = 3,
676    .num_subslices = { 3, 3, 3, },
677    .max_eus_per_subslice = 8,
678    .l3_banks = 12,
679    /* From the "L3 Allocation and Programming" documentation:
680     *
681     * "URB is limited to 1008KB due to programming restrictions.  This is not a
682     * restriction of the L3 implementation, but of the FF and other clients.
683     * Therefore, in a GT4 implementation it is possible for the programmed
684     * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
685     * only 1008KB of this will be used."
686     */
687    .simulator_id = 12,
688 };
689 
/* Device description for INTEL_PLATFORM_BXT using the 3x6 low-power
 * configuration (GFX9_LP_FEATURES_3X6), with 2 L3 banks.
 */
690 static const struct intel_device_info intel_device_info_bxt = {
691    GFX9_LP_FEATURES_3X6,
692    .platform = INTEL_PLATFORM_BXT,
693    .l3_banks = 2,
694    .simulator_id = 14,
695 };
696 
/* Device description for INTEL_PLATFORM_BXT using the 2x6 low-power
 * configuration (GFX9_LP_FEATURES_2X6), with 1 L3 bank.
 */
697 static const struct intel_device_info intel_device_info_bxt_2x6 = {
698    GFX9_LP_FEATURES_2X6,
699    .platform = INTEL_PLATFORM_BXT,
700    .l3_banks = 1,
701    .simulator_id = 14,
702 };
703 /*
704  * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
705  * There's no KBL entry. Using the default SKL (GFX9) GS entries value.
706  */
707 
/* Device description for INTEL_PLATFORM_KBL, gt = 1, built on GFX9_FEATURES:
 * 1 slice, 2 subslices, 6 EUs per subslice, 2 L3 banks. Overrides values
 * from GFX9_HW_INFO: max_cs_threads (7 * 6 instead of 56) and the vertex
 * and geometry URB maximums (928 and 256 instead of 1856 and 640).
 */
708 static const struct intel_device_info intel_device_info_kbl_gt1 = {
709    GFX9_FEATURES,
710    .platform = INTEL_PLATFORM_KBL,
711    .gt = 1,
712 
713    .max_cs_threads = 7 * 6,
714    .num_slices = 1,
715    .num_subslices = { 2, },
716    .max_eus_per_subslice = 6,
717    .l3_banks = 2,
718    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
719     * leading to some vertices to go missing if we use too much URB.
720     */
721    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
722    .urb.max_entries[MESA_SHADER_GEOMETRY] = 256,
723    .simulator_id = 16,
724 };
725 
/* Device description for the KBL "gt1_5" variant, built on GFX9_FEATURES:
 * 1 slice, 3 subslices, 6 EUs per subslice, 4 L3 banks. Note that .gt is
 * set to 1 here; max_cs_threads (7 * 6) overrides the 56 from GFX9_HW_INFO.
 */
726 static const struct intel_device_info intel_device_info_kbl_gt1_5 = {
727    GFX9_FEATURES,
728    .platform = INTEL_PLATFORM_KBL,
729    .gt = 1,
730 
731    .max_cs_threads = 7 * 6,
732    .num_slices = 1,
733    .num_subslices = { 3, },
734    .max_eus_per_subslice = 6,
735    .l3_banks = 4,
736    .simulator_id = 16,
737 };
738 
/* Device description for INTEL_PLATFORM_KBL, gt = 2, built on GFX9_FEATURES:
 * 1 slice, 3 subslices, 8 EUs per subslice, 4 L3 banks.
 */
739 static const struct intel_device_info intel_device_info_kbl_gt2 = {
740    GFX9_FEATURES,
741    .platform = INTEL_PLATFORM_KBL,
742    .gt = 2,
743 
744    .num_slices = 1,
745    .num_subslices = { 3, },
746    .max_eus_per_subslice = 8,
747    .l3_banks = 4,
748    .simulator_id = 16,
749 };
750 
/* Device description for INTEL_PLATFORM_KBL, gt = 3, built on GFX9_FEATURES:
 * 2 slices with 3 subslices each, 8 EUs per subslice, 8 L3 banks.
 */
751 static const struct intel_device_info intel_device_info_kbl_gt3 = {
752    GFX9_FEATURES,
753    .platform = INTEL_PLATFORM_KBL,
754    .gt = 3,
755 
756    .num_slices = 2,
757    .num_subslices = { 3, 3, },
758    .max_eus_per_subslice = 8,
759    .l3_banks = 8,
760    .simulator_id = 16,
761 };
762 
/* Device description for INTEL_PLATFORM_KBL, gt = 4, built on GFX9_FEATURES:
 * 3 slices with 3 subslices each, 8 EUs per subslice, 12 L3 banks.
 */
763 static const struct intel_device_info intel_device_info_kbl_gt4 = {
764    GFX9_FEATURES,
765    .platform = INTEL_PLATFORM_KBL,
766    .gt = 4,
767 
768    /*
769     * From the "L3 Allocation and Programming" documentation:
770     *
771     * "URB is limited to 1008KB due to programming restrictions.  This
772     *  is not a restriction of the L3 implementation, but of the FF and
773     *  other clients.  Therefore, in a GT4 implementation it is
774     *  possible for the programmed allocation of the L3 data array to
775     *  provide 3*384KB=1152KB for URB, but only 1008KB of this
776     *  will be used."
777     */
778    .num_slices = 3,
779    .num_subslices = { 3, 3, 3, },
780    .max_eus_per_subslice = 8,
781    .l3_banks = 12,
782    .simulator_id = 16,
783 };
784 
/* Device description for INTEL_PLATFORM_GLK using the 3x6 low-power
 * configuration (GFX9_LP_FEATURES_3X6), with 2 L3 banks.
 */
785 static const struct intel_device_info intel_device_info_glk = {
786    GFX9_LP_FEATURES_3X6,
787    .platform = INTEL_PLATFORM_GLK,
788    .l3_banks = 2,
789    .simulator_id = 17,
790 };
791 
/* Device description for INTEL_PLATFORM_GLK using the 2x6 low-power
 * configuration (GFX9_LP_FEATURES_2X6), with 2 L3 banks (the BXT 2x6 table
 * uses 1).
 */
792 static const struct intel_device_info intel_device_info_glk_2x6 = {
793    GFX9_LP_FEATURES_2X6,
794    .platform = INTEL_PLATFORM_GLK,
795    .l3_banks = 2,
796    .simulator_id = 17,
797 };
798 
/* Device description for INTEL_PLATFORM_CFL, gt = 1, built on GFX9_FEATURES:
 * 1 slice, 2 subslices, 6 EUs per subslice, 2 L3 banks. The vertex and
 * geometry URB maximums override the GFX9_HW_INFO values (1856 and 640).
 */
799 static const struct intel_device_info intel_device_info_cfl_gt1 = {
800    GFX9_FEATURES,
801    .platform = INTEL_PLATFORM_CFL,
802    .gt = 1,
803 
804    .num_slices = 1,
805    .num_subslices = { 2, },
806    .max_eus_per_subslice = 6,
807    .l3_banks = 2,
808    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
809     * leading to some vertices to go missing if we use too much URB.
810     */
811    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
812    .urb.max_entries[MESA_SHADER_GEOMETRY] = 256,
813    .simulator_id = 24,
814 };
/* Device description for INTEL_PLATFORM_CFL, gt = 2, built on GFX9_FEATURES:
 * 1 slice, 3 subslices, 8 EUs per subslice, 4 L3 banks.
 */
815 static const struct intel_device_info intel_device_info_cfl_gt2 = {
816    GFX9_FEATURES,
817    .platform = INTEL_PLATFORM_CFL,
818    .gt = 2,
819 
820    .num_slices = 1,
821    .num_subslices = { 3, },
822    .max_eus_per_subslice = 8,
823    .l3_banks = 4,
824    .simulator_id = 24,
825 };
826 
/* Device description for INTEL_PLATFORM_CFL, gt = 3, built on GFX9_FEATURES:
 * 2 slices with 3 subslices each, 8 EUs per subslice, 8 L3 banks.
 */
827 static const struct intel_device_info intel_device_info_cfl_gt3 = {
828    GFX9_FEATURES,
829    .platform = INTEL_PLATFORM_CFL,
830    .gt = 3,
831 
832    .num_slices = 2,
833    .num_subslices = { 3, 3, },
834    .max_eus_per_subslice = 8,
835    .l3_banks = 8,
836    .simulator_id = 24,
837 };
838 
/* Wrap a comma-separated list of subslice counts in braces so the whole
 * list can be passed as one macro argument (see GFX11_FEATURES).
 * Written with standard C99 variadic-macro syntax instead of the GNU
 * named-variadic extension; the expansion is unchanged.
 */
#define subslices(...) { __VA_ARGS__, }
840 
/* ver 11 overrides applied on top of GFX8_FEATURES by GFX11_FEATURES:
 * version number, has_pln disabled and per-stage thread limits.
 * No trailing comma in the expansion.
 */
841 #define GFX11_HW_INFO                               \
842    .ver = 11,                                       \
843    .has_pln = false,                                \
844    .max_vs_threads = 364,                           \
845    .max_gs_threads = 224,                           \
846    .max_tcs_threads = 224,                          \
847    .max_tes_threads = 364,                          \
848    .max_threads_per_psd = 64,                       \
849    .max_cs_threads = 56
850 
/* Base initializers for the ICL and EHL tables. Starts from GFX8_FEATURES
 * and GFX11_HW_INFO, then turns off 64-bit float/int, integer dword mul and
 * sample-with-hiz.
 *
 *   _gt        value for .gt
 *   _slices    value for .num_slices
 *   _subslices braced initializer for .num_subslices (use subslices())
 *   _l3        value for .l3_banks
 *   _platform  value for .platform
 *
 * max_eus_per_subslice defaults to 8; several EHL tables override it.
 * No trailing comma in the expansion.
 */
851 #define GFX11_FEATURES(_gt, _slices, _subslices, _l3, _platform)  \
852    GFX8_FEATURES,                                     \
853    GFX11_HW_INFO,                                     \
854    .platform = _platform,                             \
855    .has_64bit_float = false,                          \
856    .has_64bit_int = false,                            \
857    .has_integer_dword_mul = false,                    \
858    .has_sample_with_hiz = false,                      \
859    .has_illegal_ccs_values = true,                    \
860    .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
861    .num_subslices = _subslices,                       \
862    .max_eus_per_subslice = 8,                                         \
863    .cooperative_matrix_configurations = {                             \
864     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
865     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
866     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
867    }
868 
/* URB min/max entry counts shared by the ICL and EHL tables. Expands to
 * the .min_entries and .max_entries members only, so it must be used
 * inside a `.urb = { ... }` initializer. No trailing comma in the expansion.
 */
869 #define GFX11_URB_MIN_MAX_ENTRIES                     \
870    .min_entries = {                                   \
871       [MESA_SHADER_VERTEX]    = 64,                   \
872       [MESA_SHADER_TESS_EVAL] = 34,                   \
873    },                                                 \
874    .max_entries = {                                   \
875       [MESA_SHADER_VERTEX]    = 2384,                 \
876       [MESA_SHADER_TESS_CTRL] = 1032,                 \
877       [MESA_SHADER_TESS_EVAL] = 2384,                 \
878       [MESA_SHADER_GEOMETRY]  = 1032,                 \
879    }
880 
/* Device description for INTEL_PLATFORM_ICL with gt = 2: 1 slice,
 * 8 subslices, 8 L3 banks, and the shared GFX11 URB entry limits.
 */
881 static const struct intel_device_info intel_device_info_icl_gt2 = {
882    GFX11_FEATURES(2, 1, subslices(8), 8, INTEL_PLATFORM_ICL),
883    .urb = {
884       GFX11_URB_MIN_MAX_ENTRIES,
885    },
886    .simulator_id = 19,
887 };
888 
/* Device description for the ICL "gt1_5" variant: .gt is set to 1, with
 * 1 slice, 6 subslices, 6 L3 banks, and the shared GFX11 URB entry limits.
 */
889 static const struct intel_device_info intel_device_info_icl_gt1_5 = {
890    GFX11_FEATURES(1, 1, subslices(6), 6, INTEL_PLATFORM_ICL),
891    .urb = {
892       GFX11_URB_MIN_MAX_ENTRIES,
893    },
894    .simulator_id = 19,
895 };
896 
/* Device description for INTEL_PLATFORM_ICL with gt = 1: 1 slice,
 * 4 subslices, 6 L3 banks, and the shared GFX11 URB entry limits.
 */
897 static const struct intel_device_info intel_device_info_icl_gt1 = {
898    GFX11_FEATURES(1, 1, subslices(4), 6, INTEL_PLATFORM_ICL),
899    .urb = {
900       GFX11_URB_MIN_MAX_ENTRIES,
901    },
902    .simulator_id = 19,
903 };
904 
/* Device description for the ICL "gt0_5" variant: .gt is set to 1, with
 * 1 slice, 1 subslice, 6 L3 banks, and the shared GFX11 URB entry limits.
 */
905 static const struct intel_device_info intel_device_info_icl_gt0_5 = {
906    GFX11_FEATURES(1, 1, subslices(1), 6, INTEL_PLATFORM_ICL),
907    .urb = {
908       GFX11_URB_MIN_MAX_ENTRIES,
909    },
910    .simulator_id = 19,
911 };
912 
/* Extra initializers shared by the EHL device descriptions: the common Gfx11
 * URB limits, the two CCS-related flags and simulator id 28.  Intended to
 * follow GFX11_FEATURES(...) inside a struct intel_device_info initializer.
 */
#define GFX11_LP_FEATURES                           \
   .urb = {                                         \
      GFX11_URB_MIN_MAX_ENTRIES,                    \
   },                                               \
   .disable_ccs_repack = true,                      \
   .has_illegal_ccs_values = true,                  \
   .simulator_id = 28
920 
/* EHL device descriptions.
 *
 * The "NxM" suffix matches the subslices(N) argument and the
 * max_eus_per_subslice value M.  The x8 variants do not set
 * max_eus_per_subslice themselves; the others add a trailing designator,
 * which (being later in the initializer list) takes precedence over any value
 * supplied by the feature macros.
 * NOTE(review): the x8 default presumably comes from GFX11_FEATURES -- confirm
 * against its definition.
 */

/* 4 subslices, default EUs per subslice. */
static const struct intel_device_info intel_device_info_ehl_4x8 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
};

/* 4 subslices, 6 EUs per subslice. */
static const struct intel_device_info intel_device_info_ehl_4x6 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   .max_eus_per_subslice = 6,
};

/* 4 subslices, 5 EUs per subslice. */
static const struct intel_device_info intel_device_info_ehl_4x5 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   .max_eus_per_subslice = 5,
};

/* 4 subslices, 4 EUs per subslice. */
static const struct intel_device_info intel_device_info_ehl_4x4 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   .max_eus_per_subslice = 4,
};

/* 2 subslices, default EUs per subslice. */
static const struct intel_device_info intel_device_info_ehl_2x8 = {
   GFX11_FEATURES(1, 1, subslices(2), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
};

/* 2 subslices, 4 EUs per subslice. */
static const struct intel_device_info intel_device_info_ehl_2x4 = {
   GFX11_FEATURES(1, 1, subslices(2), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   .max_eus_per_subslice = 4,
};
954 
/* Hardware limits common to the Gfx12 device descriptions built with
 * GFX12_FEATURES: version number, a few capability flags, per-stage thread
 * maximums and the URB size / entry limits.  Individual devices may override
 * any of these with a later designator (e.g. DG1 sets .urb.size).
 */
#define GFX12_HW_INFO                               \
   .ver = 12,                                       \
   .has_pln = false,                                \
   .has_sample_with_hiz = false,                    \
   .has_aux_map = true,                             \
   .max_vs_threads = 546,                           \
   .max_gs_threads = 336,                           \
   .max_tcs_threads = 336,                          \
   .max_tes_threads = 546,                          \
   .max_threads_per_psd = 64,                       \
   .max_cs_threads = 112, /* threads per DSS */     \
   .urb = {                                         \
      .size = 512, /* For intel_stub_gpu */         \
      .min_entries = {                              \
         [MESA_SHADER_VERTEX]    = 64,              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
      },                                            \
      .max_entries = {                              \
         [MESA_SHADER_VERTEX]    = 3576,            \
         [MESA_SHADER_TESS_CTRL] = 1548,            \
         [MESA_SHADER_TESS_EVAL] = 3576,            \
         [MESA_SHADER_GEOMETRY]  = 1548,            \
      },                                            \
   }
979 
/* Full initializer list for a Gfx12 device description.
 *
 * Starts from GFX8_FEATURES and GFX12_HW_INFO, then clears the 64-bit
 * float/int and integer dword multiply flags, fills in the topology
 * parameters, and adds the simulator id, PAT entries and cooperative matrix
 * configurations.
 *
 *   _gt     - value stored in .gt
 *   _slices - value stored in .num_slices
 *   _l3     - value stored in .l3_banks
 *
 * .num_subslices and .platform are not set here; users of the macro supply
 * them.
 */
#define GFX12_FEATURES(_gt, _slices, _l3)                       \
   GFX8_FEATURES,                                               \
   GFX12_HW_INFO,                                               \
   .has_64bit_float = false,                                    \
   .has_64bit_int = false,                                      \
   .has_integer_dword_mul = false,                              \
   .gt = _gt, .num_slices = _slices, .l3_banks = _l3,           \
   .simulator_id = 22,                                          \
   .max_eus_per_subslice = 16,                                  \
   /* BSpec 45101 (r51017) */                                   \
   .pat = {                                                     \
         /* CPU: WB, GPU: PAT 0 => WB, 2WAY */                  \
         .cached_coherent = PAT_ENTRY(0, WB),                   \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .scanout = PAT_ENTRY(1, WC),                           \
         /* CPU: WB, GPU: PAT 0 => WB, 2WAY */                  \
         .writeback_incoherent = PAT_ENTRY(0, WB),              \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .writecombining = PAT_ENTRY(1, WC),                    \
   },                                                           \
   .cooperative_matrix_configurations = {                       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
   }
1005 
/* Wraps its arguments in a brace-enclosed initializer list (with a trailing
 * comma); used below as the initializer for .num_subslices.  Uses the GNU
 * named-variadic-macro syntax ("args...").
 */
#define dual_subslices(args...) { args, }
1007 
/* Gfx12 "GT0.5" configuration: gt = 1, one slice, 4 L3 banks and
 * .num_subslices initialized with dual_subslices(1).
 */
#define GFX12_GT05_FEATURES                                     \
   GFX12_FEATURES(1, 1, 4),                                     \
   .num_subslices = dual_subslices(1)
1011 
/* Gfx12 GT1/GT2 configuration selected by _gt, always with one slice:
 *   _gt == 1: 4 L3 banks, dual_subslices(2)
 *   otherwise: 8 L3 banks, dual_subslices(6)
 * _gt is pasted unparenthesized into the comparisons, so pass a plain
 * integer constant.
 */
#define GFX12_GT_FEATURES(_gt)                                  \
   GFX12_FEATURES(_gt, 1, _gt == 1 ? 4 : 8),                    \
   .num_subslices = dual_subslices(_gt == 1 ? 2 : 6)
1015 
/* Gfx12 device descriptions for the TGL, RKL, ADL and RPL platforms.
 *
 * Each entry picks one of the GFX12_*_FEATURES configurations above and then
 * sets .platform (plus .is_adl_n for the ADL-N entry).
 */

/* TGL, GT1 configuration. */
static const struct intel_device_info intel_device_info_tgl_gt1 = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_TGL,
};

/* TGL, GT2 configuration. */
static const struct intel_device_info intel_device_info_tgl_gt2 = {
   GFX12_GT_FEATURES(2),
   .platform = INTEL_PLATFORM_TGL,
};

/* RKL, GT0.5 configuration. */
static const struct intel_device_info intel_device_info_rkl_gt05 = {
   GFX12_GT05_FEATURES,
   .platform = INTEL_PLATFORM_RKL,
};

/* RKL, GT1 configuration. */
static const struct intel_device_info intel_device_info_rkl_gt1 = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_RKL,
};

/* ADL, GT0.5 configuration. */
static const struct intel_device_info intel_device_info_adl_gt05 = {
   GFX12_GT05_FEATURES,
   .platform = INTEL_PLATFORM_ADL,
};

/* ADL, GT1 configuration. */
static const struct intel_device_info intel_device_info_adl_gt1 = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_ADL,
};

/* ADL-N: same as the ADL GT1 configuration with is_adl_n set. */
static const struct intel_device_info intel_device_info_adl_n = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_ADL,
   .is_adl_n = true,
};

/* ADL, GT2 configuration. */
static const struct intel_device_info intel_device_info_adl_gt2 = {
   GFX12_GT_FEATURES(2),
   .platform = INTEL_PLATFORM_ADL,
};

/* RPL: spelled out by hand, but the values equal what GFX12_GT_FEATURES(1)
 * expands to (gt 1, one slice, 4 L3 banks, dual_subslices(2)).
 */
static const struct intel_device_info intel_device_info_rpl = {
   GFX12_FEATURES(1, 1, 4),
   .num_subslices = dual_subslices(2),
   .platform = INTEL_PLATFORM_RPL,
};

/* RPL-P, GT2 configuration. */
static const struct intel_device_info intel_device_info_rpl_p = {
   GFX12_GT_FEATURES(2),
   .platform = INTEL_PLATFORM_RPL,
};
1067 
/* Initializers shared by the DG1 and SG1 device descriptions.
 *
 * Starts from the Gfx12 GT2 configuration and then overrides it with later
 * designators: no LLC, local memory present, a 768 URB size (instead of the
 * 512 from GFX12_HW_INFO) and simulator id 30 (instead of 22).
 */
#define GFX12_DG1_SG1_FEATURES                           \
   GFX12_GT_FEATURES(2),                                 \
   .platform = INTEL_PLATFORM_DG1,                       \
   .has_llc = false,                                     \
   .has_local_mem = true,                                \
   .urb.size = 768,                                      \
   .simulator_id = 30
1075 
/* DG1: exactly GFX12_DG1_SG1_FEATURES. */
static const struct intel_device_info intel_device_info_dg1 = {
   GFX12_DG1_SG1_FEATURES,
};

/* SG1: identical initializer to DG1 (including .platform, which the shared
 * macro sets to INTEL_PLATFORM_DG1).
 */
static const struct intel_device_info intel_device_info_sg1 = {
   GFX12_DG1_SG1_FEATURES,
};
1083 
/* Per-stage URB entry limits used by XEHP_FEATURES.
 *
 * Expands to designated initializers for min_entries[] and max_entries[] and
 * is meant to be placed inside a ".urb = { ... }" initializer.  The BSpec
 * page for each maximum is noted next to the value.
 */
#define XEHP_URB_MIN_MAX_ENTRIES                        \
   .min_entries = {                                     \
      [MESA_SHADER_VERTEX]    = 64,                     \
      [MESA_SHADER_TESS_EVAL] = 34,                     \
   },                                                   \
   .max_entries = {                                     \
      [MESA_SHADER_VERTEX]    = 3832, /* BSpec 47138 */ \
      [MESA_SHADER_TESS_CTRL] = 1548, /* BSpec 47137 */ \
      [MESA_SHADER_TESS_EVAL] = 3576, /* BSpec 47135 */ \
      [MESA_SHADER_GEOMETRY]  = 1548, /* BSpec 47136 */ \
   }
1095 
/* Base initializer list for the verx10 = 125 and later device descriptions
 * (DG2_FEATURES, MTL_FEATURES and XE2_FEATURES all start from it).
 *
 * Starts from GFX8_FEATURES and sets ver 12 / verx10 125, thread and URB
 * limits, LSC and local-memory flags, simulator id 29 and the cooperative
 * matrix configurations.
 *
 *   _gt     - value stored in .gt
 *   _slices - value stored in .num_slices
 *   _l3     - value stored in .l3_banks
 *
 * .num_subslices is only a placeholder here; per the inline note it is
 * replaced once topology information is available.  Derived macros override
 * individual fields by listing later designators.
 */
#define XEHP_FEATURES(_gt, _slices, _l3)                        \
   GFX8_FEATURES,                                               \
   .needs_null_push_constant_tbimr_workaround = true,           \
   .has_64bit_float = false,                                    \
   .has_64bit_int = false,                                      \
   .has_integer_dword_mul = false,                              \
   .gt = _gt, .num_slices = _slices, .l3_banks = _l3,           \
   .num_subslices = dual_subslices(1), /* updated by topology */\
   .ver = 12,                                                   \
   .has_pln = false,                                            \
   .has_sample_with_hiz = false,                                \
   .max_vs_threads = 546,  /* BSpec 46312 */                    \
   .max_gs_threads = 336,  /* BSpec 46299 */                    \
   .max_tcs_threads = 336, /* BSpec 46300 */                    \
   .max_tes_threads = 546, /* BSpec 46298 */                    \
   .max_threads_per_psd = 64,                                   \
   .max_cs_threads = 112, /* threads per DSS */                 \
   .urb = {                                                     \
      .size = 768, /* For intel_stub_gpu */                     \
      XEHP_URB_MIN_MAX_ENTRIES,                                 \
   },                                                           \
   .num_thread_per_eu = 8 /* BSpec 44472 */,                    \
   .max_eus_per_subslice = 16,                                  \
   .verx10 = 125,                                               \
   .has_llc = false,                                            \
   .has_lsc = true,                                             \
   .has_local_mem = true,                                       \
   .has_aux_map = false,                                        \
   .simulator_id = 29,                                          \
   .cooperative_matrix_configurations = {                       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
   }
1130 
/* Initializer list shared by the DG2 and ATS-M device descriptions.
 *
 * Builds on XEHP_FEATURES with gt and l3_banks left at 0 (see the inline note
 * about kernel topology), then adds a default revision, coarse-pixel, mesh
 * shading, ray tracing and flat-CCS flags, and a PAT table.
 */
#define DG2_FEATURES                                            \
   /* (Sub)slice info comes from the kernel topology info */    \
   XEHP_FEATURES(0, 1, 0),                                      \
   .revision = 4, /* For offline compiler */                    \
   .has_coarse_pixel_primitive_and_cb = true,                   \
   .has_mesh_shading = true,                                    \
   .has_ray_tracing = true,                                     \
   .has_flat_ccs = true,                                        \
   /* There is no PAT table for DG2, using TGL ones */          \
   /* BSpec 45101 (r51017) */                                   \
   .pat = {                                                     \
         /* CPU: WB, GPU: PAT 0 => WB, 2WAY */                  \
         .cached_coherent = PAT_ENTRY(0, WB),                   \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .scanout = PAT_ENTRY(1, WC),                           \
         /* CPU: WB, GPU: PAT 0 => WB, 2WAY */                  \
         .writeback_incoherent = PAT_ENTRY(0, WB),              \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .writecombining = PAT_ENTRY(1, WC),                    \
   }
1151 
/* DG2 and ATS-M device descriptions.  All five share DG2_FEATURES and differ
 * only in the .platform value.
 */

/* DG2 G10. */
static const struct intel_device_info intel_device_info_dg2_g10 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_DG2_G10,
};

/* DG2 G11. */
static const struct intel_device_info intel_device_info_dg2_g11 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_DG2_G11,
};

/* DG2 G12. */
static const struct intel_device_info intel_device_info_dg2_g12 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_DG2_G12,
};

/* ATS-M G10. */
static const struct intel_device_info intel_device_info_atsm_g10 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_ATSM_G10,
};

/* ATS-M G11. */
static const struct intel_device_info intel_device_info_atsm_g11 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_ATSM_G11,
};
1176 
/* Initializer list shared by the MTL and ARL device descriptions.
 *
 * Builds on XEHP_FEATURES and overrides several of its values with later
 * designators: no local memory, aux map enabled and 64-bit float enabled
 * (via the math pipe).  Also adds coarse-pixel, mesh shading and ray tracing
 * flags and the PAT table.  .has_integer_dword_mul repeats the value already
 * set by XEHP_FEATURES.
 */
#define MTL_FEATURES                                            \
   /* (Sub)slice info comes from the kernel topology info */    \
   XEHP_FEATURES(0, 1, 0),                                      \
   .has_local_mem = false,                                      \
   .has_aux_map = true,                                         \
   .has_64bit_float = true,                                     \
   .has_64bit_float_via_math_pipe = true,                       \
   .has_integer_dword_mul = false,                              \
   .has_coarse_pixel_primitive_and_cb = true,                   \
   .has_mesh_shading = true,                                    \
   .has_ray_tracing = true,                                     \
   /* BSpec 45101 (r51017) */                                   \
   .pat = {                                                     \
         /* CPU: WB, GPU: PAT 3 => WB, 1WAY */                  \
         .cached_coherent = PAT_ENTRY(3, WB),                   \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .scanout = PAT_ENTRY(1, WC),                           \
         /* CPU: WB, GPU: PAT 0 => WB, 0WAY */                  \
         .writeback_incoherent = PAT_ENTRY(0, WB),              \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .writecombining = PAT_ENTRY(1, WC),                    \
   }
1199 
/* MTL and ARL device descriptions.  All four share MTL_FEATURES and differ
 * only in the .platform value.
 */

/* MTL-U. */
static const struct intel_device_info intel_device_info_mtl_u = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_MTL_U,
};

/* MTL-H. */
static const struct intel_device_info intel_device_info_mtl_h = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_MTL_H,
};

/* ARL-U. */
static const struct intel_device_info intel_device_info_arl_u = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_ARL_U,
};

/* ARL-H. */
static const struct intel_device_info intel_device_info_arl_h = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_ARL_H,
};
1219 
/* Initializer list shared by the Xe2 device descriptions (and, through
 * XE3_FEATURES, by Xe3).
 *
 * Builds on XEHP_FEATURES and overrides it with later designators: ver 20 /
 * verx10 200, grf_size 64, the null-push-constant TBIMR workaround disabled,
 * and 64-bit float and int enabled.  It also supplies its own PAT table
 * (including a .compressed entry) and its own cooperative matrix
 * configurations, whose second size field is 16 instead of the 8 used by
 * XEHP_FEATURES.  .has_local_mem is expected to be set by each device.
 */
#define XE2_FEATURES                                            \
   /* (Sub)slice info comes from the kernel topology info */    \
   XEHP_FEATURES(0, 1, 0),                                      \
   .ver = 20,                                                   \
   .verx10 = 200,                                               \
   .num_subslices = dual_subslices(1),                          \
   .grf_size = 64,                                              \
   .needs_null_push_constant_tbimr_workaround = false,          \
   .has_64bit_float = true,                                     \
   .has_64bit_int = true,                                       \
   .has_integer_dword_mul = false,                              \
   .has_coarse_pixel_primitive_and_cb = true,                   \
   .has_mesh_shading = true,                                    \
   .has_ray_tracing = true,                                     \
   .has_indirect_unroll = true,                                 \
   /* BSpec 71582 (r59285) */                                   \
   .pat = {                                                     \
      /* CPU: WB, GPU: PAT 1 => WB, 1WAY */                     \
      .cached_coherent = PAT_ENTRY(1, WB),                      \
      /* CPU: WC, GPU: PAT 6 => XD */                           \
      .scanout = PAT_ENTRY(6, WC),                              \
      /* CPU: WC, GPU: PAT 0 => WB */                           \
      .writecombining = PAT_ENTRY(0, WC),                       \
      /* CPU: WC, GPU: PAT 11 => XD, compressed */              \
      .compressed = PAT_ENTRY(11, WC)                           \
   },                                                           \
   .cooperative_matrix_configurations = {                       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
   },                                                           \
   .has_flat_ccs = true
1252 
/* BMG: XE2_FEATURES with local memory. */
static const struct intel_device_info intel_device_info_bmg = {
   XE2_FEATURES,
   .platform = INTEL_PLATFORM_BMG,
   .has_local_mem = true,
};

/* LNL: XE2_FEATURES without local memory. */
static const struct intel_device_info intel_device_info_lnl = {
   XE2_FEATURES,
   .platform = INTEL_PLATFORM_LNL,
   .has_local_mem = false,
};
1264 
/* Xe3 initializer list: identical to XE2_FEATURES except that the trailing
 * designators replace ver/verx10 with 30/300.
 */
#define XE3_FEATURES                                            \
   XE2_FEATURES,                                                \
   .ver = 30,                                                   \
   .verx10 = 300
1269 
/* PTL: XE3_FEATURES without local memory. */
static const struct intel_device_info intel_device_info_ptl = {
   XE3_FEATURES,
   .platform = INTEL_PLATFORM_PTL,
   .has_local_mem = false,
};
1275 
1276 void
intel_device_info_topology_reset_masks(struct intel_device_info * devinfo)1277 intel_device_info_topology_reset_masks(struct intel_device_info *devinfo)
1278 {
1279    devinfo->subslice_slice_stride = 0;
1280    devinfo->eu_subslice_stride = 0;
1281    devinfo->eu_slice_stride = 0;
1282 
1283    devinfo->num_slices = 0;
1284    memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
1285 
1286    memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
1287    memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
1288    memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
1289    memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices));
1290 }
1291 
1292 void
intel_device_info_topology_update_counts(struct intel_device_info * devinfo)1293 intel_device_info_topology_update_counts(struct intel_device_info *devinfo)
1294 {
1295    devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
1296    devinfo->subslice_total = 0;
1297    for (int s = 0; s < devinfo->max_slices; s++) {
1298       if (!intel_device_info_slice_available(devinfo, s))
1299          continue;
1300 
1301       for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
1302          devinfo->num_subslices[s] +=
1303             __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]);
1304       }
1305       devinfo->subslice_total += devinfo->num_subslices[s];
1306    }
1307    assert(devinfo->num_slices > 0);
1308    assert(devinfo->subslice_total > 0);
1309 }
1310 
1311 void
intel_device_info_update_pixel_pipes(struct intel_device_info * devinfo,uint8_t * subslice_masks)1312 intel_device_info_update_pixel_pipes(struct intel_device_info *devinfo, uint8_t *subslice_masks)
1313 {
1314    if (devinfo->ver < 11)
1315       return;
1316 
1317    /* The kernel only reports one slice on all existing ICL+ platforms, even
1318     * if multiple slices are present. The slice mask is allowed to have the
1319     * accurate value greater than 1 on gfx12.5+ platforms though, in order to
1320     * be tolerant with the behavior of our simulation environment.
1321     */
1322    assert(devinfo->slice_masks == 1 || devinfo->verx10 >= 125);
1323 
1324    /* Count the number of subslices on each pixel pipe. Assume that every
1325     * contiguous group of 4 subslices in the mask belong to the same pixel
1326     * pipe. However note that on TGL+ the kernel returns a mask of enabled
1327     * *dual* subslices instead of actual subslices somewhat confusingly, so
1328     * each pixel pipe only takes 2 bits in the mask even though it's still 4
1329     * subslices.
1330     */
1331    const unsigned ppipe_bits = devinfo->ver >= 12 ? 2 : 4;
1332    for (unsigned p = 0; p < INTEL_DEVICE_MAX_PIXEL_PIPES; p++) {
1333       const unsigned offset = p * ppipe_bits;
1334       const unsigned subslice_idx = offset /
1335          devinfo->max_subslices_per_slice * devinfo->subslice_slice_stride;
1336       const unsigned ppipe_mask =
1337          BITFIELD_RANGE(offset % devinfo->max_subslices_per_slice, ppipe_bits);
1338 
1339       if (subslice_idx < ARRAY_SIZE(devinfo->subslice_masks))
1340          devinfo->ppipe_subslices[p] =
1341             __builtin_popcount(subslice_masks[subslice_idx] & ppipe_mask);
1342       else
1343          devinfo->ppipe_subslices[p] = 0;
1344    }
1345 }
1346 
1347 void
intel_device_info_update_l3_banks(struct intel_device_info * devinfo)1348 intel_device_info_update_l3_banks(struct intel_device_info *devinfo)
1349 {
1350    if (devinfo->ver != 12)
1351       return;
1352 
1353    if (devinfo->verx10 >= 125) {
1354       if (devinfo->subslice_total > 16) {
1355          assert(devinfo->subslice_total <= 32);
1356          devinfo->l3_banks = 32;
1357       } else if (devinfo->subslice_total > 8) {
1358          devinfo->l3_banks = 16;
1359       } else {
1360          devinfo->l3_banks = 8;
1361       }
1362    } else {
1363       assert(devinfo->num_slices == 1);
1364       if (devinfo->subslice_total >= 6) {
1365          assert(devinfo->subslice_total == 6);
1366          devinfo->l3_banks = 8;
1367       } else if (devinfo->subslice_total > 2) {
1368          devinfo->l3_banks = 6;
1369       } else {
1370          devinfo->l3_banks = 4;
1371       }
1372    }
1373 }
1374 
1375 /* Returns the number of EUs of the first subslice enabled */
1376 uint32_t
intel_device_info_get_eu_count_first_subslice(const struct intel_device_info * devinfo)1377 intel_device_info_get_eu_count_first_subslice(const struct intel_device_info *devinfo)
1378 {
1379    uint32_t first_subslice, first_slice, offset, i;
1380    uint32_t eu_count = 0;
1381 
1382    first_slice = ffs(devinfo->slice_masks);
1383    first_slice--;
1384    offset = first_slice * devinfo->subslice_slice_stride;
1385 
1386    for (i = 0; i < DIV_ROUND_UP(devinfo->max_subslices_per_slice, 8); i++) {
1387       first_subslice = ffs(devinfo->subslice_masks[offset + i]);
1388 
1389       if (first_subslice == 0)
1390          continue;
1391 
1392       break;
1393    }
1394 
1395    assert(first_subslice > 0);
1396    first_subslice--;
1397    offset = first_slice * devinfo->eu_slice_stride +
1398             first_subslice * devinfo->eu_subslice_stride;
1399    for (i = 0; i < DIV_ROUND_UP(devinfo->max_eus_per_subslice, 8); i++)
1400       eu_count += __builtin_popcount(devinfo->eu_masks[offset + i]);
1401 
1402    assert(eu_count > 0);
1403    return eu_count;
1404 }
1405 
1406 /* Generate mask from the device data. */
1407 static void
fill_masks(struct intel_device_info * devinfo)1408 fill_masks(struct intel_device_info *devinfo)
1409 {
1410    /* All of our internal device descriptions assign the same number of
1411     * subslices for each slice. Just verify that this is true.
1412     */
1413    for (int s = 1; s < devinfo->num_slices; s++)
1414       assert(devinfo->num_subslices[0] == devinfo->num_subslices[s]);
1415 
1416    intel_device_info_i915_update_from_masks(devinfo,
1417                           (1U << devinfo->num_slices) - 1,
1418                           (1U << devinfo->num_subslices[0]) - 1,
1419                           devinfo->num_slices * devinfo->num_subslices[0] *
1420                           devinfo->max_eus_per_subslice);
1421 }
1422 
1423 void
intel_device_info_update_cs_workgroup_threads(struct intel_device_info * devinfo)1424 intel_device_info_update_cs_workgroup_threads(struct intel_device_info *devinfo)
1425 {
1426    /* GPGPU_WALKER::ThreadWidthCounterMaximum is U6-1 so the most threads we
1427     * can program is 64 without going up to a rectangular group. This only
1428     * impacts Haswell and TGL which have higher thread counts.
1429     *
1430     * INTERFACE_DESCRIPTOR_DATA::NumberofThreadsinGPGPUThreadGroup on Xe-HP+
1431     * is 10 bits so we have no such restrictions.
1432     */
1433    devinfo->max_cs_workgroup_threads =
1434       devinfo->verx10 >= 125 ? devinfo->max_cs_threads :
1435                                MIN2(devinfo->max_cs_threads, 64);
1436 }
1437 
/**
 * Parse one INTEL_FORCE_PROBE entry and apply it if it names \p pci_id.
 *
 * An entry is an optional '!' followed by either '*' (matches every device)
 * or a number that strtol() accepts in base 16, with nothing after it.
 *
 * \param pci_id     PCI device id being probed
 * \param entry      NUL-terminated entry text
 * \param force_on   set to true for a plain match, false for a '!' match
 * \param force_off  set to true for a '!' match, false for a plain match
 * \return true if the entry is well formed and matches \p pci_id; false
 *         otherwise, in which case the two outputs are left untouched.
 */
static bool
parse_force_probe_entry(int pci_id, const char *entry, bool *force_on,
                        bool *force_off)
{
   const char *cursor = entry;
   bool is_negated = false;
   bool is_wildcard = false;
   long parsed_id = 0;

   /* Optional leading negation. */
   if (*cursor == '!') {
      is_negated = true;
      cursor++;
   }

   /* Empty entry, or a lone '!'. */
   if (*cursor == '\0')
      return false;

   if (*cursor == '*') {
      is_wildcard = true;
      cursor++;
   } else {
      char *tail = NULL;
      parsed_id = strtol(cursor, &tail, 16);
      if (tail == cursor)
         return false; /* no digits consumed */
      cursor = tail;
   }

   /* Anything left over makes the entry invalid. */
   if (*cursor != '\0')
      return false;

   if (!is_wildcard && parsed_id != (long)pci_id)
      return false;

   *force_on = !is_negated;
   *force_off = is_negated;
   return true;
}
1475 
/**
 * Evaluate the INTEL_FORCE_PROBE environment variable for \p pci_id.
 *
 * The variable holds a comma-separated list of entries understood by
 * parse_force_probe_entry().  Every entry is applied in order, so when
 * several entries match the device the last one decides.
 *
 * \param pci_id     PCI device id being probed
 * \param force_on   out: true if the device is explicitly enabled
 * \param force_off  out: true if the device is explicitly disabled
 *
 * Both outputs are false when the variable is unset or empty, when no entry
 * matches, or when the working copy of the string cannot be allocated.
 */
static void
scan_for_force_probe(int pci_id, bool *force_on, bool *force_off)
{
   *force_on = false;
   *force_off = false;

   const char *env = getenv("INTEL_FORCE_PROBE");
   if (env == NULL)
      return;

   size_t len = strlen(env);
   if (len == 0)
      return;

   /* Work on a private copy: the list is split in place below. */
   char *dup = strndup(env, len);
   if (dup == NULL)
      return;

   /* Split on ',' by hand instead of using strtok(): strtok() keeps hidden
    * static state, so it is not safe if another thread (or the application)
    * is tokenizing at the same time.  Empty entries are skipped, matching
    * what strtok() would produce.
    */
   for (char *entry = dup; entry != NULL; ) {
      char *comma = strchr(entry, ',');
      if (comma != NULL)
         *comma = '\0';

      if (*entry != '\0')
         parse_force_probe_entry(pci_id, entry, force_on, force_off);

      entry = comma != NULL ? comma + 1 : NULL;
   }

   free(dup);
   assert(!*force_on || !*force_off);
}
1500 
/* Optional per-PCI-ID settings, filled from the trailing arguments of a
 * CHIPSET() entry in intel_device_info_init_common().
 */
struct device_init_config {
   /* When true, the device is only accepted if probing was forced. */
   bool require_force_probe;
};
1504 
/* Example PCI ID entry using FORCE_PROBE:
 *
 * CHIPSET(0x1234, foo, "FOO", "Intel(R) Graphics", FORCE_PROBE)
 *
 * The macro expands to a designated initializer for
 * struct device_init_config, marking the entry as requiring a forced probe.
 */
#define FORCE_PROBE .require_force_probe = true
1510 
/**
 * Initialize \p devinfo from the built-in description matching \p pci_id.
 *
 * Copies the static device description selected by the PCI ID tables, fills
 * in the device name, applies the INTEL_FORCE_PROBE policy, generates the
 * topology masks and computes the values derived from the description
 * (max_wm_threads, verx10, gfx_ip_ver, mesh URB size, scratch size limit and
 * compute workgroup thread limit).
 *
 * \param pci_id    PCI device id to look up
 * \param building  when true, probing is treated as forced without looking
 *                  at the environment and the "forcing probe" warning is
 *                  suppressed
 * \param devinfo   structure to initialize
 * \return false if the PCI ID is unknown, if the device was disabled through
 *         INTEL_FORCE_PROBE, or if it requires a forced probe that was not
 *         requested; true otherwise.
 */
static bool
intel_device_info_init_common(int pci_id, bool building,
                              struct intel_device_info *devinfo)
{
   struct device_init_config device_config = { 0 };

   /* Select the static description (and optional per-ID config) by expanding
    * the CHIPSET() entries of the PCI ID tables into case labels.
    */
   switch (pci_id) {
#undef CHIPSET
#define CHIPSET(id, family, fam_str, name, ...)                         \
      case id:                                                          \
         *devinfo = intel_device_info_##family;                         \
         device_config = *&(struct device_init_config) { __VA_ARGS__ }; \
         break;
#include "pci_ids/crocus_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"

   /* Every ID in the i915 table maps to the single gfx3 description. */
#undef CHIPSET
#define CHIPSET(id, fam_str, name) \
      case id: *devinfo = intel_device_info_gfx3; break;
#include "pci_ids/i915_pci_ids.h"

   default:
      mesa_logw("Driver does not support the 0x%x PCI ID.", pci_id);
      return false;
   }

   /* Second pass over the crocus/iris tables to build the human readable
    * name; IDs not listed there get a generic name.
    */
   switch (pci_id) {
#undef CHIPSET
#define CHIPSET(_id, _family, _fam_str, _name, ...) \
   case _id: \
      /* sizeof(str_literal) includes the null */ \
      STATIC_ASSERT(sizeof(_name) + sizeof(_fam_str) + 2 <= \
                    sizeof(devinfo->name)); \
      strncpy(devinfo->name, _name " (" _fam_str ")", sizeof(devinfo->name)); \
      break;
#include "pci_ids/crocus_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"
   default:
      strncpy(devinfo->name, "Intel Unknown", sizeof(devinfo->name));
   }

   /* Force-probe policy: building implies force_on, otherwise the
    * environment decides.  An explicit "off" always wins.
    */
   bool force_on = false;
   bool force_off = false;
   if (building)
      force_on = true;
   else
      scan_for_force_probe(pci_id, &force_on, &force_off);
   devinfo->probe_forced = force_on;
   if (force_off) {
      mesa_logw("%s (0x%x) disabled with INTEL_FORCE_PROBE", devinfo->name,
                pci_id);
      return false;
   } else if (device_config.require_force_probe) {
      if (force_on) {
         if (!building)
            mesa_logw("Forcing probe of unsupported: %s (0x%x)", devinfo->name,
                      pci_id);
      } else {
         mesa_loge("%s (0x%x) requires INTEL_FORCE_PROBE", devinfo->name,
                   pci_id);
         return false;
      }
   }

   devinfo->pci_device_id = pci_id;

   fill_masks(devinfo);

   /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
    *
    * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
    *  allocate scratch space enough so that each slice has 4 slices allowed."
    *
    * The equivalent internal documentation says that this programming note
    * applies to all Gfx9+ platforms.
    *
    * The hardware typically calculates the scratch space pointer by taking
    * the base address, and adding per-thread-scratch-space * thread ID.
    * Extra padding can be necessary depending how the thread IDs are
    * calculated for a particular shader stage.
    */

   switch(devinfo->ver) {
   case 9:
      devinfo->max_wm_threads = 64 /* threads-per-PSD */
                              * devinfo->num_slices
                              * 4; /* effective subslices per slice */
      break;
   case 11:
   case 12:
   case 20:
   case 30:
      devinfo->max_wm_threads = 128 /* threads-per-PSD */
                              * devinfo->num_slices
                              * 8; /* subslices per slice */
      break;
   default:
      /* Older generations keep the value from their static description. */
      assert(devinfo->ver < 9);
      break;
   }

   assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));

   /* Descriptions that do not set verx10 get it derived from ver. */
   if (devinfo->verx10 == 0)
      devinfo->verx10 = devinfo->ver * 10;

   uint16_t major = devinfo->ver;
   uint16_t minor = (devinfo->verx10 - (devinfo->ver * 10)) * 10;
   /* When supported gfx_ip_ver will be overwritten by values read from KMD.
    * This is an approximation for platforms that do not support GMD ID or
    * when running offline tools.
    * verx10 125 becomes GFX_IP_VER(12, 50) for example.
    */
   devinfo->gfx_ip_ver = GFX_IP_VER(major, minor);

   if (devinfo->has_mesh_shading) {
      /* Half of push constant space matches the size used in the simplest
       * primitive pipeline (VS + FS). Tweaking this affects performance.
       */
      devinfo->mesh_max_constant_urb_size_kb =
            devinfo->max_constant_urb_size_kb / 2;
   }

   /*
    * Gfx 12.5 moved scratch to a surface and SURFTYPE_SCRATCH has this pitch
    * restriction:
    *
    * BSpec 43862 (r52666)
    * RENDER_SURFACE_STATE::Surface Pitch
    *    For surfaces of type SURFTYPE_SCRATCH, valid range of pitch is:
    *    [63,262143] -> [64B, 256KB]
    *
    * The pitch of the surface is the scratch size per thread and the surface
    * should be large enough to accommodate every physical thread.
    */
   devinfo->max_scratch_size_per_thread = devinfo->verx10 >= 125 ?
                                          (256 * 1024) : (2 * 1024 * 1024);
   intel_device_info_update_cs_workgroup_threads(devinfo);

   return true;
}
1651 
1652 static void
intel_device_info_apply_workarounds(struct intel_device_info * devinfo)1653 intel_device_info_apply_workarounds(struct intel_device_info *devinfo)
1654 {
1655    if (intel_needs_workaround(devinfo, 18012660806))
1656       devinfo->urb.max_entries[MESA_SHADER_GEOMETRY] = 1536;
1657 
1658    if (intel_needs_workaround(devinfo, 18040209780))
1659       devinfo->max_gs_threads = 312;
1660 
1661    /* Fixes issues with:
1662     * dEQP-GLES31.functional.geometry_shading.layered.render_with_default_layer_cubemap
1663     * when running on GFX12 platforms with small EU count.
1664     */
1665    const uint32_t eu_total = intel_device_info_eu_total(devinfo);
1666    if (devinfo->verx10 == 120 && eu_total <= 32)
1667       devinfo->urb.max_entries[MESA_SHADER_GEOMETRY] = 1024;
1668 }
1669 
1670 static bool
intel_get_device_info_from_pci_id_common(int pci_id,bool building,struct intel_device_info * devinfo)1671 intel_get_device_info_from_pci_id_common(int pci_id, bool building,
1672                                          struct intel_device_info *devinfo)
1673 {
1674    intel_device_info_init_common(pci_id, building, devinfo);
1675 
1676    /* This is a placeholder until a proper value is set. */
1677    devinfo->kmd_type = INTEL_KMD_TYPE_I915;
1678 
1679    intel_device_info_init_was(devinfo);
1680    intel_device_info_apply_workarounds(devinfo);
1681 
1682    return true;
1683 }
1684 
/**
 * Populate @devinfo for @pci_id without talking to a kernel driver.
 *
 * Thin wrapper around intel_get_device_info_from_pci_id_common() with the
 * "building" flag cleared; the return value is passed through unchanged.
 */
bool
intel_get_device_info_from_pci_id(int pci_id,
                                  struct intel_device_info *devinfo)
{
   const bool building = false;

   return intel_get_device_info_from_pci_id_common(pci_id, building, devinfo);
}
1691 
/**
 * Populate @devinfo for @pci_id with the "building" flag set.
 *
 * Thin wrapper around intel_get_device_info_from_pci_id_common(); the
 * return value is passed through unchanged.
 */
bool
intel_get_device_info_for_build(int pci_id,
                                struct intel_device_info *devinfo)
{
   const bool building = true;

   return intel_get_device_info_from_pci_id_common(pci_id, building, devinfo);
}
1698 
1699 bool
intel_device_info_compute_system_memory(struct intel_device_info * devinfo,bool update)1700 intel_device_info_compute_system_memory(struct intel_device_info *devinfo, bool update)
1701 {
1702    if (!update) {
1703       if (!os_get_total_physical_memory(&devinfo->mem.sram.mappable.size))
1704          return false;
1705    }
1706 
1707    os_get_available_system_memory(&devinfo->mem.sram.mappable.free);
1708 
1709    return true;
1710 }
1711 
1712 static void
intel_device_info_adjust_memory(struct intel_device_info * devinfo)1713 intel_device_info_adjust_memory(struct intel_device_info *devinfo)
1714 {
1715    uint64_t available;
1716 
1717    /* Applications running without elevated privileges don't report valid
1718     * numbers for free sram
1719     */
1720    if (os_get_available_system_memory(&available)) {
1721       devinfo->mem.sram.mappable.free = MIN3(devinfo->mem.sram.mappable.free,
1722                                              devinfo->mem.sram.mappable.size,
1723                                              available);
1724    }
1725 }
1726 
1727 static void
init_max_scratch_ids(struct intel_device_info * devinfo)1728 init_max_scratch_ids(struct intel_device_info *devinfo)
1729 {
1730    /* Determine the max number of subslices that potentially might be used in
1731     * scratch space ids.
1732     *
1733     * For, Gfx11+, scratch space allocation is based on the number of threads
1734     * in the base configuration.
1735     *
1736     * For Gfx9, devinfo->subslice_total is the TOTAL number of subslices and
1737     * we wish to view that there are 4 subslices per slice instead of the
1738     * actual number of subslices per slice. The documentation for 3DSTATE_PS
1739     * "Scratch Space Base Pointer" says:
1740     *
1741     *    "Scratch Space per slice is computed based on 4 sub-slices.  SW
1742     *     must allocate scratch space enough so that each slice has 4
1743     *     slices allowed."
1744     *
1745     * According to the other driver team, this applies to compute shaders
1746     * as well.  This is not currently documented at all.
1747     *
1748     * For Gfx8 and older we user devinfo->subslice_total.
1749     */
1750    unsigned subslices;
1751    if (devinfo->verx10 == 125)
1752       subslices = 32;
1753    else if (devinfo->ver == 12)
1754       subslices = (devinfo->platform == INTEL_PLATFORM_DG1 || devinfo->gt == 2 ? 6 : 2);
1755    else if (devinfo->ver == 11)
1756       subslices = 8;
1757    else if (devinfo->ver >= 9 && devinfo->ver < 11)
1758       subslices = 4 * devinfo->num_slices;
1759    else
1760       subslices = devinfo->subslice_total;
1761    assert(subslices >= devinfo->subslice_total);
1762 
1763    unsigned scratch_ids_per_subslice;
1764    if (devinfo->ver >= 12) {
1765       /* Same as ICL below, but with 16 EUs. */
1766       scratch_ids_per_subslice = 16 * 8;
1767    } else if (devinfo->ver >= 11) {
1768       /* The MEDIA_VFE_STATE docs say:
1769        *
1770        *    "Starting with this configuration, the Maximum Number of
1771        *     Threads must be set to (#EU * 8) for GPGPU dispatches.
1772        *
1773        *     Although there are only 7 threads per EU in the configuration,
1774        *     the FFTID is calculated as if there are 8 threads per EU,
1775        *     which in turn requires a larger amount of Scratch Space to be
1776        *     allocated by the driver."
1777        */
1778       scratch_ids_per_subslice = 8 * 8;
1779    } else if (devinfo->platform == INTEL_PLATFORM_HSW) {
1780       /* WaCSScratchSize:hsw
1781        *
1782        * Haswell's scratch space address calculation appears to be sparse
1783        * rather than tightly packed. The Thread ID has bits indicating
1784        * which subslice, EU within a subslice, and thread within an EU it
1785        * is. There's a maximum of two slices and two subslices, so these
1786        * can be stored with a single bit. Even though there are only 10 EUs
1787        * per subslice, this is stored in 4 bits, so there's an effective
1788        * maximum value of 16 EUs. Similarly, although there are only 7
1789        * threads per EU, this is stored in a 3 bit number, giving an
1790        * effective maximum value of 8 threads per EU.
1791        *
1792        * This means that we need to use 16 * 8 instead of 10 * 7 for the
1793        * number of threads per subslice.
1794        */
1795       scratch_ids_per_subslice = 16 * 8;
1796    } else if (devinfo->platform == INTEL_PLATFORM_CHV) {
1797       /* Cherryview devices have either 6 or 8 EUs per subslice, and each
1798        * EU has 7 threads. The 6 EU devices appear to calculate thread IDs
1799        * as if it had 8 EUs.
1800        */
1801       scratch_ids_per_subslice = 8 * 7;
1802    } else {
1803       scratch_ids_per_subslice = devinfo->max_cs_threads;
1804    }
1805 
1806    unsigned max_thread_ids = scratch_ids_per_subslice * subslices;
1807 
1808    if (devinfo->verx10 >= 125) {
1809       /* On GFX version 12.5, scratch access changed to a surface-based model.
1810        * Instead of each shader type having its own layout based on IDs passed
1811        * from the relevant fixed-function unit, all scratch access is based on
1812        * thread IDs like it always has been for compute.
1813        */
1814       for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++)
1815          devinfo->max_scratch_ids[i] = max_thread_ids;
1816    } else {
1817       unsigned max_scratch_ids[] = {
1818          [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
1819          [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
1820          [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
1821          [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
1822          [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
1823          [MESA_SHADER_COMPUTE]   = max_thread_ids,
1824       };
1825       STATIC_ASSERT(sizeof(devinfo->max_scratch_ids) == sizeof(max_scratch_ids));
1826       memcpy(devinfo->max_scratch_ids, max_scratch_ids,
1827              sizeof(devinfo->max_scratch_ids));
1828    }
1829 }
1830 
1831 static unsigned
intel_device_info_calc_engine_prefetch(const struct intel_device_info * devinfo,enum intel_engine_class engine_class)1832 intel_device_info_calc_engine_prefetch(const struct intel_device_info *devinfo,
1833                                        enum intel_engine_class engine_class)
1834 {
1835    if (devinfo->verx10 >= 200) {
1836       switch (engine_class) {
1837       case INTEL_ENGINE_CLASS_RENDER:
1838          return 4096;
1839       case INTEL_ENGINE_CLASS_COMPUTE:
1840          return 1024;
1841       default:
1842          return 512;
1843       }
1844    }
1845 
1846    if (intel_device_info_is_mtl_or_arl(devinfo)) {
1847       switch (engine_class) {
1848       case INTEL_ENGINE_CLASS_RENDER:
1849          return 2048;
1850       case INTEL_ENGINE_CLASS_COMPUTE:
1851          return 1024;
1852       default:
1853          return 512;
1854       }
1855    }
1856 
1857    /* DG2 */
1858    if (devinfo->verx10 == 125)
1859       return 1024;
1860 
1861    /* Older than DG2/MTL */
1862    return 512;
1863 }
1864 
1865 bool
intel_get_device_info_from_fd(int fd,struct intel_device_info * devinfo,int min_ver,int max_ver)1866 intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo, int min_ver, int max_ver)
1867 {
1868    if (NULL != getenv("INTEL_STUB_GPU_JSON")) {
1869       /* This call will succeed when shim-drm has been initialized with a
1870        * serialized intel_device_info structure.
1871        */
1872       struct drm_intel_stub_devinfo arg = {
1873          .addr = (uintptr_t)devinfo,
1874          .size = sizeof(*devinfo),
1875       };
1876       if (0 == intel_ioctl(fd, DRM_IOCTL_INTEL_STUB_DEVINFO, &arg)) {
1877          intel_device_info_init_was(devinfo);
1878          intel_device_info_apply_workarounds(devinfo);
1879          return true;
1880       }
1881    }
1882 
1883    /* Get PCI info.
1884     *
1885     * Some callers may already have a valid drm device which holds values of
1886     * PCI fields queried here prior to calling this function. But making this
1887     * query optional leads to a more cumbersome implementation. These callers
1888     * still need to initialize the fields somewhere out of this function and
1889     * rely on an ioctl to get PCI device id for the next step when skipping
1890     * this drm query.
1891     */
1892    drmDevicePtr drmdev = NULL;
1893    if (drmGetDevice2(fd, DRM_DEVICE_GET_PCI_REVISION, &drmdev)) {
1894       mesa_loge("Failed to query drm device.");
1895       return false;
1896    }
1897    if (!intel_device_info_init_common(drmdev->deviceinfo.pci->device_id,
1898                                       false, devinfo)) {
1899       drmFreeDevice(&drmdev);
1900       return false;
1901    }
1902 
1903    if ((min_ver > 0 && devinfo->ver < min_ver) || (max_ver > 0 && devinfo->ver > max_ver)) {
1904       drmFreeDevice(&drmdev);
1905       return false;
1906    }
1907 
1908    devinfo->pci_domain = drmdev->businfo.pci->domain;
1909    devinfo->pci_bus = drmdev->businfo.pci->bus;
1910    devinfo->pci_dev = drmdev->businfo.pci->dev;
1911    devinfo->pci_func = drmdev->businfo.pci->func;
1912    devinfo->pci_device_id = drmdev->deviceinfo.pci->device_id;
1913    devinfo->pci_revision_id = drmdev->deviceinfo.pci->revision_id;
1914    drmFreeDevice(&drmdev);
1915    devinfo->no_hw = debug_get_bool_option("INTEL_NO_HW", false);
1916 
1917    devinfo->kmd_type = intel_get_kmd_type(fd);
1918    if (devinfo->kmd_type == INTEL_KMD_TYPE_INVALID) {
1919       mesa_loge("Unknown kernel mode driver");
1920       return false;
1921    }
1922 
1923    /* remaining initialization queries the kernel for device info */
1924    if (devinfo->no_hw) {
1925       /* Provide some sensible values for NO_HW. */
1926       devinfo->gtt_size =
1927          devinfo->ver >= 8 ? (1ull << 48) : 2ull * 1024 * 1024 * 1024;
1928       intel_device_info_compute_system_memory(devinfo, false);
1929       return true;
1930    }
1931 
1932    bool ret;
1933    switch (devinfo->kmd_type) {
1934    case INTEL_KMD_TYPE_I915:
1935       ret = intel_device_info_i915_get_info_from_fd(fd, devinfo);
1936       break;
1937    case INTEL_KMD_TYPE_XE:
1938       ret = intel_device_info_xe_get_info_from_fd(fd, devinfo);
1939       if (devinfo->verx10 < 200)
1940          mesa_logw("Support for this platform is experimental with Xe KMD, bug reports may be ignored.");
1941       break;
1942    default:
1943       ret = false;
1944       unreachable("Missing");
1945    }
1946    if (!ret) {
1947       mesa_logw("Could not get intel_device_info.");
1948       return false;
1949    }
1950 
1951    /* region info is required for lmem support */
1952    if (devinfo->has_local_mem && !devinfo->mem.use_class_instance) {
1953       mesa_logw("Could not query local memory size.");
1954       return false;
1955    }
1956 
1957    intel_device_info_adjust_memory(devinfo);
1958 
1959    /* Gfx7 and older do not support EU/Subslice info */
1960    assert(devinfo->subslice_total >= 1 || devinfo->ver <= 7);
1961    devinfo->subslice_total = MAX2(devinfo->subslice_total, 1);
1962 
1963    init_max_scratch_ids(devinfo);
1964 
1965    for (enum intel_engine_class engine = INTEL_ENGINE_CLASS_RENDER;
1966         engine < ARRAY_SIZE(devinfo->engine_class_prefetch); engine++)
1967       devinfo->engine_class_prefetch[engine] =
1968             intel_device_info_calc_engine_prefetch(devinfo, engine);
1969 
1970    intel_device_info_init_was(devinfo);
1971    intel_device_info_apply_workarounds(devinfo);
1972 
1973    intel_check_hwconfig_items(fd, devinfo);
1974 
1975    return true;
1976 }
1977 
intel_device_info_update_memory_info(struct intel_device_info * devinfo,int fd)1978 bool intel_device_info_update_memory_info(struct intel_device_info *devinfo, int fd)
1979 {
1980    bool ret;
1981 
1982    switch (devinfo->kmd_type) {
1983    case INTEL_KMD_TYPE_I915:
1984       ret = intel_device_info_i915_query_regions(devinfo, fd, true);
1985       break;
1986    case INTEL_KMD_TYPE_XE:
1987       ret = intel_device_info_xe_query_regions(fd, devinfo, true);
1988       break;
1989    default:
1990       ret = false;
1991    }
1992 
1993    if (ret)
1994       intel_device_info_adjust_memory(devinfo);
1995    return ret;
1996 }
1997 
1998 void
intel_device_info_update_after_hwconfig(struct intel_device_info * devinfo)1999 intel_device_info_update_after_hwconfig(struct intel_device_info *devinfo)
2000 {
2001    /* After applying hwconfig values, some items need to be recalculated. */
2002    devinfo->max_cs_threads =
2003       devinfo->max_eus_per_subslice * devinfo->num_thread_per_eu;
2004 
2005    intel_device_info_update_cs_workgroup_threads(devinfo);
2006 }
2007 
2008 enum intel_wa_steppings
intel_device_info_wa_stepping(struct intel_device_info * devinfo)2009 intel_device_info_wa_stepping(struct intel_device_info *devinfo)
2010 {
2011    /* When adding platforms to this function, check to see if
2012     * stepping-specific workarounds impact the compiler.
2013     *
2014     * If a stepping specific compiler workaround is required on a released
2015     * platform, intel_device_info->revision must be added as a
2016     * 'compiler_field' in intel_device_info.py
2017     */
2018 
2019    if (devinfo->platform == INTEL_PLATFORM_BMG) {
2020       switch (devinfo->revision) {
2021       case 0:
2022          return INTEL_STEPPING_A0;
2023       case 1:
2024          return INTEL_STEPPING_A1;
2025       case 4:
2026          return INTEL_STEPPING_B0;
2027       default:
2028          return INTEL_STEPPING_RELEASE;
2029       }
2030    } else if (devinfo->platform == INTEL_PLATFORM_LNL) {
2031       switch (devinfo->revision) {
2032       case 0:
2033          return INTEL_STEPPING_A0;
2034       case 1:
2035          return INTEL_STEPPING_A1;
2036       case 4:
2037          return INTEL_STEPPING_B0;
2038       default:
2039          return INTEL_STEPPING_RELEASE;
2040       }
2041    } else if (devinfo->platform == INTEL_PLATFORM_TGL) {
2042       /* TGL production steppings: B0 and C0 */
2043       switch (devinfo->revision) {
2044       case 1:
2045          return INTEL_STEPPING_B0;
2046       case 3:
2047          return INTEL_STEPPING_C0;
2048       default:
2049          return INTEL_STEPPING_RELEASE;
2050       }
2051    }
2052 
2053    /* all other platforms support only released steppings */
2054    return INTEL_STEPPING_RELEASE;
2055 }
2056 
2057 uint32_t
intel_device_info_get_max_slm_size(const struct intel_device_info * devinfo)2058 intel_device_info_get_max_slm_size(const struct intel_device_info *devinfo)
2059 {
2060    uint32_t bytes = 0;
2061 
2062    if (devinfo->verx10 >= 300) {
2063       bytes = 128 * 1024;
2064    } else if (devinfo->verx10 >= 200) {
2065       bytes = intel_device_info_get_max_preferred_slm_size(devinfo);
2066    } else {
2067       bytes = 64 * 1024;
2068    }
2069 
2070    return bytes;
2071 }
2072 
2073 uint32_t
intel_device_info_get_max_preferred_slm_size(const struct intel_device_info * devinfo)2074 intel_device_info_get_max_preferred_slm_size(const struct intel_device_info *devinfo)
2075 {
2076    uint32_t k_bytes = 0;
2077 
2078    if (devinfo->verx10 >= 300) {
2079       k_bytes = 192;
2080    } else if (devinfo->verx10 >= 200) {
2081       if (intel_needs_workaround(devinfo, 16018610683))
2082          k_bytes = 128;
2083       else
2084          k_bytes = 160;
2085    } else {
2086       k_bytes = 128;
2087    }
2088 
2089    return k_bytes * 1024;
2090 }
2091