• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "util/libdrm.h"
32 
33 #include "intel_device_info.h"
34 #include "intel_wa.h"
35 #include "i915/intel_device_info.h"
36 #include "xe/intel_device_info.h"
37 
38 #include "common/intel_gem.h"
39 #include "util/u_debug.h"
40 #include "util/log.h"
41 #include "util/macros.h"
42 
/* Lookup table consulted by intel_device_name_to_pci_device_id(): each entry
 * pairs a short platform mnemonic with the PCI device ID returned for it.
 */
static const struct {
   const char *name;
   int pci_id;
} name_map[] = {
   { .name = "lpt", .pci_id = 0x27a2 },
   { .name = "brw", .pci_id = 0x2a02 },
   { .name = "g4x", .pci_id = 0x2a42 },
   { .name = "ilk", .pci_id = 0x0042 },
   { .name = "snb", .pci_id = 0x0126 },
   { .name = "ivb", .pci_id = 0x016a },
   { .name = "hsw", .pci_id = 0x0d2e },
   { .name = "byt", .pci_id = 0x0f33 },
   { .name = "bdw", .pci_id = 0x162e },
   { .name = "chv", .pci_id = 0x22b3 },
   { .name = "skl", .pci_id = 0x1912 },
   { .name = "bxt", .pci_id = 0x5a85 },
   { .name = "kbl", .pci_id = 0x5912 },
   { .name = "aml", .pci_id = 0x591c },
   { .name = "glk", .pci_id = 0x3185 },
   { .name = "cfl", .pci_id = 0x3e9b },
   { .name = "whl", .pci_id = 0x3ea1 },
   { .name = "cml", .pci_id = 0x9b41 },
   { .name = "icl", .pci_id = 0x8a52 },
   { .name = "ehl", .pci_id = 0x4571 },
   { .name = "jsl", .pci_id = 0x4e71 },
   { .name = "tgl", .pci_id = 0x9a49 },
   { .name = "rkl", .pci_id = 0x4c8a },
   { .name = "dg1", .pci_id = 0x4905 },
   { .name = "adl", .pci_id = 0x4680 },
   { .name = "sg1", .pci_id = 0x4907 },
   { .name = "rpl", .pci_id = 0xa780 },
   { .name = "dg2", .pci_id = 0x5690 },
   { .name = "mtl", .pci_id = 0x7d60 },
   { .name = "arl", .pci_id = 0x7d67 },
};
78 
79 /**
80  * Get the PCI ID for the device name.
81  *
82  * Returns -1 if the device is not known.
83  */
84 int
intel_device_name_to_pci_device_id(const char * name)85 intel_device_name_to_pci_device_id(const char *name)
86 {
87    for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {
88       if (!strcmp(name_map[i].name, name))
89          return name_map[i].pci_id;
90    }
91 
92    return -1;
93 }
94 
95 static const struct intel_device_info intel_device_info_gfx3 = {
96    .ver = 3,
97    .platform = INTEL_PLATFORM_GFX3,
98    .simulator_id = -1,
99    .num_slices = 1,
100    .num_subslices = { 1, },
101    .max_eus_per_subslice = 8,
102    .num_thread_per_eu = 4,
103    .timestamp_frequency = 12500000,
104 };
105 
106 static const struct intel_device_info intel_device_info_i965 = {
107    .ver = 4,
108    .platform = INTEL_PLATFORM_I965,
109    .has_negative_rhw_bug = true,
110    .num_slices = 1,
111    .num_subslices = { 1, },
112    .max_eus_per_subslice = 8,
113    .num_thread_per_eu = 4,
114    .max_vs_threads = 16,
115    .max_gs_threads = 2,
116    .max_wm_threads = 8 * 4,
117    .urb = {
118       .size = 256,
119    },
120    .timestamp_frequency = 12500000,
121    .simulator_id = -1,
122 };
123 
124 static const struct intel_device_info intel_device_info_g4x = {
125    .ver = 4,
126    .verx10 = 45,
127    .has_pln = true,
128    .has_compr4 = true,
129    .has_surface_tile_offset = true,
130    .platform = INTEL_PLATFORM_G4X,
131    .num_slices = 1,
132    .num_subslices = { 1, },
133    .max_eus_per_subslice = 10,
134    .num_thread_per_eu = 5,
135    .max_vs_threads = 32,
136    .max_gs_threads = 2,
137    .max_wm_threads = 10 * 5,
138    .urb = {
139       .size = 384,
140    },
141    .timestamp_frequency = 12500000,
142    .simulator_id = -1,
143 };
144 
145 static const struct intel_device_info intel_device_info_ilk = {
146    .ver = 5,
147    .platform = INTEL_PLATFORM_ILK,
148    .has_pln = true,
149    .has_compr4 = true,
150    .has_surface_tile_offset = true,
151    .num_slices = 1,
152    .num_subslices = { 1, },
153    .max_eus_per_subslice = 12,
154    .num_thread_per_eu = 6,
155    .max_vs_threads = 72,
156    .max_gs_threads = 32,
157    .max_wm_threads = 12 * 6,
158    .urb = {
159       .size = 1024,
160    },
161    .timestamp_frequency = 12500000,
162    .simulator_id = -1,
163 };
164 
165 static const struct intel_device_info intel_device_info_snb_gt1 = {
166    .ver = 6,
167    .gt = 1,
168    .platform = INTEL_PLATFORM_SNB,
169    .has_hiz_and_separate_stencil = true,
170    .has_llc = true,
171    .has_pln = true,
172    .has_surface_tile_offset = true,
173    .needs_unlit_centroid_workaround = true,
174    .num_slices = 1,
175    .num_subslices = { 1, },
176    .max_eus_per_subslice = 6,
177    .num_thread_per_eu = 6, /* Not confirmed */
178    .max_vs_threads = 24,
179    .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
180    .max_wm_threads = 40,
181    .urb = {
182       .size = 32,
183       .min_entries = {
184          [MESA_SHADER_VERTEX]   = 24,
185       },
186       .max_entries = {
187          [MESA_SHADER_VERTEX]   = 256,
188          [MESA_SHADER_GEOMETRY] = 256,
189       },
190    },
191    .timestamp_frequency = 12500000,
192    .simulator_id = -1,
193 };
194 
195 static const struct intel_device_info intel_device_info_snb_gt2 = {
196    .ver = 6,
197    .gt = 2,
198    .platform = INTEL_PLATFORM_SNB,
199    .has_hiz_and_separate_stencil = true,
200    .has_llc = true,
201    .has_pln = true,
202    .has_surface_tile_offset = true,
203    .needs_unlit_centroid_workaround = true,
204    .num_slices = 1,
205    .num_subslices = { 1, },
206    .max_eus_per_subslice = 12,
207    .num_thread_per_eu = 6, /* Not confirmed */
208    .max_vs_threads = 60,
209    .max_gs_threads = 60,
210    .max_wm_threads = 80,
211    .urb = {
212       .size = 64,
213       .min_entries = {
214          [MESA_SHADER_VERTEX]   = 24,
215       },
216       .max_entries = {
217          [MESA_SHADER_VERTEX]   = 256,
218          [MESA_SHADER_GEOMETRY] = 256,
219       },
220    },
221    .timestamp_frequency = 12500000,
222    .simulator_id = -1,
223 };
224 
/* Designated-initializer fragment with the ver 7 defaults.  It is expanded at
 * the start of the IVB and BYT tables and, through HSW_FEATURES, the HSW
 * tables; designators written after it in a table override these values.
 */
#define GFX7_FEATURES                        \
   .ver = 7,                                 \
   .has_llc = true,                          \
   .has_pln = true,                          \
   .has_64bit_float = true,                  \
   .has_surface_tile_offset = true,          \
   .has_hiz_and_separate_stencil = true,     \
   .must_use_separate_stencil = true,        \
   .max_constant_urb_size_kb = 16,           \
   .timestamp_frequency = 12500000
235 
236 static const struct intel_device_info intel_device_info_ivb_gt1 = {
237    GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 1,
238    .num_slices = 1,
239    .num_subslices = { 1, },
240    .max_eus_per_subslice = 6,
241    .num_thread_per_eu = 6,
242    .l3_banks = 2,
243    .max_vs_threads = 36,
244    .max_tcs_threads = 36,
245    .max_tes_threads = 36,
246    .max_gs_threads = 36,
247    .max_wm_threads = 48,
248    .max_cs_threads = 36,
249    .urb = {
250       .min_entries = {
251          [MESA_SHADER_VERTEX]    = 32,
252          [MESA_SHADER_TESS_EVAL] = 10,
253       },
254       .max_entries = {
255          [MESA_SHADER_VERTEX]    = 512,
256          [MESA_SHADER_TESS_CTRL] = 32,
257          [MESA_SHADER_TESS_EVAL] = 288,
258          [MESA_SHADER_GEOMETRY]  = 192,
259       },
260    },
261    .simulator_id = 7,
262 };
263 
264 static const struct intel_device_info intel_device_info_ivb_gt2 = {
265    GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 2,
266    .num_slices = 1,
267    .num_subslices = { 1, },
268    .max_eus_per_subslice = 12,
269    .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
270                             * @max_wm_threads ... */
271    .l3_banks = 4,
272    .max_vs_threads = 128,
273    .max_tcs_threads = 128,
274    .max_tes_threads = 128,
275    .max_gs_threads = 128,
276    .max_wm_threads = 172,
277    .max_cs_threads = 64,
278    .urb = {
279       .min_entries = {
280          [MESA_SHADER_VERTEX]    = 32,
281          [MESA_SHADER_TESS_EVAL] = 10,
282       },
283       .max_entries = {
284          [MESA_SHADER_VERTEX]    = 704,
285          [MESA_SHADER_TESS_CTRL] = 64,
286          [MESA_SHADER_TESS_EVAL] = 448,
287          [MESA_SHADER_GEOMETRY]  = 320,
288       },
289    },
290    .simulator_id = 7,
291 };
292 
293 static const struct intel_device_info intel_device_info_byt = {
294    GFX7_FEATURES, .platform = INTEL_PLATFORM_BYT, .gt = 1,
295    .num_slices = 1,
296    .num_subslices = { 1, },
297    .max_eus_per_subslice = 4,
298    .num_thread_per_eu = 8,
299    .l3_banks = 1,
300    .has_llc = false,
301    .max_vs_threads = 36,
302    .max_tcs_threads = 36,
303    .max_tes_threads = 36,
304    .max_gs_threads = 36,
305    .max_wm_threads = 48,
306    .max_cs_threads = 32,
307    .urb = {
308       .min_entries = {
309          [MESA_SHADER_VERTEX]    = 32,
310          [MESA_SHADER_TESS_EVAL] = 10,
311       },
312       .max_entries = {
313          [MESA_SHADER_VERTEX]    = 512,
314          [MESA_SHADER_TESS_CTRL] = 32,
315          [MESA_SHADER_TESS_EVAL] = 288,
316          [MESA_SHADER_GEOMETRY]  = 192,
317       },
318    },
319    .simulator_id = 10,
320 };
321 
/* Initializer fragment for the HSW tables: the GFX7_FEATURES defaults plus
 * the HSW platform tag, verx10 75 and SIMD16 three-source support.
 */
#define HSW_FEATURES                   \
   GFX7_FEATURES,                      \
   .verx10 = 75,                       \
   .platform = INTEL_PLATFORM_HSW,     \
   .supports_simd16_3src = true
327 
328 static const struct intel_device_info intel_device_info_hsw_gt1 = {
329    HSW_FEATURES, .gt = 1,
330    .num_slices = 1,
331    .num_subslices = { 1, },
332    .max_eus_per_subslice = 10,
333    .num_thread_per_eu = 7,
334    .l3_banks = 2,
335    .max_vs_threads = 70,
336    .max_tcs_threads = 70,
337    .max_tes_threads = 70,
338    .max_gs_threads = 70,
339    .max_wm_threads = 102,
340    .max_cs_threads = 70,
341    .urb = {
342       .min_entries = {
343          [MESA_SHADER_VERTEX]    = 32,
344          [MESA_SHADER_TESS_EVAL] = 10,
345       },
346       .max_entries = {
347          [MESA_SHADER_VERTEX]    = 640,
348          [MESA_SHADER_TESS_CTRL] = 64,
349          [MESA_SHADER_TESS_EVAL] = 384,
350          [MESA_SHADER_GEOMETRY]  = 256,
351       },
352    },
353    .simulator_id = 9,
354 };
355 
356 static const struct intel_device_info intel_device_info_hsw_gt2 = {
357    HSW_FEATURES, .gt = 2,
358    .num_slices = 1,
359    .num_subslices = { 2, },
360    .max_eus_per_subslice = 10,
361    .num_thread_per_eu = 7,
362    .l3_banks = 4,
363    .max_vs_threads = 280,
364    .max_tcs_threads = 256,
365    .max_tes_threads = 280,
366    .max_gs_threads = 256,
367    .max_wm_threads = 204,
368    .max_cs_threads = 70,
369    .urb = {
370       .min_entries = {
371          [MESA_SHADER_VERTEX]    = 64,
372          [MESA_SHADER_TESS_EVAL] = 10,
373       },
374       .max_entries = {
375          [MESA_SHADER_VERTEX]    = 1664,
376          [MESA_SHADER_TESS_CTRL] = 128,
377          [MESA_SHADER_TESS_EVAL] = 960,
378          [MESA_SHADER_GEOMETRY]  = 640,
379       },
380    },
381    .simulator_id = 9,
382 };
383 
384 static const struct intel_device_info intel_device_info_hsw_gt3 = {
385    HSW_FEATURES, .gt = 3,
386    .num_slices = 2,
387    .num_subslices = { 2, 2, },
388    .max_eus_per_subslice = 10,
389    .num_thread_per_eu = 7,
390    .l3_banks = 8,
391    .max_vs_threads = 280,
392    .max_tcs_threads = 256,
393    .max_tes_threads = 280,
394    .max_gs_threads = 256,
395    .max_wm_threads = 408,
396    .max_cs_threads = 70,
397    .urb = {
398       .min_entries = {
399          [MESA_SHADER_VERTEX]    = 64,
400          [MESA_SHADER_TESS_EVAL] = 10,
401       },
402       .max_entries = {
403          [MESA_SHADER_VERTEX]    = 1664,
404          [MESA_SHADER_TESS_CTRL] = 128,
405          [MESA_SHADER_TESS_EVAL] = 960,
406          [MESA_SHADER_GEOMETRY]  = 640,
407       },
408    },
409    .max_constant_urb_size_kb = 32,
410    .simulator_id = 9,
411 };
412 
/* Designated-initializer fragment with the ver 8 defaults.  Later macros and
 * device tables expand it first and then override individual fields.
 *
 * has_sample_with_hiz stays false here: it is unclear how well sampling from
 * the hiz buffer is supported on GFX8, so the conservative choice is kept
 * for now.
 */
#define GFX8_FEATURES                        \
   .ver = 8,                                 \
   .has_llc = true,                          \
   .has_pln = true,                          \
   .has_sample_with_hiz = false,             \
   .has_hiz_and_separate_stencil = true,     \
   .must_use_separate_stencil = true,        \
   .has_surface_tile_offset = true,          \
   .has_integer_dword_mul = true,            \
   .has_64bit_float = true,                  \
   .has_64bit_int = true,                    \
   .supports_simd16_3src = true,             \
   .num_thread_per_eu = 7,                   \
   .max_vs_threads = 504,                    \
   .max_tcs_threads = 504,                   \
   .max_tes_threads = 504,                   \
   .max_gs_threads = 504,                    \
   .max_wm_threads = 384,                    \
   .max_threads_per_psd = 64,                \
   .max_constant_urb_size_kb = 32,           \
   .timestamp_frequency = 12500000
437 
438 static const struct intel_device_info intel_device_info_bdw_gt1 = {
439    GFX8_FEATURES, .gt = 1,
440    .platform = INTEL_PLATFORM_BDW,
441    .num_slices = 1,
442    .num_subslices = { 2, },
443    .max_eus_per_subslice = 6,
444    .l3_banks = 2,
445    .max_cs_threads = 42,
446    .urb = {
447       .min_entries = {
448          [MESA_SHADER_VERTEX]    = 64,
449          [MESA_SHADER_TESS_EVAL] = 34,
450       },
451       .max_entries = {
452          [MESA_SHADER_VERTEX]    = 2560,
453          [MESA_SHADER_TESS_CTRL] = 504,
454          [MESA_SHADER_TESS_EVAL] = 1536,
455          /* Reduced from 960, seems to be similar to the bug on Gfx9 GT1. */
456          [MESA_SHADER_GEOMETRY]  = 690,
457       },
458    },
459    .simulator_id = 11,
460 };
461 
462 static const struct intel_device_info intel_device_info_bdw_gt2 = {
463    GFX8_FEATURES, .gt = 2,
464    .platform = INTEL_PLATFORM_BDW,
465    .num_slices = 1,
466    .num_subslices = { 3, },
467    .max_eus_per_subslice = 8,
468    .l3_banks = 4,
469    .max_cs_threads = 56,
470    .urb = {
471       .min_entries = {
472          [MESA_SHADER_VERTEX]    = 64,
473          [MESA_SHADER_TESS_EVAL] = 34,
474       },
475       .max_entries = {
476          [MESA_SHADER_VERTEX]    = 2560,
477          [MESA_SHADER_TESS_CTRL] = 504,
478          [MESA_SHADER_TESS_EVAL] = 1536,
479          [MESA_SHADER_GEOMETRY]  = 960,
480       },
481    },
482    .simulator_id = 11,
483 };
484 
485 static const struct intel_device_info intel_device_info_bdw_gt3 = {
486    GFX8_FEATURES, .gt = 3,
487    .platform = INTEL_PLATFORM_BDW,
488    .num_slices = 2,
489    .num_subslices = { 3, 3, },
490    .max_eus_per_subslice = 8,
491    .l3_banks = 8,
492    .max_cs_threads = 56,
493    .urb = {
494       .min_entries = {
495          [MESA_SHADER_VERTEX]    = 64,
496          [MESA_SHADER_TESS_EVAL] = 34,
497       },
498       .max_entries = {
499          [MESA_SHADER_VERTEX]    = 2560,
500          [MESA_SHADER_TESS_CTRL] = 504,
501          [MESA_SHADER_TESS_EVAL] = 1536,
502          [MESA_SHADER_GEOMETRY]  = 960,
503       },
504    },
505    .simulator_id = 11,
506 };
507 
508 static const struct intel_device_info intel_device_info_chv = {
509    GFX8_FEATURES, .platform = INTEL_PLATFORM_CHV, .gt = 1,
510    .has_llc = false,
511    .has_integer_dword_mul = false,
512    .num_slices = 1,
513    .num_subslices = { 2, },
514    .max_eus_per_subslice = 8,
515    .l3_banks = 2,
516    .max_vs_threads = 80,
517    .max_tcs_threads = 80,
518    .max_tes_threads = 80,
519    .max_gs_threads = 80,
520    .max_wm_threads = 128,
521    .max_cs_threads = 6 * 7,
522    .urb = {
523       .min_entries = {
524          [MESA_SHADER_VERTEX]    = 34,
525          [MESA_SHADER_TESS_EVAL] = 34,
526       },
527       .max_entries = {
528          [MESA_SHADER_VERTEX]    = 640,
529          [MESA_SHADER_TESS_CTRL] = 80,
530          [MESA_SHADER_TESS_EVAL] = 384,
531          [MESA_SHADER_GEOMETRY]  = 256,
532       },
533    },
534    .simulator_id = 13,
535 };
536 
/* Initializer fragment with the ver 9 thread limits, timestamp frequency and
 * URB entry bounds.  GFX9_FEATURES and GFX9_LP_FEATURES expand it right after
 * GFX8_FEATURES, so the values here replace the ver 8 ones.
 */
#define GFX9_HW_INFO                            \
   .ver = 9,                                    \
   .timestamp_frequency = 12000000,             \
   .max_vs_threads = 336,                       \
   .max_tcs_threads = 336,                      \
   .max_tes_threads = 336,                      \
   .max_gs_threads = 336,                       \
   .max_cs_threads = 56,                        \
   .max_threads_per_psd = 64,                   \
   .urb = {                                     \
      .min_entries = {                          \
         [MESA_SHADER_VERTEX]    = 64,          \
         [MESA_SHADER_TESS_EVAL] = 34,          \
      },                                        \
      .max_entries = {                          \
         [MESA_SHADER_VERTEX]    = 1856,        \
         [MESA_SHADER_TESS_CTRL] = 672,         \
         [MESA_SHADER_TESS_EVAL] = 1120,        \
         [MESA_SHADER_GEOMETRY]  = 640,         \
      },                                        \
   }
558 
/* Initializer fragment shared by the GFX9_LP_FEATURES_3X6 / _2X6 variants.
 * It starts from GFX8_FEATURES and GFX9_HW_INFO (in that order) and then
 * overrides feature bits, thread limits, timestamp frequency and the whole
 * .urb sub-object.
 */
#define GFX9_LP_FEATURES                        \
   GFX8_FEATURES,                               \
   GFX9_HW_INFO,                                \
   .gt = 1,                                     \
   .num_slices = 1,                             \
   .num_thread_per_eu = 6,                      \
   .has_llc = false,                            \
   .has_integer_dword_mul = false,              \
   .has_sample_with_hiz = true,                 \
   .has_illegal_ccs_values = true,              \
   .timestamp_frequency = 19200000,             \
   .max_vs_threads = 112,                       \
   .max_tcs_threads = 112,                      \
   .max_tes_threads = 112,                      \
   .max_gs_threads = 112,                       \
   .max_cs_threads = 6 * 6,                     \
   .urb = {                                     \
      .min_entries = {                          \
         [MESA_SHADER_VERTEX]    = 34,          \
         [MESA_SHADER_TESS_EVAL] = 34,          \
      },                                        \
      .max_entries = {                          \
         [MESA_SHADER_VERTEX]    = 704,         \
         [MESA_SHADER_TESS_CTRL] = 256,         \
         [MESA_SHADER_TESS_EVAL] = 416,         \
         [MESA_SHADER_GEOMETRY]  = 256,         \
      },                                        \
   }
587 
/* GFX9_LP_FEATURES with 3 subslices in slice 0 and 6 EUs per subslice. */
#define GFX9_LP_FEATURES_3X6           \
   GFX9_LP_FEATURES,                   \
   .max_eus_per_subslice = 6,          \
   .num_subslices = { 3 }
592 
/* GFX9_LP_FEATURES with 2 subslices in slice 0 and 6 EUs per subslice.  The
 * thread limits and the .urb sub-object set by GFX9_LP_FEATURES are
 * overridden again here.
 */
#define GFX9_LP_FEATURES_2X6                    \
   GFX9_LP_FEATURES,                            \
   .max_eus_per_subslice = 6,                   \
   .num_subslices = { 2 },                      \
   .max_vs_threads = 56,                        \
   .max_tcs_threads = 56,                       \
   .max_tes_threads = 56,                       \
   .max_gs_threads = 56,                        \
   .max_cs_threads = 6 * 6,                     \
   .urb = {                                     \
      .min_entries = {                          \
         [MESA_SHADER_VERTEX]    = 34,          \
         [MESA_SHADER_TESS_EVAL] = 34,          \
      },                                        \
      .max_entries = {                          \
         [MESA_SHADER_VERTEX]    = 352,         \
         [MESA_SHADER_TESS_CTRL] = 128,         \
         [MESA_SHADER_TESS_EVAL] = 208,         \
         [MESA_SHADER_GEOMETRY]  = 128,         \
      },                                        \
   }
614 
/* Initializer fragment for the SKL, KBL and CFL tables: GFX8_FEATURES
 * followed by GFX9_HW_INFO, two feature-bit overrides, and four
 * cooperative-matrix configuration rows.
 */
#define GFX9_FEATURES                                                      \
   GFX8_FEATURES,                                                          \
   GFX9_HW_INFO,                                                           \
   .has_illegal_ccs_values = true,                                         \
   .has_sample_with_hiz = true,                                            \
   .cooperative_matrix_configurations = {                                  \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                               \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                            \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 },                          \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                               \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                            \
        INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 },                          \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                               \
        INTEL_CMAT_SINT8, INTEL_CMAT_SINT8,                                \
        INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },                            \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                               \
        INTEL_CMAT_UINT8, INTEL_CMAT_UINT8,                                \
        INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },                            \
   }
626 
627 static const struct intel_device_info intel_device_info_skl_gt1 = {
628    GFX9_FEATURES, .gt = 1,
629    .platform = INTEL_PLATFORM_SKL,
630    .num_slices = 1,
631    .num_subslices = { 2, },
632    .max_eus_per_subslice = 6,
633    .l3_banks = 2,
634    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
635     * leading to some vertices to go missing if we use too much URB.
636     */
637    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
638    .simulator_id = 12,
639 };
640 
641 static const struct intel_device_info intel_device_info_skl_gt2 = {
642    GFX9_FEATURES, .gt = 2,
643    .platform = INTEL_PLATFORM_SKL,
644    .num_slices = 1,
645    .num_subslices = { 3, },
646    .max_eus_per_subslice = 8,
647    .l3_banks = 4,
648    .simulator_id = 12,
649 };
650 
651 static const struct intel_device_info intel_device_info_skl_gt3 = {
652    GFX9_FEATURES, .gt = 3,
653    .platform = INTEL_PLATFORM_SKL,
654    .num_slices = 2,
655    .num_subslices = { 3, 3, },
656    .max_eus_per_subslice = 8,
657    .l3_banks = 8,
658    .simulator_id = 12,
659 };
660 
661 static const struct intel_device_info intel_device_info_skl_gt4 = {
662    GFX9_FEATURES, .gt = 4,
663    .platform = INTEL_PLATFORM_SKL,
664    .num_slices = 3,
665    .num_subslices = { 3, 3, 3, },
666    .max_eus_per_subslice = 8,
667    .l3_banks = 12,
668    /* From the "L3 Allocation and Programming" documentation:
669     *
670     * "URB is limited to 1008KB due to programming restrictions.  This is not a
671     * restriction of the L3 implementation, but of the FF and other clients.
672     * Therefore, in a GT4 implementation it is possible for the programmed
673     * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
674     * only 1008KB of this will be used."
675     */
676    .simulator_id = 12,
677 };
678 
679 static const struct intel_device_info intel_device_info_bxt = {
680    GFX9_LP_FEATURES_3X6,
681    .platform = INTEL_PLATFORM_BXT,
682    .l3_banks = 2,
683    .simulator_id = 14,
684 };
685 
686 static const struct intel_device_info intel_device_info_bxt_2x6 = {
687    GFX9_LP_FEATURES_2X6,
688    .platform = INTEL_PLATFORM_BXT,
689    .l3_banks = 1,
690    .simulator_id = 14,
691 };
692 /*
693  * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
694  * There's no KBL entry. Using the default SKL (GFX9) GS entries value.
695  */
696 
697 static const struct intel_device_info intel_device_info_kbl_gt1 = {
698    GFX9_FEATURES,
699    .platform = INTEL_PLATFORM_KBL,
700    .gt = 1,
701 
702    .max_cs_threads = 7 * 6,
703    .num_slices = 1,
704    .num_subslices = { 2, },
705    .max_eus_per_subslice = 6,
706    .l3_banks = 2,
707    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
708     * leading to some vertices to go missing if we use too much URB.
709     */
710    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
711    .urb.max_entries[MESA_SHADER_GEOMETRY] = 256,
712    .simulator_id = 16,
713 };
714 
715 static const struct intel_device_info intel_device_info_kbl_gt1_5 = {
716    GFX9_FEATURES,
717    .platform = INTEL_PLATFORM_KBL,
718    .gt = 1,
719 
720    .max_cs_threads = 7 * 6,
721    .num_slices = 1,
722    .num_subslices = { 3, },
723    .max_eus_per_subslice = 6,
724    .l3_banks = 4,
725    .simulator_id = 16,
726 };
727 
728 static const struct intel_device_info intel_device_info_kbl_gt2 = {
729    GFX9_FEATURES,
730    .platform = INTEL_PLATFORM_KBL,
731    .gt = 2,
732 
733    .num_slices = 1,
734    .num_subslices = { 3, },
735    .max_eus_per_subslice = 8,
736    .l3_banks = 4,
737    .simulator_id = 16,
738 };
739 
740 static const struct intel_device_info intel_device_info_kbl_gt3 = {
741    GFX9_FEATURES,
742    .platform = INTEL_PLATFORM_KBL,
743    .gt = 3,
744 
745    .num_slices = 2,
746    .num_subslices = { 3, 3, },
747    .max_eus_per_subslice = 8,
748    .l3_banks = 8,
749    .simulator_id = 16,
750 };
751 
752 static const struct intel_device_info intel_device_info_kbl_gt4 = {
753    GFX9_FEATURES,
754    .platform = INTEL_PLATFORM_KBL,
755    .gt = 4,
756 
757    /*
758     * From the "L3 Allocation and Programming" documentation:
759     *
760     * "URB is limited to 1008KB due to programming restrictions.  This
761     *  is not a restriction of the L3 implementation, but of the FF and
762     *  other clients.  Therefore, in a GT4 implementation it is
763     *  possible for the programmed allocation of the L3 data array to
764     *  provide 3*384KB=1152KB for URB, but only 1008KB of this
765     *  will be used."
766     */
767    .num_slices = 3,
768    .num_subslices = { 3, 3, 3, },
769    .max_eus_per_subslice = 8,
770    .l3_banks = 12,
771    .simulator_id = 16,
772 };
773 
774 static const struct intel_device_info intel_device_info_glk = {
775    GFX9_LP_FEATURES_3X6,
776    .platform = INTEL_PLATFORM_GLK,
777    .l3_banks = 2,
778    .simulator_id = 17,
779 };
780 
781 static const struct intel_device_info intel_device_info_glk_2x6 = {
782    GFX9_LP_FEATURES_2X6,
783    .platform = INTEL_PLATFORM_GLK,
784    .l3_banks = 2,
785    .simulator_id = 17,
786 };
787 
788 static const struct intel_device_info intel_device_info_cfl_gt1 = {
789    GFX9_FEATURES,
790    .platform = INTEL_PLATFORM_CFL,
791    .gt = 1,
792 
793    .num_slices = 1,
794    .num_subslices = { 2, },
795    .max_eus_per_subslice = 6,
796    .l3_banks = 2,
797    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
798     * leading to some vertices to go missing if we use too much URB.
799     */
800    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
801    .urb.max_entries[MESA_SHADER_GEOMETRY] = 256,
802    .simulator_id = 24,
803 };
804 static const struct intel_device_info intel_device_info_cfl_gt2 = {
805    GFX9_FEATURES,
806    .platform = INTEL_PLATFORM_CFL,
807    .gt = 2,
808 
809    .num_slices = 1,
810    .num_subslices = { 3, },
811    .max_eus_per_subslice = 8,
812    .l3_banks = 4,
813    .simulator_id = 24,
814 };
815 
816 static const struct intel_device_info intel_device_info_cfl_gt3 = {
817    GFX9_FEATURES,
818    .platform = INTEL_PLATFORM_CFL,
819    .gt = 3,
820 
821    .num_slices = 2,
822    .num_subslices = { 3, 3, },
823    .max_eus_per_subslice = 8,
824    .l3_banks = 8,
825    .simulator_id = 24,
826 };
827 
/* Wraps a comma-separated list of per-slice subslice counts in braces so the
 * list can be passed as a single macro argument (see GFX11_FEATURES).  At
 * least one count must be given.  Written with standard C99 variadic syntax
 * rather than the GNU named-variadic form, so it does not depend on a
 * compiler extension.
 */
#define subslices(...) { __VA_ARGS__, }
829 
/* Initializer fragment with the ver 11 thread limits.  GFX11_FEATURES expands
 * it right after GFX8_FEATURES, so these values replace the ver 8 ones.
 */
#define GFX11_HW_INFO                  \
   .ver = 11,                          \
   .has_pln = false,                   \
   .max_vs_threads = 364,              \
   .max_tcs_threads = 224,             \
   .max_tes_threads = 364,             \
   .max_gs_threads = 224,              \
   .max_cs_threads = 56,               \
   .max_threads_per_psd = 64
839 
/* Initializer fragment for the ver 11 device tables.
 *
 *   _gt         value stored in .gt
 *   _slices     value stored in .num_slices
 *   _subslices  brace-enclosed initializer for .num_subslices (callers build
 *               it with the subslices() macro)
 *   _l3         value stored in .l3_banks
 *   _platform   value stored in .platform
 *
 * GFX8_FEATURES and GFX11_HW_INFO are expanded first; everything after them
 * overrides or extends those defaults.
 */
#define GFX11_FEATURES(_gt, _slices, _subslices, _l3, _platform)           \
   GFX8_FEATURES,                                                          \
   GFX11_HW_INFO,                                                          \
   .platform = _platform,                                                  \
   .gt = _gt,                                                              \
   .num_slices = _slices,                                                  \
   .num_subslices = _subslices,                                            \
   .l3_banks = _l3,                                                        \
   .max_eus_per_subslice = 8,                                              \
   .has_64bit_float = false,                                               \
   .has_64bit_int = false,                                                 \
   .has_integer_dword_mul = false,                                         \
   .has_sample_with_hiz = false,                                           \
   .has_illegal_ccs_values = true,                                         \
   .cooperative_matrix_configurations = {                                  \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                               \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                            \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 },                          \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                               \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                            \
        INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 },                          \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                               \
        INTEL_CMAT_SINT8, INTEL_CMAT_SINT8,                                \
        INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },                            \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                               \
        INTEL_CMAT_UINT8, INTEL_CMAT_UINT8,                                \
        INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },                            \
   }
858 
/* Min/max URB entry counts used by the ver 11 tables.  Meant to be expanded
 * inside a `.urb = { ... }` initializer.
 */
#define GFX11_URB_MIN_MAX_ENTRIES               \
   .min_entries = {                             \
      [MESA_SHADER_VERTEX]    = 64,             \
      [MESA_SHADER_TESS_EVAL] = 34,             \
   },                                           \
   .max_entries = {                             \
      [MESA_SHADER_VERTEX]    = 2384,           \
      [MESA_SHADER_TESS_CTRL] = 1032,           \
      [MESA_SHADER_TESS_EVAL] = 2384,           \
      [MESA_SHADER_GEOMETRY]  = 1032,           \
   }
870 
871 static const struct intel_device_info intel_device_info_icl_gt2 = {
872    GFX11_FEATURES(2, 1, subslices(8), 8, INTEL_PLATFORM_ICL),
873    .urb = {
874       GFX11_URB_MIN_MAX_ENTRIES,
875    },
876    .simulator_id = 19,
877 };
878 
879 static const struct intel_device_info intel_device_info_icl_gt1_5 = {
880    GFX11_FEATURES(1, 1, subslices(6), 6, INTEL_PLATFORM_ICL),
881    .urb = {
882       GFX11_URB_MIN_MAX_ENTRIES,
883    },
884    .simulator_id = 19,
885 };
886 
887 static const struct intel_device_info intel_device_info_icl_gt1 = {
888    GFX11_FEATURES(1, 1, subslices(4), 6, INTEL_PLATFORM_ICL),
889    .urb = {
890       GFX11_URB_MIN_MAX_ENTRIES,
891    },
892    .simulator_id = 19,
893 };
894 
895 static const struct intel_device_info intel_device_info_icl_gt0_5 = {
896    GFX11_FEATURES(1, 1, subslices(1), 6, INTEL_PLATFORM_ICL),
897    .urb = {
898       GFX11_URB_MIN_MAX_ENTRIES,
899    },
900    .simulator_id = 19,
901 };
902 
/* Initializer fragment appended after GFX11_FEATURES(...) by the EHL tables:
 * URB entry bounds, two CCS-related flags and the simulator id.
 */
#define GFX11_LP_FEATURES                       \
   .simulator_id = 28,                          \
   .disable_ccs_repack = true,                  \
   .has_illegal_ccs_values = true,              \
   .urb = { GFX11_URB_MIN_MAX_ENTRIES, }
910 
/* Static description of the EHL "4x8" configuration: GFX11_FEATURES followed
 * by the GFX11_LP_FEATURES additions, with no further overrides.
 */
static const struct intel_device_info intel_device_info_ehl_4x8 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
};
915 
/* Static description of the EHL "4x6" configuration. Same macros as the 4x8
 * entry, then .max_eus_per_subslice is set to 6; as the last designated
 * initializer for that member, this is the value stored in the struct.
 */
static const struct intel_device_info intel_device_info_ehl_4x6 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   .max_eus_per_subslice = 6,
};
921 
/* Static description of the EHL "4x5" configuration. Same macros as the 4x8
 * entry, then .max_eus_per_subslice is set to 5; as the last designated
 * initializer for that member, this is the value stored in the struct.
 */
static const struct intel_device_info intel_device_info_ehl_4x5 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   .max_eus_per_subslice = 5,
};
927 
/* Static description of the EHL "4x4" configuration. Same macros as the 4x8
 * entry, then .max_eus_per_subslice is set to 4; as the last designated
 * initializer for that member, this is the value stored in the struct.
 */
static const struct intel_device_info intel_device_info_ehl_4x4 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   .max_eus_per_subslice = 4,
};
933 
/* Static description of the EHL "2x8" configuration: like the 4x8 entry but
 * with subslices(2) passed to GFX11_FEATURES, and no further overrides.
 */
static const struct intel_device_info intel_device_info_ehl_2x8 = {
   GFX11_FEATURES(1, 1, subslices(2), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
};
938 
/* Static description of the EHL "2x4" configuration. Same macros as the 2x8
 * entry, then .max_eus_per_subslice is set to 4; as the last designated
 * initializer for that member, this is the value stored in the struct.
 */
static const struct intel_device_info intel_device_info_ehl_2x4 = {
   GFX11_FEATURES(1, 1, subslices(2), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   .max_eus_per_subslice = 4,
};
944 
/* Initializer fragment with the hardware limits common to the Gfx12 device
 * descriptions: version, a few capability flags, per-stage thread limits and
 * the URB size and per-stage entry limits. It is expanded by GFX12_FEATURES.
 */
#define GFX12_HW_INFO                               \
   .ver = 12,                                       \
   .has_pln = false,                                \
   .has_sample_with_hiz = false,                    \
   .has_aux_map = true,                             \
   .max_vs_threads = 546,                           \
   .max_gs_threads = 336,                           \
   .max_tcs_threads = 336,                          \
   .max_tes_threads = 546,                          \
   .max_threads_per_psd = 64,                       \
   .max_cs_threads = 112, /* threads per DSS */     \
   .urb = {                                         \
      .size = 512, /* For intel_stub_gpu */         \
      .min_entries = {                              \
         [MESA_SHADER_VERTEX]    = 64,              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
      },                                            \
      .max_entries = {                              \
         [MESA_SHADER_VERTEX]    = 3576,            \
         [MESA_SHADER_TESS_CTRL] = 1548,            \
         [MESA_SHADER_TESS_EVAL] = 3576,            \
         [MESA_SHADER_GEOMETRY]  = 1548,            \
      },                                            \
   }
969 
/* Common initializer for the Gfx12 device descriptions.
 *
 * Starts from GFX8_FEATURES and GFX12_HW_INFO, then turns off 64-bit float,
 * 64-bit int and integer dword multiply, stores the three arguments in .gt,
 * .num_slices and .l3_banks, and sets the simulator id, EUs per subslice,
 * PAT entries and cooperative-matrix configurations.
 *
 * It does not set .num_subslices or .platform; each user appends those.
 */
#define GFX12_FEATURES(_gt, _slices, _l3)                       \
   GFX8_FEATURES,                                               \
   GFX12_HW_INFO,                                               \
   .has_64bit_float = false,                                    \
   .has_64bit_int = false,                                      \
   .has_integer_dword_mul = false,                              \
   .gt = _gt, .num_slices = _slices, .l3_banks = _l3,           \
   .simulator_id = 22,                                          \
   .max_eus_per_subslice = 16,                                  \
   .pat = {                                                     \
         .cached_coherent = PAT_ENTRY(0, WB, 2WAY),             \
         .scanout = PAT_ENTRY(1, WC, NONE),                     \
         .writeback_incoherent = PAT_ENTRY(0, WB, 2WAY),        \
         .writecombining = PAT_ENTRY(1, WC, NONE),              \
   },                                                           \
   .cooperative_matrix_configurations = {                       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
   }
991 
/* Wraps its arguments in a brace-enclosed initializer list; used in this file
 * as the value of .num_subslices. Relies on the GNU named-variadic-parameter
 * syntax (`args...`).
 */
#define dual_subslices(args...) { args, }
993 
/* Gfx12 "GT0.5" initializer: GFX12_FEATURES with gt 1, one slice and 4 L3
 * banks, plus a single entry of value 1 in .num_subslices.
 */
#define GFX12_GT05_FEATURES                                     \
   GFX12_FEATURES(1, 1, 4),                                     \
   .num_subslices = dual_subslices(1)
997 
/* Gfx12 initializer parameterized on the GT level.
 *
 * For _gt == 1 this selects 4 L3 banks and a .num_subslices entry of 2; for
 * any other value it selects 8 L3 banks and an entry of 6. One slice is
 * always used.
 *
 * _gt is parenthesized wherever it appears inside an expression so that an
 * argument which is itself an expression cannot change how the comparison
 * and conditional parse.
 */
#define GFX12_GT_FEATURES(_gt)                                  \
   GFX12_FEATURES(_gt, 1, (_gt) == 1 ? 4 : 8),                  \
   .num_subslices = dual_subslices((_gt) == 1 ? 2 : 6)
1001 
/* Static description of the TGL gt1 configuration: GFX12_GT_FEATURES(1) with
 * the TGL platform id.
 */
static const struct intel_device_info intel_device_info_tgl_gt1 = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_TGL,
};
1006 
/* Static description of the TGL gt2 configuration: GFX12_GT_FEATURES(2) with
 * the TGL platform id.
 */
static const struct intel_device_info intel_device_info_tgl_gt2 = {
   GFX12_GT_FEATURES(2),
   .platform = INTEL_PLATFORM_TGL,
};
1011 
/* Static description of the RKL gt0.5 configuration: GFX12_GT05_FEATURES with
 * the RKL platform id.
 */
static const struct intel_device_info intel_device_info_rkl_gt05 = {
   GFX12_GT05_FEATURES,
   .platform = INTEL_PLATFORM_RKL,
};
1016 
/* Static description of the RKL gt1 configuration: GFX12_GT_FEATURES(1) with
 * the RKL platform id.
 */
static const struct intel_device_info intel_device_info_rkl_gt1 = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_RKL,
};
1021 
/* Static description of the ADL gt0.5 configuration: GFX12_GT05_FEATURES with
 * the ADL platform id and display version 13.
 */
static const struct intel_device_info intel_device_info_adl_gt05 = {
   GFX12_GT05_FEATURES,
   .platform = INTEL_PLATFORM_ADL,
   .display_ver = 13,
};
1027 
/* Static description of the ADL gt1 configuration: GFX12_GT_FEATURES(1) with
 * the ADL platform id and display version 13.
 */
static const struct intel_device_info intel_device_info_adl_gt1 = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_ADL,
   .display_ver = 13,
};
1033 
/* Static description of the ADL-N configuration. Identical to the ADL gt1
 * entry except that .is_adl_n is set.
 */
static const struct intel_device_info intel_device_info_adl_n = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_ADL,
   .display_ver = 13,
   .is_adl_n = true,
};
1040 
/* Static description of the ADL gt2 configuration: GFX12_GT_FEATURES(2) with
 * the ADL platform id and display version 13.
 */
static const struct intel_device_info intel_device_info_adl_gt2 = {
   GFX12_GT_FEATURES(2),
   .platform = INTEL_PLATFORM_ADL,
   .display_ver = 13,
};
1046 
/* Static description of the RPL configuration with display version 13.
 * The first two initializers spell out by hand exactly what
 * GFX12_GT_FEATURES(1) expands to.
 */
static const struct intel_device_info intel_device_info_rpl = {
   GFX12_FEATURES(1, 1, 4),
   .num_subslices = dual_subslices(2),
   .platform = INTEL_PLATFORM_RPL,
   .display_ver = 13,
};
1053 
/* Static description of the RPL-P configuration: GFX12_GT_FEATURES(2) with
 * the RPL platform id and display version 13.
 */
static const struct intel_device_info intel_device_info_rpl_p = {
   GFX12_GT_FEATURES(2),
   .platform = INTEL_PLATFORM_RPL,
   .display_ver = 13,
};
1059 
/* Initializer shared by the DG1 and SG1 device descriptions.
 *
 * Builds on GFX12_GT_FEATURES(2) and then overrides: no LLC, local memory
 * present, URB size 768 and simulator id 30. The .pat block repeats the same
 * four entries that GFX12_FEATURES already sets.
 */
#define GFX12_DG1_SG1_FEATURES                           \
   GFX12_GT_FEATURES(2),                                 \
   .platform = INTEL_PLATFORM_DG1,                       \
   .has_llc = false,                                     \
   .has_local_mem = true,                                \
   .urb.size = 768,                                      \
   .simulator_id = 30,                                   \
   /* There is no PAT table for DG1, using TGL one */    \
   .pat = {                                              \
         .cached_coherent = PAT_ENTRY(0, WB, 2WAY),      \
         .scanout = PAT_ENTRY(1, WC, NONE),              \
         .writeback_incoherent = PAT_ENTRY(0, WB, 2WAY), \
         .writecombining = PAT_ENTRY(1, WC, NONE),       \
   }
1074 
/* Static description of DG1; entirely defined by GFX12_DG1_SG1_FEATURES. */
static const struct intel_device_info intel_device_info_dg1 = {
   GFX12_DG1_SG1_FEATURES,
};
1078 
/* Static description of SG1; entirely defined by GFX12_DG1_SG1_FEATURES, so
 * it carries the same contents as the DG1 entry (including the DG1 platform
 * id).
 */
static const struct intel_device_info intel_device_info_sg1 = {
   GFX12_DG1_SG1_FEATURES,
};
1082 
/* Initializer fragment holding the Xe-HP per-stage URB entry limits.
 *
 * Expands to the .min_entries and .max_entries designated initializers only,
 * so it has to be expanded inside a `.urb = { ... }` initializer. The BSpec
 * numbers next to the maximums are the references given by the original
 * author.
 */
#define XEHP_URB_MIN_MAX_ENTRIES                        \
   .min_entries = {                                     \
      [MESA_SHADER_VERTEX]    = 64,                     \
      [MESA_SHADER_TESS_EVAL] = 34,                     \
   },                                                   \
   .max_entries = {                                     \
      [MESA_SHADER_VERTEX]    = 3832, /* BSpec 47138 */ \
      [MESA_SHADER_TESS_CTRL] = 1548, /* BSpec 47137 */ \
      [MESA_SHADER_TESS_EVAL] = 3576, /* BSpec 47135 */ \
      [MESA_SHADER_GEOMETRY]  = 1548, /* BSpec 47136 */ \
   }
1094 
/* Common initializer for the verx10 == 125 device descriptions (expanded by
 * DG2_FEATURES and MTL_FEATURES).
 *
 * Starts from GFX8_FEATURES, stores the three arguments in .gt, .num_slices
 * and .l3_banks, and sets version 12 / verx10 125, per-stage thread limits,
 * the URB size and entry limits, capability flags, simulator id 29 and the
 * cooperative-matrix configurations. .num_subslices gets a placeholder entry
 * that, per the inline comment, is replaced once topology is known.
 *
 * It does not set .pat or .platform; users append those.
 */
#define XEHP_FEATURES(_gt, _slices, _l3)                        \
   GFX8_FEATURES,                                               \
   .has_64bit_float = false,                                    \
   .has_64bit_int = false,                                      \
   .has_integer_dword_mul = false,                              \
   .gt = _gt, .num_slices = _slices, .l3_banks = _l3,           \
   .num_subslices = dual_subslices(1), /* updated by topology */\
   .ver = 12,                                                   \
   .has_pln = false,                                            \
   .has_sample_with_hiz = false,                                \
   .max_vs_threads = 546,  /* BSpec 46312 */                    \
   .max_gs_threads = 336,  /* BSpec 46299 */                    \
   .max_tcs_threads = 336, /* BSpec 46300 */                    \
   .max_tes_threads = 546, /* BSpec 46298 */                    \
   .max_threads_per_psd = 64,                                   \
   .max_cs_threads = 112, /* threads per DSS */                 \
   .urb = {                                                     \
      .size = 768, /* For intel_stub_gpu */                     \
      XEHP_URB_MIN_MAX_ENTRIES,                                 \
   },                                                           \
   .num_thread_per_eu = 8 /* BSpec 44472 */,                    \
   .max_eus_per_subslice = 16,                                  \
   .verx10 = 125,                                               \
   .has_llc = false,                                            \
   .has_lsc = true,                                             \
   .has_local_mem = true,                                       \
   .has_aux_map = false,                                        \
   .simulator_id = 29,                                          \
   .cooperative_matrix_configurations = {                       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 }, \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
   }
1129 
/* Initializer shared by the DG2 and ATS-M device descriptions.
 *
 * Builds on XEHP_FEATURES(0, 1, 0) and adds display version 13, a default
 * revision, the apply_hwconfig flag, coarse-pixel / mesh-shading /
 * ray-tracing / flat-CCS capability flags and the PAT entries.
 *
 * NOTE(review): the in-macro comment says the TGL PAT values are reused, but
 * .cached_coherent here is 1WAY while GFX12_FEATURES uses 2WAY for that
 * entry. Confirm which of the two (comment or value) is intended.
 */
#define DG2_FEATURES                                            \
   /* (Sub)slice info comes from the kernel topology info */    \
   XEHP_FEATURES(0, 1, 0),                                      \
   .display_ver = 13,                                           \
   .revision = 4, /* For offline compiler */                    \
   .apply_hwconfig = true,                                      \
   .has_coarse_pixel_primitive_and_cb = true,                   \
   .has_mesh_shading = true,                                    \
   .has_ray_tracing = true,                                     \
   .has_flat_ccs = true,                                        \
   /* There is no PAT table for DG2, using TGL ones */          \
   .pat = {                                                     \
         .cached_coherent = PAT_ENTRY(0, WB, 1WAY),             \
         .scanout = PAT_ENTRY(1, WC, NONE),                     \
         .writeback_incoherent = PAT_ENTRY(0, WB, 2WAY),        \
         .writecombining = PAT_ENTRY(1, WC, NONE),              \
   }
1147 
/* Static description of DG2-G10: DG2_FEATURES plus its platform id. */
static const struct intel_device_info intel_device_info_dg2_g10 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_DG2_G10,
};
1152 
/* Static description of DG2-G11: DG2_FEATURES plus its platform id. */
static const struct intel_device_info intel_device_info_dg2_g11 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_DG2_G11,
};
1157 
/* Static description of DG2-G12: DG2_FEATURES plus its platform id. */
static const struct intel_device_info intel_device_info_dg2_g12 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_DG2_G12,
};
1162 
/* Static description of ATS-M G10: DG2_FEATURES plus its platform id. */
static const struct intel_device_info intel_device_info_atsm_g10 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_ATSM_G10,
};
1167 
/* Static description of ATS-M G11: DG2_FEATURES plus its platform id. */
static const struct intel_device_info intel_device_info_atsm_g11 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_ATSM_G11,
};
1172 
/* Initializer shared by the MTL and ARL device descriptions.
 *
 * Builds on XEHP_FEATURES(0, 1, 0) and then overrides several of its values:
 * .has_local_mem becomes false, .has_aux_map becomes true and
 * .has_64bit_float becomes true (with has_64bit_float_via_math_pipe set).
 * .has_integer_dword_mul = false repeats what XEHP_FEATURES already sets.
 * It also adds apply_hwconfig, the coarse-pixel / mesh-shading / ray-tracing
 * capability flags and the PAT entries.
 */
#define MTL_FEATURES                                            \
   /* (Sub)slice info comes from the kernel topology info */    \
   XEHP_FEATURES(0, 1, 0),                                      \
   .has_local_mem = false,                                      \
   .has_aux_map = true,                                         \
   .apply_hwconfig = true,                                      \
   .has_64bit_float = true,                                     \
   .has_64bit_float_via_math_pipe = true,                       \
   .has_integer_dword_mul = false,                              \
   .has_coarse_pixel_primitive_and_cb = true,                   \
   .has_mesh_shading = true,                                    \
   .has_ray_tracing = true,                                     \
   .pat = {                                                     \
         .cached_coherent = PAT_ENTRY(3, WB, 1WAY),             \
         .scanout = PAT_ENTRY(1, WC, NONE),                     \
         .writeback_incoherent = PAT_ENTRY(0, WB, NONE),        \
         .writecombining = PAT_ENTRY(1, WC, NONE),              \
   }
1191 
/* Static description of MTL-U: MTL_FEATURES plus its platform id. */
static const struct intel_device_info intel_device_info_mtl_u = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_MTL_U,
};
1196 
/* Static description of MTL-H: MTL_FEATURES plus its platform id. */
static const struct intel_device_info intel_device_info_mtl_h = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_MTL_H,
};
1201 
/* Static description of ARL-U: MTL_FEATURES plus its platform id. */
static const struct intel_device_info intel_device_info_arl_u = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_ARL_U,
};
1206 
/* Static description of ARL-H: MTL_FEATURES plus its platform id. */
static const struct intel_device_info intel_device_info_arl_h = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_ARL_H,
};
1211 
1212 void
intel_device_info_topology_reset_masks(struct intel_device_info * devinfo)1213 intel_device_info_topology_reset_masks(struct intel_device_info *devinfo)
1214 {
1215    devinfo->subslice_slice_stride = 0;
1216    devinfo->eu_subslice_stride = 0;
1217    devinfo->eu_slice_stride = 0;
1218 
1219    devinfo->num_slices = 0;
1220    memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
1221 
1222    memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
1223    memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
1224    memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
1225    memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices));
1226 }
1227 
1228 void
intel_device_info_topology_update_counts(struct intel_device_info * devinfo)1229 intel_device_info_topology_update_counts(struct intel_device_info *devinfo)
1230 {
1231    devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
1232    devinfo->subslice_total = 0;
1233    for (int s = 0; s < devinfo->max_slices; s++) {
1234       if (!intel_device_info_slice_available(devinfo, s))
1235          continue;
1236 
1237       for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
1238          devinfo->num_subslices[s] +=
1239             __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]);
1240       }
1241       devinfo->subslice_total += devinfo->num_subslices[s];
1242    }
1243    assert(devinfo->num_slices > 0);
1244    assert(devinfo->subslice_total > 0);
1245 }
1246 
1247 void
intel_device_info_update_pixel_pipes(struct intel_device_info * devinfo,uint8_t * subslice_masks)1248 intel_device_info_update_pixel_pipes(struct intel_device_info *devinfo, uint8_t *subslice_masks)
1249 {
1250    if (devinfo->ver < 11)
1251       return;
1252 
1253    /* The kernel only reports one slice on all existing ICL+ platforms, even
1254     * if multiple slices are present. The slice mask is allowed to have the
1255     * accurate value greater than 1 on gfx12.5+ platforms though, in order to
1256     * be tolerant with the behavior of our simulation environment.
1257     */
1258    assert(devinfo->slice_masks == 1 || devinfo->verx10 >= 125);
1259 
1260    /* Count the number of subslices on each pixel pipe. Assume that every
1261     * contiguous group of 4 subslices in the mask belong to the same pixel
1262     * pipe. However note that on TGL+ the kernel returns a mask of enabled
1263     * *dual* subslices instead of actual subslices somewhat confusingly, so
1264     * each pixel pipe only takes 2 bits in the mask even though it's still 4
1265     * subslices.
1266     */
1267    const unsigned ppipe_bits = devinfo->ver >= 12 ? 2 : 4;
1268    for (unsigned p = 0; p < INTEL_DEVICE_MAX_PIXEL_PIPES; p++) {
1269       const unsigned offset = p * ppipe_bits;
1270       const unsigned subslice_idx = offset /
1271          devinfo->max_subslices_per_slice * devinfo->subslice_slice_stride;
1272       const unsigned ppipe_mask =
1273          BITFIELD_RANGE(offset % devinfo->max_subslices_per_slice, ppipe_bits);
1274 
1275       if (subslice_idx < ARRAY_SIZE(devinfo->subslice_masks))
1276          devinfo->ppipe_subslices[p] =
1277             __builtin_popcount(subslice_masks[subslice_idx] & ppipe_mask);
1278       else
1279          devinfo->ppipe_subslices[p] = 0;
1280    }
1281 }
1282 
1283 void
intel_device_info_update_l3_banks(struct intel_device_info * devinfo)1284 intel_device_info_update_l3_banks(struct intel_device_info *devinfo)
1285 {
1286    if (devinfo->ver != 12)
1287       return;
1288 
1289    if (devinfo->verx10 >= 125) {
1290       if (devinfo->subslice_total > 16) {
1291          assert(devinfo->subslice_total <= 32);
1292          devinfo->l3_banks = 32;
1293       } else if (devinfo->subslice_total > 8) {
1294          devinfo->l3_banks = 16;
1295       } else {
1296          devinfo->l3_banks = 8;
1297       }
1298    } else {
1299       assert(devinfo->num_slices == 1);
1300       if (devinfo->subslice_total >= 6) {
1301          assert(devinfo->subslice_total == 6);
1302          devinfo->l3_banks = 8;
1303       } else if (devinfo->subslice_total > 2) {
1304          devinfo->l3_banks = 6;
1305       } else {
1306          devinfo->l3_banks = 4;
1307       }
1308    }
1309 }
1310 
1311 /* Generate mask from the device data. */
1312 static void
fill_masks(struct intel_device_info * devinfo)1313 fill_masks(struct intel_device_info *devinfo)
1314 {
1315    /* All of our internal device descriptions assign the same number of
1316     * subslices for each slice. Just verify that this is true.
1317     */
1318    for (int s = 1; s < devinfo->num_slices; s++)
1319       assert(devinfo->num_subslices[0] == devinfo->num_subslices[s]);
1320 
1321    intel_device_info_i915_update_from_masks(devinfo,
1322                           (1U << devinfo->num_slices) - 1,
1323                           (1U << devinfo->num_subslices[0]) - 1,
1324                           devinfo->num_slices * devinfo->num_subslices[0] *
1325                           devinfo->max_eus_per_subslice);
1326 }
1327 
1328 void
intel_device_info_update_cs_workgroup_threads(struct intel_device_info * devinfo)1329 intel_device_info_update_cs_workgroup_threads(struct intel_device_info *devinfo)
1330 {
1331    /* GPGPU_WALKER::ThreadWidthCounterMaximum is U6-1 so the most threads we
1332     * can program is 64 without going up to a rectangular group. This only
1333     * impacts Haswell and TGL which have higher thread counts.
1334     *
1335     * INTERFACE_DESCRIPTOR_DATA::NumberofThreadsinGPGPUThreadGroup on Xe-HP+
1336     * is 10 bits so we have no such restrictions.
1337     */
1338    devinfo->max_cs_workgroup_threads =
1339       devinfo->verx10 >= 125 ? devinfo->max_cs_threads :
1340                                MIN2(devinfo->max_cs_threads, 64);
1341 }
1342 
/* Initialize @devinfo from the static description matching @pci_id.
 *
 * Steps, in order:
 *  1. Copy the matching intel_device_info_<family> struct into *devinfo
 *     (IDs from the i915 table all map to intel_device_info_gfx3).
 *  2. Set devinfo->name from the PCI ID tables, or "Intel Unknown".
 *  3. Record the PCI ID and build the topology masks via fill_masks().
 *  4. Derive max_wm_threads, verx10, display_ver, the mesh constant URB size
 *     and the compute workgroup thread limit.
 *
 * Returns false, after logging a warning and without touching *devinfo, when
 * @pci_id is not in any of the included tables; returns true otherwise.
 */
static bool
intel_device_info_init_common(int pci_id,
                              struct intel_device_info *devinfo)
{
   /* Each included table expands CHIPSET() once per known PCI ID, producing
    * one `case` label per device.
    */
   switch (pci_id) {
#undef CHIPSET
#define CHIPSET(id, family, fam_str, name) \
      case id: *devinfo = intel_device_info_##family; break;
#include "pci_ids/crocus_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"

#undef CHIPSET
#define CHIPSET(id, fam_str, name) \
      case id: *devinfo = intel_device_info_gfx3; break;
#include "pci_ids/i915_pci_ids.h"

   default:
      mesa_logw("Driver does not support the 0x%x PCI ID.", pci_id);
      return false;
   }

   /* Second pass over the crocus/iris tables to fill in the marketing name.
    * The i915 table is not included here, so those IDs take the default.
    */
   switch (pci_id) {
#undef CHIPSET
#define CHIPSET(_id, _family, _fam_str, _name) \
   case _id: \
      /* sizeof(str_literal) includes the null */ \
      STATIC_ASSERT(sizeof(_name) + sizeof(_fam_str) + 2 <= \
                    sizeof(devinfo->name)); \
      strncpy(devinfo->name, _name " (" _fam_str ")", sizeof(devinfo->name)); \
      break;
#include "pci_ids/crocus_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"
   default:
      strncpy(devinfo->name, "Intel Unknown", sizeof(devinfo->name));
   }

   devinfo->pci_device_id = pci_id;

   fill_masks(devinfo);

   /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
    *
    * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
    *  allocate scratch space enough so that each slice has 4 slices allowed."
    *
    * The equivalent internal documentation says that this programming note
    * applies to all Gfx9+ platforms.
    *
    * The hardware typically calculates the scratch space pointer by taking
    * the base address, and adding per-thread-scratch-space * thread ID.
    * Extra padding can be necessary depending how the thread IDs are
    * calculated for a particular shader stage.
    */

   switch(devinfo->ver) {
   case 9:
      devinfo->max_wm_threads = 64 /* threads-per-PSD */
                              * devinfo->num_slices
                              * 4; /* effective subslices per slice */
      break;
   case 11:
   case 12:
   case 20:
      devinfo->max_wm_threads = 128 /* threads-per-PSD */
                              * devinfo->num_slices
                              * 8; /* subslices per slice */
      break;
   default:
      /* Older versions keep the max_wm_threads from the static description;
       * any version of 9 or above must have a case above.
       */
      assert(devinfo->ver < 9);
      break;
   }

   assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));

   /* Descriptions that leave verx10 / display_ver at zero get them derived
    * from ver.
    */
   if (devinfo->verx10 == 0)
      devinfo->verx10 = devinfo->ver * 10;

   if (devinfo->display_ver == 0)
      devinfo->display_ver = devinfo->ver;

   if (devinfo->has_mesh_shading) {
      /* Half of push constant space matches the size used in the simplest
       * primitive pipeline (VS + FS). Tweaking this affects performance.
       */
      devinfo->mesh_max_constant_urb_size_kb =
            devinfo->max_constant_urb_size_kb / 2;
   }

   intel_device_info_update_cs_workgroup_threads(devinfo);

   return true;
}
1435 
1436 static void
intel_device_info_apply_workarounds(struct intel_device_info * devinfo)1437 intel_device_info_apply_workarounds(struct intel_device_info *devinfo)
1438 {
1439    if (intel_needs_workaround(devinfo, 18012660806))
1440       devinfo->urb.max_entries[MESA_SHADER_GEOMETRY] = 1536;
1441 
1442    /* Fixes issues with:
1443     * dEQP-GLES31.functional.geometry_shading.layered.render_with_default_layer_cubemap
1444     * when running on GFX12 platforms with small EU count.
1445     */
1446    const uint32_t eu_total = intel_device_info_eu_total(devinfo);
1447    if (devinfo->verx10 == 120 && eu_total <= 32)
1448       devinfo->urb.max_entries[MESA_SHADER_GEOMETRY] = 1024;
1449 }
1450 
1451 bool
intel_get_device_info_from_pci_id(int pci_id,struct intel_device_info * devinfo)1452 intel_get_device_info_from_pci_id(int pci_id,
1453                                   struct intel_device_info *devinfo)
1454 {
1455    intel_device_info_init_common(pci_id, devinfo);
1456 
1457    /* This is a placeholder until a proper value is set. */
1458    devinfo->kmd_type = INTEL_KMD_TYPE_I915;
1459 
1460    intel_device_info_init_was(devinfo);
1461    intel_device_info_apply_workarounds(devinfo);
1462 
1463    return true;
1464 }
1465 
1466 bool
intel_device_info_compute_system_memory(struct intel_device_info * devinfo,bool update)1467 intel_device_info_compute_system_memory(struct intel_device_info *devinfo, bool update)
1468 {
1469    if (!update) {
1470       if (!os_get_total_physical_memory(&devinfo->mem.sram.mappable.size))
1471          return false;
1472    }
1473 
1474    os_get_available_system_memory(&devinfo->mem.sram.mappable.free);
1475 
1476    return true;
1477 }
1478 
1479 static void
intel_device_info_adjust_memory(struct intel_device_info * devinfo)1480 intel_device_info_adjust_memory(struct intel_device_info *devinfo)
1481 {
1482    uint64_t available;
1483 
1484    /* Applications running without elevated privileges don't report valid
1485     * numbers for free sram
1486     */
1487    if (os_get_available_system_memory(&available)) {
1488       devinfo->mem.sram.mappable.free = MIN3(devinfo->mem.sram.mappable.free,
1489                                              devinfo->mem.sram.mappable.size,
1490                                              available);
1491    }
1492 }
1493 
1494 static void
init_max_scratch_ids(struct intel_device_info * devinfo)1495 init_max_scratch_ids(struct intel_device_info *devinfo)
1496 {
1497    /* Determine the max number of subslices that potentially might be used in
1498     * scratch space ids.
1499     *
1500     * For, Gfx11+, scratch space allocation is based on the number of threads
1501     * in the base configuration.
1502     *
1503     * For Gfx9, devinfo->subslice_total is the TOTAL number of subslices and
1504     * we wish to view that there are 4 subslices per slice instead of the
1505     * actual number of subslices per slice. The documentation for 3DSTATE_PS
1506     * "Scratch Space Base Pointer" says:
1507     *
1508     *    "Scratch Space per slice is computed based on 4 sub-slices.  SW
1509     *     must allocate scratch space enough so that each slice has 4
1510     *     slices allowed."
1511     *
1512     * According to the other driver team, this applies to compute shaders
1513     * as well.  This is not currently documented at all.
1514     *
1515     * For Gfx8 and older we user devinfo->subslice_total.
1516     */
1517    unsigned subslices;
1518    if (devinfo->verx10 == 125)
1519       subslices = 32;
1520    else if (devinfo->ver == 12)
1521       subslices = (devinfo->platform == INTEL_PLATFORM_DG1 || devinfo->gt == 2 ? 6 : 2);
1522    else if (devinfo->ver == 11)
1523       subslices = 8;
1524    else if (devinfo->ver >= 9 && devinfo->ver < 11)
1525       subslices = 4 * devinfo->num_slices;
1526    else
1527       subslices = devinfo->subslice_total;
1528    assert(subslices >= devinfo->subslice_total);
1529 
1530    unsigned scratch_ids_per_subslice;
1531    if (devinfo->ver >= 12) {
1532       /* Same as ICL below, but with 16 EUs. */
1533       scratch_ids_per_subslice = 16 * 8;
1534    } else if (devinfo->ver >= 11) {
1535       /* The MEDIA_VFE_STATE docs say:
1536        *
1537        *    "Starting with this configuration, the Maximum Number of
1538        *     Threads must be set to (#EU * 8) for GPGPU dispatches.
1539        *
1540        *     Although there are only 7 threads per EU in the configuration,
1541        *     the FFTID is calculated as if there are 8 threads per EU,
1542        *     which in turn requires a larger amount of Scratch Space to be
1543        *     allocated by the driver."
1544        */
1545       scratch_ids_per_subslice = 8 * 8;
1546    } else if (devinfo->platform == INTEL_PLATFORM_HSW) {
1547       /* WaCSScratchSize:hsw
1548        *
1549        * Haswell's scratch space address calculation appears to be sparse
1550        * rather than tightly packed. The Thread ID has bits indicating
1551        * which subslice, EU within a subslice, and thread within an EU it
1552        * is. There's a maximum of two slices and two subslices, so these
1553        * can be stored with a single bit. Even though there are only 10 EUs
1554        * per subslice, this is stored in 4 bits, so there's an effective
1555        * maximum value of 16 EUs. Similarly, although there are only 7
1556        * threads per EU, this is stored in a 3 bit number, giving an
1557        * effective maximum value of 8 threads per EU.
1558        *
1559        * This means that we need to use 16 * 8 instead of 10 * 7 for the
1560        * number of threads per subslice.
1561        */
1562       scratch_ids_per_subslice = 16 * 8;
1563    } else if (devinfo->platform == INTEL_PLATFORM_CHV) {
1564       /* Cherryview devices have either 6 or 8 EUs per subslice, and each
1565        * EU has 7 threads. The 6 EU devices appear to calculate thread IDs
1566        * as if it had 8 EUs.
1567        */
1568       scratch_ids_per_subslice = 8 * 7;
1569    } else {
1570       scratch_ids_per_subslice = devinfo->max_cs_threads;
1571    }
1572 
1573    unsigned max_thread_ids = scratch_ids_per_subslice * subslices;
1574 
1575    if (devinfo->verx10 >= 125) {
1576       /* On GFX version 12.5, scratch access changed to a surface-based model.
1577        * Instead of each shader type having its own layout based on IDs passed
1578        * from the relevant fixed-function unit, all scratch access is based on
1579        * thread IDs like it always has been for compute.
1580        */
1581       for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++)
1582          devinfo->max_scratch_ids[i] = max_thread_ids;
1583    } else {
1584       unsigned max_scratch_ids[] = {
1585          [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
1586          [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
1587          [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
1588          [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
1589          [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
1590          [MESA_SHADER_COMPUTE]   = max_thread_ids,
1591       };
1592       STATIC_ASSERT(sizeof(devinfo->max_scratch_ids) == sizeof(max_scratch_ids));
1593       memcpy(devinfo->max_scratch_ids, max_scratch_ids,
1594              sizeof(devinfo->max_scratch_ids));
1595    }
1596 }
1597 
1598 static unsigned
intel_device_info_calc_engine_prefetch(const struct intel_device_info * devinfo,enum intel_engine_class engine_class)1599 intel_device_info_calc_engine_prefetch(const struct intel_device_info *devinfo,
1600                                        enum intel_engine_class engine_class)
1601 {
1602    if (devinfo->verx10 >= 200) {
1603       switch (engine_class) {
1604       case INTEL_ENGINE_CLASS_RENDER:
1605          return 4096;
1606       case INTEL_ENGINE_CLASS_COMPUTE:
1607          return 1024;
1608       default:
1609          return 512;
1610       }
1611    }
1612 
1613    if (intel_device_info_is_mtl_or_arl(devinfo)) {
1614       switch (engine_class) {
1615       case INTEL_ENGINE_CLASS_RENDER:
1616          return 2048;
1617       case INTEL_ENGINE_CLASS_COMPUTE:
1618          return 1024;
1619       default:
1620          return 512;
1621       }
1622    }
1623 
1624    /* DG2 */
1625    if (devinfo->verx10 == 125)
1626       return 1024;
1627 
1628    /* Older than DG2/MTL */
1629    return 512;
1630 }
1631 
1632 bool
intel_get_device_info_from_fd(int fd,struct intel_device_info * devinfo,int min_ver,int max_ver)1633 intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo, int min_ver, int max_ver)
1634 {
1635    if (NULL != getenv("INTEL_STUB_GPU_JSON")) {
1636       /* This call will succeed when shim-drm has been initialized with a
1637        * serialized intel_device_info structure.
1638        */
1639       struct drm_intel_stub_devinfo arg = {
1640          .addr = (uintptr_t)devinfo,
1641          .size = sizeof(*devinfo),
1642       };
1643       if (0 == intel_ioctl(fd, DRM_IOCTL_INTEL_STUB_DEVINFO, &arg)) {
1644          intel_device_info_init_was(devinfo);
1645          intel_device_info_apply_workarounds(devinfo);
1646          return true;
1647       }
1648    }
1649 
1650    /* Get PCI info.
1651     *
1652     * Some callers may already have a valid drm device which holds values of
1653     * PCI fields queried here prior to calling this function. But making this
1654     * query optional leads to a more cumbersome implementation. These callers
1655     * still need to initialize the fields somewhere out of this function and
1656     * rely on an ioctl to get PCI device id for the next step when skipping
1657     * this drm query.
1658     */
1659    drmDevicePtr drmdev = NULL;
1660    if (drmGetDevice2(fd, DRM_DEVICE_GET_PCI_REVISION, &drmdev)) {
1661       mesa_loge("Failed to query drm device.");
1662       return false;
1663    }
1664    if (!intel_device_info_init_common(
1665           drmdev->deviceinfo.pci->device_id, devinfo)) {
1666       drmFreeDevice(&drmdev);
1667       return false;
1668    }
1669 
1670    if ((min_ver > 0 && devinfo->ver < min_ver) || (max_ver > 0 && devinfo->ver > max_ver)) {
1671       drmFreeDevice(&drmdev);
1672       return false;
1673    }
1674 
1675    devinfo->pci_domain = drmdev->businfo.pci->domain;
1676    devinfo->pci_bus = drmdev->businfo.pci->bus;
1677    devinfo->pci_dev = drmdev->businfo.pci->dev;
1678    devinfo->pci_func = drmdev->businfo.pci->func;
1679    devinfo->pci_device_id = drmdev->deviceinfo.pci->device_id;
1680    devinfo->pci_revision_id = drmdev->deviceinfo.pci->revision_id;
1681    drmFreeDevice(&drmdev);
1682    devinfo->no_hw = debug_get_bool_option("INTEL_NO_HW", false);
1683 
1684    if (devinfo->ver == 10) {
1685       mesa_loge("Gfx10 support is redacted.");
1686       return false;
1687    }
1688 
1689    devinfo->kmd_type = intel_get_kmd_type(fd);
1690    if (devinfo->kmd_type == INTEL_KMD_TYPE_INVALID) {
1691       mesa_loge("Unknown kernel mode driver");
1692       return false;
1693    }
1694 
1695    /* remaining initialization queries the kernel for device info */
1696    if (devinfo->no_hw) {
1697       /* Provide some sensible values for NO_HW. */
1698       devinfo->gtt_size =
1699          devinfo->ver >= 8 ? (1ull << 48) : 2ull * 1024 * 1024 * 1024;
1700       intel_device_info_compute_system_memory(devinfo, false);
1701       return true;
1702    }
1703 
1704    bool ret;
1705    switch (devinfo->kmd_type) {
1706    case INTEL_KMD_TYPE_I915:
1707       ret = intel_device_info_i915_get_info_from_fd(fd, devinfo);
1708       break;
1709    case INTEL_KMD_TYPE_XE:
1710       ret = intel_device_info_xe_get_info_from_fd(fd, devinfo);
1711       break;
1712    default:
1713       ret = false;
1714       unreachable("Missing");
1715    }
1716    if (!ret) {
1717       mesa_logw("Could not get intel_device_info.");
1718       return false;
1719    }
1720 
1721    /* region info is required for lmem support */
1722    if (devinfo->has_local_mem && !devinfo->mem.use_class_instance) {
1723       mesa_logw("Could not query local memory size.");
1724       return false;
1725    }
1726 
1727    intel_device_info_adjust_memory(devinfo);
1728 
1729    /* Gfx7 and older do not support EU/Subslice info */
1730    assert(devinfo->subslice_total >= 1 || devinfo->ver <= 7);
1731    devinfo->subslice_total = MAX2(devinfo->subslice_total, 1);
1732 
1733    init_max_scratch_ids(devinfo);
1734 
1735    for (enum intel_engine_class engine = INTEL_ENGINE_CLASS_RENDER;
1736         engine < ARRAY_SIZE(devinfo->engine_class_prefetch); engine++)
1737       devinfo->engine_class_prefetch[engine] =
1738             intel_device_info_calc_engine_prefetch(devinfo, engine);
1739 
1740    intel_device_info_init_was(devinfo);
1741    intel_device_info_apply_workarounds(devinfo);
1742 
1743    return true;
1744 }
1745 
intel_device_info_update_memory_info(struct intel_device_info * devinfo,int fd)1746 bool intel_device_info_update_memory_info(struct intel_device_info *devinfo, int fd)
1747 {
1748    bool ret;
1749 
1750    switch (devinfo->kmd_type) {
1751    case INTEL_KMD_TYPE_I915:
1752       ret = intel_device_info_i915_query_regions(devinfo, fd, true);
1753       break;
1754    case INTEL_KMD_TYPE_XE:
1755       ret = intel_device_info_xe_query_regions(fd, devinfo, true);
1756       break;
1757    default:
1758       ret = false;
1759    }
1760 
1761    if (ret)
1762       intel_device_info_adjust_memory(devinfo);
1763    return ret;
1764 }
1765 
1766 void
intel_device_info_update_after_hwconfig(struct intel_device_info * devinfo)1767 intel_device_info_update_after_hwconfig(struct intel_device_info *devinfo)
1768 {
1769    /* After applying hwconfig values, some items need to be recalculated. */
1770    devinfo->max_cs_threads =
1771       devinfo->max_eus_per_subslice * devinfo->num_thread_per_eu;
1772 
1773    intel_device_info_update_cs_workgroup_threads(devinfo);
1774 }
1775 
1776 enum intel_wa_steppings
intel_device_info_wa_stepping(struct intel_device_info * devinfo)1777 intel_device_info_wa_stepping(struct intel_device_info *devinfo)
1778 {
1779    if (intel_device_info_is_mtl(devinfo)) {
1780       if (devinfo->revision < 4)
1781          return INTEL_STEPPING_A0;
1782       return INTEL_STEPPING_B0;
1783    } else if (devinfo->platform == INTEL_PLATFORM_TGL) {
1784       switch (devinfo->revision) {
1785       case 0:
1786          return INTEL_STEPPING_A0;
1787       case 1:
1788          return INTEL_STEPPING_B0;
1789       case 3:
1790          return INTEL_STEPPING_C0;
1791       default:
1792          return INTEL_STEPPING_RELEASE;
1793       }
1794    }
1795 
1796    /* all other platforms support only released steppings */
1797    return INTEL_STEPPING_RELEASE;
1798 }
1799 
1800