1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30
31 #include "util/libdrm.h"
32
33 #include "intel_device_info.h"
34 #include "intel_wa.h"
35 #include "i915/intel_device_info.h"
36 #include "xe/intel_device_info.h"
37
38 #include "common/intel_gem.h"
39 #include "util/u_debug.h"
40 #include "util/log.h"
41 #include "util/macros.h"
42
/* Short platform names accepted by intel_device_name_to_pci_device_id(),
 * each paired with the PCI device ID returned for that name.
 */
static const struct {
   const char *name;
   int pci_id;
} name_map[] = {
   { "lpt", 0x27a2 },
   { "brw", 0x2a02 },
   { "g4x", 0x2a42 },
   { "ilk", 0x0042 },
   { "snb", 0x0126 },
   { "ivb", 0x016a },
   { "hsw", 0x0d2e },
   { "byt", 0x0f33 },
   { "bdw", 0x162e },
   { "chv", 0x22B3 },
   { "skl", 0x1912 },
   { "bxt", 0x5A85 },
   { "kbl", 0x5912 },
   { "aml", 0x591C },
   { "glk", 0x3185 },
   { "cfl", 0x3E9B },
   { "whl", 0x3EA1 },
   { "cml", 0x9b41 },
   { "icl", 0x8a52 },
   { "ehl", 0x4571 },
   { "jsl", 0x4E71 },
   { "tgl", 0x9a49 },
   { "rkl", 0x4c8a },
   { "dg1", 0x4905 },
   { "adl", 0x4680 },
   { "sg1", 0x4907 },
   { "rpl", 0xa780 },
   { "dg2", 0x5690 },
   { "mtl", 0x7d60 },
   { "arl", 0x7d67 },
};

/**
 * Get the PCI ID for the device name.
 *
 * \param name  Short platform name (e.g. "skl"), compared case-sensitively
 *              against the entries of name_map.  May be NULL.
 *
 * Returns the matching PCI device ID, or -1 if the device is not known or
 * \p name is NULL.
 */
int
intel_device_name_to_pci_device_id(const char *name)
{
   /* Passing NULL to strcmp() is undefined behaviour; report "unknown". */
   if (!name)
      return -1;

   for (size_t i = 0; i < sizeof(name_map) / sizeof(name_map[0]); i++) {
      if (strcmp(name_map[i].name, name) == 0)
         return name_map[i].pci_id;
   }

   return -1;
}
94
95 static const struct intel_device_info intel_device_info_gfx3 = {
96 .ver = 3,
97 .platform = INTEL_PLATFORM_GFX3,
98 .simulator_id = -1,
99 .num_slices = 1,
100 .num_subslices = { 1, },
101 .max_eus_per_subslice = 8,
102 .num_thread_per_eu = 4,
103 .timestamp_frequency = 12500000,
104 };
105
106 static const struct intel_device_info intel_device_info_i965 = {
107 .ver = 4,
108 .platform = INTEL_PLATFORM_I965,
109 .has_negative_rhw_bug = true,
110 .num_slices = 1,
111 .num_subslices = { 1, },
112 .max_eus_per_subslice = 8,
113 .num_thread_per_eu = 4,
114 .max_vs_threads = 16,
115 .max_gs_threads = 2,
116 .max_wm_threads = 8 * 4,
117 .urb = {
118 .size = 256,
119 },
120 .timestamp_frequency = 12500000,
121 .simulator_id = -1,
122 };
123
124 static const struct intel_device_info intel_device_info_g4x = {
125 .ver = 4,
126 .verx10 = 45,
127 .has_pln = true,
128 .has_compr4 = true,
129 .has_surface_tile_offset = true,
130 .platform = INTEL_PLATFORM_G4X,
131 .num_slices = 1,
132 .num_subslices = { 1, },
133 .max_eus_per_subslice = 10,
134 .num_thread_per_eu = 5,
135 .max_vs_threads = 32,
136 .max_gs_threads = 2,
137 .max_wm_threads = 10 * 5,
138 .urb = {
139 .size = 384,
140 },
141 .timestamp_frequency = 12500000,
142 .simulator_id = -1,
143 };
144
145 static const struct intel_device_info intel_device_info_ilk = {
146 .ver = 5,
147 .platform = INTEL_PLATFORM_ILK,
148 .has_pln = true,
149 .has_compr4 = true,
150 .has_surface_tile_offset = true,
151 .num_slices = 1,
152 .num_subslices = { 1, },
153 .max_eus_per_subslice = 12,
154 .num_thread_per_eu = 6,
155 .max_vs_threads = 72,
156 .max_gs_threads = 32,
157 .max_wm_threads = 12 * 6,
158 .urb = {
159 .size = 1024,
160 },
161 .timestamp_frequency = 12500000,
162 .simulator_id = -1,
163 };
164
165 static const struct intel_device_info intel_device_info_snb_gt1 = {
166 .ver = 6,
167 .gt = 1,
168 .platform = INTEL_PLATFORM_SNB,
169 .has_hiz_and_separate_stencil = true,
170 .has_llc = true,
171 .has_pln = true,
172 .has_surface_tile_offset = true,
173 .needs_unlit_centroid_workaround = true,
174 .num_slices = 1,
175 .num_subslices = { 1, },
176 .max_eus_per_subslice = 6,
177 .num_thread_per_eu = 6, /* Not confirmed */
178 .max_vs_threads = 24,
179 .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
180 .max_wm_threads = 40,
181 .urb = {
182 .size = 32,
183 .min_entries = {
184 [MESA_SHADER_VERTEX] = 24,
185 },
186 .max_entries = {
187 [MESA_SHADER_VERTEX] = 256,
188 [MESA_SHADER_GEOMETRY] = 256,
189 },
190 },
191 .timestamp_frequency = 12500000,
192 .simulator_id = -1,
193 };
194
195 static const struct intel_device_info intel_device_info_snb_gt2 = {
196 .ver = 6,
197 .gt = 2,
198 .platform = INTEL_PLATFORM_SNB,
199 .has_hiz_and_separate_stencil = true,
200 .has_llc = true,
201 .has_pln = true,
202 .has_surface_tile_offset = true,
203 .needs_unlit_centroid_workaround = true,
204 .num_slices = 1,
205 .num_subslices = { 1, },
206 .max_eus_per_subslice = 12,
207 .num_thread_per_eu = 6, /* Not confirmed */
208 .max_vs_threads = 60,
209 .max_gs_threads = 60,
210 .max_wm_threads = 80,
211 .urb = {
212 .size = 64,
213 .min_entries = {
214 [MESA_SHADER_VERTEX] = 24,
215 },
216 .max_entries = {
217 [MESA_SHADER_VERTEX] = 256,
218 [MESA_SHADER_GEOMETRY] = 256,
219 },
220 },
221 .timestamp_frequency = 12500000,
222 .simulator_id = -1,
223 };
224
/* Designated initializers shared by every ver 7 device-info table below.
 * Must be expanded first inside an initializer so that per-device fields
 * written after it can override these defaults.
 */
#define GFX7_FEATURES                               \
   .ver = 7,                                        \
   .timestamp_frequency = 12500000,                 \
   .max_constant_urb_size_kb = 16,                  \
   .has_llc = true,                                 \
   .has_pln = true,                                 \
   .has_64bit_float = true,                         \
   .has_surface_tile_offset = true,                 \
   .has_hiz_and_separate_stencil = true,            \
   .must_use_separate_stencil = true
235
236 static const struct intel_device_info intel_device_info_ivb_gt1 = {
237 GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 1,
238 .num_slices = 1,
239 .num_subslices = { 1, },
240 .max_eus_per_subslice = 6,
241 .num_thread_per_eu = 6,
242 .l3_banks = 2,
243 .max_vs_threads = 36,
244 .max_tcs_threads = 36,
245 .max_tes_threads = 36,
246 .max_gs_threads = 36,
247 .max_wm_threads = 48,
248 .max_cs_threads = 36,
249 .urb = {
250 .min_entries = {
251 [MESA_SHADER_VERTEX] = 32,
252 [MESA_SHADER_TESS_EVAL] = 10,
253 },
254 .max_entries = {
255 [MESA_SHADER_VERTEX] = 512,
256 [MESA_SHADER_TESS_CTRL] = 32,
257 [MESA_SHADER_TESS_EVAL] = 288,
258 [MESA_SHADER_GEOMETRY] = 192,
259 },
260 },
261 .simulator_id = 7,
262 };
263
264 static const struct intel_device_info intel_device_info_ivb_gt2 = {
265 GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 2,
266 .num_slices = 1,
267 .num_subslices = { 1, },
268 .max_eus_per_subslice = 12,
269 .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
270 * @max_wm_threads ... */
271 .l3_banks = 4,
272 .max_vs_threads = 128,
273 .max_tcs_threads = 128,
274 .max_tes_threads = 128,
275 .max_gs_threads = 128,
276 .max_wm_threads = 172,
277 .max_cs_threads = 64,
278 .urb = {
279 .min_entries = {
280 [MESA_SHADER_VERTEX] = 32,
281 [MESA_SHADER_TESS_EVAL] = 10,
282 },
283 .max_entries = {
284 [MESA_SHADER_VERTEX] = 704,
285 [MESA_SHADER_TESS_CTRL] = 64,
286 [MESA_SHADER_TESS_EVAL] = 448,
287 [MESA_SHADER_GEOMETRY] = 320,
288 },
289 },
290 .simulator_id = 7,
291 };
292
293 static const struct intel_device_info intel_device_info_byt = {
294 GFX7_FEATURES, .platform = INTEL_PLATFORM_BYT, .gt = 1,
295 .num_slices = 1,
296 .num_subslices = { 1, },
297 .max_eus_per_subslice = 4,
298 .num_thread_per_eu = 8,
299 .l3_banks = 1,
300 .has_llc = false,
301 .max_vs_threads = 36,
302 .max_tcs_threads = 36,
303 .max_tes_threads = 36,
304 .max_gs_threads = 36,
305 .max_wm_threads = 48,
306 .max_cs_threads = 32,
307 .urb = {
308 .min_entries = {
309 [MESA_SHADER_VERTEX] = 32,
310 [MESA_SHADER_TESS_EVAL] = 10,
311 },
312 .max_entries = {
313 [MESA_SHADER_VERTEX] = 512,
314 [MESA_SHADER_TESS_CTRL] = 32,
315 [MESA_SHADER_TESS_EVAL] = 288,
316 [MESA_SHADER_GEOMETRY] = 192,
317 },
318 },
319 .simulator_id = 10,
320 };
321
/* Initializers common to all INTEL_PLATFORM_HSW tables: GFX7_FEATURES plus
 * the verx10 = 75 identity and SIMD16 3-source support.
 */
#define HSW_FEATURES                                \
   GFX7_FEATURES,                                   \
   .verx10 = 75,                                    \
   .platform = INTEL_PLATFORM_HSW,                  \
   .supports_simd16_3src = true
327
328 static const struct intel_device_info intel_device_info_hsw_gt1 = {
329 HSW_FEATURES, .gt = 1,
330 .num_slices = 1,
331 .num_subslices = { 1, },
332 .max_eus_per_subslice = 10,
333 .num_thread_per_eu = 7,
334 .l3_banks = 2,
335 .max_vs_threads = 70,
336 .max_tcs_threads = 70,
337 .max_tes_threads = 70,
338 .max_gs_threads = 70,
339 .max_wm_threads = 102,
340 .max_cs_threads = 70,
341 .urb = {
342 .min_entries = {
343 [MESA_SHADER_VERTEX] = 32,
344 [MESA_SHADER_TESS_EVAL] = 10,
345 },
346 .max_entries = {
347 [MESA_SHADER_VERTEX] = 640,
348 [MESA_SHADER_TESS_CTRL] = 64,
349 [MESA_SHADER_TESS_EVAL] = 384,
350 [MESA_SHADER_GEOMETRY] = 256,
351 },
352 },
353 .simulator_id = 9,
354 };
355
356 static const struct intel_device_info intel_device_info_hsw_gt2 = {
357 HSW_FEATURES, .gt = 2,
358 .num_slices = 1,
359 .num_subslices = { 2, },
360 .max_eus_per_subslice = 10,
361 .num_thread_per_eu = 7,
362 .l3_banks = 4,
363 .max_vs_threads = 280,
364 .max_tcs_threads = 256,
365 .max_tes_threads = 280,
366 .max_gs_threads = 256,
367 .max_wm_threads = 204,
368 .max_cs_threads = 70,
369 .urb = {
370 .min_entries = {
371 [MESA_SHADER_VERTEX] = 64,
372 [MESA_SHADER_TESS_EVAL] = 10,
373 },
374 .max_entries = {
375 [MESA_SHADER_VERTEX] = 1664,
376 [MESA_SHADER_TESS_CTRL] = 128,
377 [MESA_SHADER_TESS_EVAL] = 960,
378 [MESA_SHADER_GEOMETRY] = 640,
379 },
380 },
381 .simulator_id = 9,
382 };
383
384 static const struct intel_device_info intel_device_info_hsw_gt3 = {
385 HSW_FEATURES, .gt = 3,
386 .num_slices = 2,
387 .num_subslices = { 2, 2, },
388 .max_eus_per_subslice = 10,
389 .num_thread_per_eu = 7,
390 .l3_banks = 8,
391 .max_vs_threads = 280,
392 .max_tcs_threads = 256,
393 .max_tes_threads = 280,
394 .max_gs_threads = 256,
395 .max_wm_threads = 408,
396 .max_cs_threads = 70,
397 .urb = {
398 .min_entries = {
399 [MESA_SHADER_VERTEX] = 64,
400 [MESA_SHADER_TESS_EVAL] = 10,
401 },
402 .max_entries = {
403 [MESA_SHADER_VERTEX] = 1664,
404 [MESA_SHADER_TESS_CTRL] = 128,
405 [MESA_SHADER_TESS_EVAL] = 960,
406 [MESA_SHADER_GEOMETRY] = 640,
407 },
408 },
409 .max_constant_urb_size_kb = 32,
410 .simulator_id = 9,
411 };
412
/* Baseline designated initializers for ver 8.  Expand this first inside an
 * initializer; fields written after it override these defaults.
 *
 * It's unclear how well supported sampling from the hiz buffer is on GFX8,
 * so keep things conservative for now and set has_sample_with_hiz = false.
 */
#define GFX8_FEATURES                               \
   .ver = 8,                                        \
   .timestamp_frequency = 12500000,                 \
   .max_constant_urb_size_kb = 32,                  \
   .num_thread_per_eu = 7,                          \
   .max_threads_per_psd = 64,                       \
   .max_vs_threads = 504,                           \
   .max_tcs_threads = 504,                          \
   .max_tes_threads = 504,                          \
   .max_gs_threads = 504,                           \
   .max_wm_threads = 384,                           \
   .has_llc = true,                                 \
   .has_pln = true,                                 \
   .has_sample_with_hiz = false,                    \
   .has_hiz_and_separate_stencil = true,            \
   .must_use_separate_stencil = true,               \
   .has_surface_tile_offset = true,                 \
   .has_integer_dword_mul = true,                   \
   .has_64bit_float = true,                         \
   .has_64bit_int = true,                           \
   .supports_simd16_3src = true
437
438 static const struct intel_device_info intel_device_info_bdw_gt1 = {
439 GFX8_FEATURES, .gt = 1,
440 .platform = INTEL_PLATFORM_BDW,
441 .num_slices = 1,
442 .num_subslices = { 2, },
443 .max_eus_per_subslice = 6,
444 .l3_banks = 2,
445 .max_cs_threads = 42,
446 .urb = {
447 .min_entries = {
448 [MESA_SHADER_VERTEX] = 64,
449 [MESA_SHADER_TESS_EVAL] = 34,
450 },
451 .max_entries = {
452 [MESA_SHADER_VERTEX] = 2560,
453 [MESA_SHADER_TESS_CTRL] = 504,
454 [MESA_SHADER_TESS_EVAL] = 1536,
455 /* Reduced from 960, seems to be similar to the bug on Gfx9 GT1. */
456 [MESA_SHADER_GEOMETRY] = 690,
457 },
458 },
459 .simulator_id = 11,
460 };
461
462 static const struct intel_device_info intel_device_info_bdw_gt2 = {
463 GFX8_FEATURES, .gt = 2,
464 .platform = INTEL_PLATFORM_BDW,
465 .num_slices = 1,
466 .num_subslices = { 3, },
467 .max_eus_per_subslice = 8,
468 .l3_banks = 4,
469 .max_cs_threads = 56,
470 .urb = {
471 .min_entries = {
472 [MESA_SHADER_VERTEX] = 64,
473 [MESA_SHADER_TESS_EVAL] = 34,
474 },
475 .max_entries = {
476 [MESA_SHADER_VERTEX] = 2560,
477 [MESA_SHADER_TESS_CTRL] = 504,
478 [MESA_SHADER_TESS_EVAL] = 1536,
479 [MESA_SHADER_GEOMETRY] = 960,
480 },
481 },
482 .simulator_id = 11,
483 };
484
485 static const struct intel_device_info intel_device_info_bdw_gt3 = {
486 GFX8_FEATURES, .gt = 3,
487 .platform = INTEL_PLATFORM_BDW,
488 .num_slices = 2,
489 .num_subslices = { 3, 3, },
490 .max_eus_per_subslice = 8,
491 .l3_banks = 8,
492 .max_cs_threads = 56,
493 .urb = {
494 .min_entries = {
495 [MESA_SHADER_VERTEX] = 64,
496 [MESA_SHADER_TESS_EVAL] = 34,
497 },
498 .max_entries = {
499 [MESA_SHADER_VERTEX] = 2560,
500 [MESA_SHADER_TESS_CTRL] = 504,
501 [MESA_SHADER_TESS_EVAL] = 1536,
502 [MESA_SHADER_GEOMETRY] = 960,
503 },
504 },
505 .simulator_id = 11,
506 };
507
508 static const struct intel_device_info intel_device_info_chv = {
509 GFX8_FEATURES, .platform = INTEL_PLATFORM_CHV, .gt = 1,
510 .has_llc = false,
511 .has_integer_dword_mul = false,
512 .num_slices = 1,
513 .num_subslices = { 2, },
514 .max_eus_per_subslice = 8,
515 .l3_banks = 2,
516 .max_vs_threads = 80,
517 .max_tcs_threads = 80,
518 .max_tes_threads = 80,
519 .max_gs_threads = 80,
520 .max_wm_threads = 128,
521 .max_cs_threads = 6 * 7,
522 .urb = {
523 .min_entries = {
524 [MESA_SHADER_VERTEX] = 34,
525 [MESA_SHADER_TESS_EVAL] = 34,
526 },
527 .max_entries = {
528 [MESA_SHADER_VERTEX] = 640,
529 [MESA_SHADER_TESS_CTRL] = 80,
530 [MESA_SHADER_TESS_EVAL] = 384,
531 [MESA_SHADER_GEOMETRY] = 256,
532 },
533 },
534 .simulator_id = 13,
535 };
536
/* ver 9 hardware limits: thread counts, timestamp frequency and the default
 * URB entry bounds.  Intended to follow GFX8_FEATURES so that these values
 * replace the ver 8 ones.
 */
#define GFX9_HW_INFO                                \
   .ver = 9,                                        \
   .timestamp_frequency = 12000000,                 \
   .max_threads_per_psd = 64,                       \
   .max_vs_threads = 336,                           \
   .max_tcs_threads = 336,                          \
   .max_tes_threads = 336,                          \
   .max_gs_threads = 336,                           \
   .max_cs_threads = 56,                            \
   .urb = {                                         \
      .max_entries = {                              \
         [MESA_SHADER_VERTEX]    = 1856,            \
         [MESA_SHADER_TESS_CTRL] = 672,             \
         [MESA_SHADER_TESS_EVAL] = 1120,            \
         [MESA_SHADER_GEOMETRY]  = 640,             \
      },                                            \
      .min_entries = {                              \
         [MESA_SHADER_VERTEX]    = 64,              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
      },                                            \
   }
558
/* Initializers shared by the ver 9 "LP" tables (used by the BXT and GLK
 * entries below).  Starts from GFX8_FEATURES and GFX9_HW_INFO, then
 * overrides feature bits, thread limits, timestamp frequency and the whole
 * .urb sub-struct.
 */
#define GFX9_LP_FEATURES                            \
   GFX8_FEATURES,                                   \
   GFX9_HW_INFO,                                    \
   .gt = 1,                                         \
   .num_slices = 1,                                 \
   .num_thread_per_eu = 6,                          \
   .timestamp_frequency = 19200000,                 \
   .has_llc = false,                                \
   .has_integer_dword_mul = false,                  \
   .has_sample_with_hiz = true,                     \
   .has_illegal_ccs_values = true,                  \
   .max_vs_threads = 112,                           \
   .max_tcs_threads = 112,                          \
   .max_tes_threads = 112,                          \
   .max_gs_threads = 112,                           \
   .max_cs_threads = 6 * 6,                         \
   .urb = {                                         \
      .max_entries = {                              \
         [MESA_SHADER_VERTEX]    = 704,             \
         [MESA_SHADER_TESS_CTRL] = 256,             \
         [MESA_SHADER_TESS_EVAL] = 416,             \
         [MESA_SHADER_GEOMETRY]  = 256,             \
      },                                            \
      .min_entries = {                              \
         [MESA_SHADER_VERTEX]    = 34,              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
      },                                            \
   }
587
/* GFX9_LP_FEATURES for the configuration with one slice of 3 subslices and
 * up to 6 EUs per subslice.
 */
#define GFX9_LP_FEATURES_3X6                        \
   GFX9_LP_FEATURES,                                \
   .num_subslices = { 3, },                         \
   .max_eus_per_subslice = 6
592
/* GFX9_LP_FEATURES for the configuration with one slice of 2 subslices and
 * up to 6 EUs per subslice.  The thread limits and the .urb sub-struct from
 * GFX9_LP_FEATURES are replaced with smaller values.
 */
#define GFX9_LP_FEATURES_2X6                        \
   GFX9_LP_FEATURES,                                \
   .max_eus_per_subslice = 6,                       \
   .num_subslices = { 2 },                          \
   .max_cs_threads = 6 * 6,                         \
   .max_gs_threads = 56,                            \
   .max_tes_threads = 56,                           \
   .max_tcs_threads = 56,                           \
   .max_vs_threads = 56,                            \
   .urb = {                                         \
      .max_entries = {                              \
         [MESA_SHADER_VERTEX]    = 352,             \
         [MESA_SHADER_TESS_CTRL] = 128,             \
         [MESA_SHADER_TESS_EVAL] = 208,             \
         [MESA_SHADER_GEOMETRY]  = 128,             \
      },                                            \
      .min_entries = {                              \
         [MESA_SHADER_VERTEX]    = 34,              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
      },                                            \
   }
614
/* Initializers shared by the non-LP ver 9 tables: GFX8_FEATURES and
 * GFX9_HW_INFO, two feature bits, and four cooperative-matrix
 * configurations.  The configuration rows are positional initializers, so
 * the order of values within each row and of the rows themselves is kept.
 */
#define GFX9_FEATURES                                                   \
   GFX8_FEATURES,                                                       \
   GFX9_HW_INFO,                                                        \
   .has_illegal_ccs_values = true,                                      \
   .has_sample_with_hiz = true,                                         \
   .cooperative_matrix_configurations = {                               \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                            \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                         \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 },                       \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                            \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                         \
        INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 },                       \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                            \
        INTEL_CMAT_SINT8, INTEL_CMAT_SINT8,                             \
        INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },                         \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                            \
        INTEL_CMAT_UINT8, INTEL_CMAT_UINT8,                             \
        INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },                         \
   }
626
627 static const struct intel_device_info intel_device_info_skl_gt1 = {
628 GFX9_FEATURES, .gt = 1,
629 .platform = INTEL_PLATFORM_SKL,
630 .num_slices = 1,
631 .num_subslices = { 2, },
632 .max_eus_per_subslice = 6,
633 .l3_banks = 2,
634 /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
635 * leading to some vertices to go missing if we use too much URB.
636 */
637 .urb.max_entries[MESA_SHADER_VERTEX] = 928,
638 .simulator_id = 12,
639 };
640
641 static const struct intel_device_info intel_device_info_skl_gt2 = {
642 GFX9_FEATURES, .gt = 2,
643 .platform = INTEL_PLATFORM_SKL,
644 .num_slices = 1,
645 .num_subslices = { 3, },
646 .max_eus_per_subslice = 8,
647 .l3_banks = 4,
648 .simulator_id = 12,
649 };
650
651 static const struct intel_device_info intel_device_info_skl_gt3 = {
652 GFX9_FEATURES, .gt = 3,
653 .platform = INTEL_PLATFORM_SKL,
654 .num_slices = 2,
655 .num_subslices = { 3, 3, },
656 .max_eus_per_subslice = 8,
657 .l3_banks = 8,
658 .simulator_id = 12,
659 };
660
661 static const struct intel_device_info intel_device_info_skl_gt4 = {
662 GFX9_FEATURES, .gt = 4,
663 .platform = INTEL_PLATFORM_SKL,
664 .num_slices = 3,
665 .num_subslices = { 3, 3, 3, },
666 .max_eus_per_subslice = 8,
667 .l3_banks = 12,
668 /* From the "L3 Allocation and Programming" documentation:
669 *
670 * "URB is limited to 1008KB due to programming restrictions. This is not a
671 * restriction of the L3 implementation, but of the FF and other clients.
672 * Therefore, in a GT4 implementation it is possible for the programmed
673 * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
674 * only 1008KB of this will be used."
675 */
676 .simulator_id = 12,
677 };
678
/* INTEL_PLATFORM_BXT in the 3x6 configuration (GFX9_LP_FEATURES_3X6). */
static const struct intel_device_info intel_device_info_bxt = {
   GFX9_LP_FEATURES_3X6,
   .platform = INTEL_PLATFORM_BXT,
   .l3_banks = 2,
   .simulator_id = 14,
};
685
/* INTEL_PLATFORM_BXT in the 2x6 configuration (GFX9_LP_FEATURES_2X6). */
static const struct intel_device_info intel_device_info_bxt_2x6 = {
   GFX9_LP_FEATURES_2X6,
   .platform = INTEL_PLATFORM_BXT,
   .l3_banks = 1,
   .simulator_id = 14,
};
692 /*
693 * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
694 * There's no KBL entry. Using the default SKL (GFX9) GS entries value.
695 */
696
697 static const struct intel_device_info intel_device_info_kbl_gt1 = {
698 GFX9_FEATURES,
699 .platform = INTEL_PLATFORM_KBL,
700 .gt = 1,
701
702 .max_cs_threads = 7 * 6,
703 .num_slices = 1,
704 .num_subslices = { 2, },
705 .max_eus_per_subslice = 6,
706 .l3_banks = 2,
707 /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
708 * leading to some vertices to go missing if we use too much URB.
709 */
710 .urb.max_entries[MESA_SHADER_VERTEX] = 928,
711 .urb.max_entries[MESA_SHADER_GEOMETRY] = 256,
712 .simulator_id = 16,
713 };
714
715 static const struct intel_device_info intel_device_info_kbl_gt1_5 = {
716 GFX9_FEATURES,
717 .platform = INTEL_PLATFORM_KBL,
718 .gt = 1,
719
720 .max_cs_threads = 7 * 6,
721 .num_slices = 1,
722 .num_subslices = { 3, },
723 .max_eus_per_subslice = 6,
724 .l3_banks = 4,
725 .simulator_id = 16,
726 };
727
728 static const struct intel_device_info intel_device_info_kbl_gt2 = {
729 GFX9_FEATURES,
730 .platform = INTEL_PLATFORM_KBL,
731 .gt = 2,
732
733 .num_slices = 1,
734 .num_subslices = { 3, },
735 .max_eus_per_subslice = 8,
736 .l3_banks = 4,
737 .simulator_id = 16,
738 };
739
740 static const struct intel_device_info intel_device_info_kbl_gt3 = {
741 GFX9_FEATURES,
742 .platform = INTEL_PLATFORM_KBL,
743 .gt = 3,
744
745 .num_slices = 2,
746 .num_subslices = { 3, 3, },
747 .max_eus_per_subslice = 8,
748 .l3_banks = 8,
749 .simulator_id = 16,
750 };
751
752 static const struct intel_device_info intel_device_info_kbl_gt4 = {
753 GFX9_FEATURES,
754 .platform = INTEL_PLATFORM_KBL,
755 .gt = 4,
756
757 /*
758 * From the "L3 Allocation and Programming" documentation:
759 *
760 * "URB is limited to 1008KB due to programming restrictions. This
761 * is not a restriction of the L3 implementation, but of the FF and
762 * other clients. Therefore, in a GT4 implementation it is
763 * possible for the programmed allocation of the L3 data array to
764 * provide 3*384KB=1152KB for URB, but only 1008KB of this
765 * will be used."
766 */
767 .num_slices = 3,
768 .num_subslices = { 3, 3, 3, },
769 .max_eus_per_subslice = 8,
770 .l3_banks = 12,
771 .simulator_id = 16,
772 };
773
/* INTEL_PLATFORM_GLK in the 3x6 configuration (GFX9_LP_FEATURES_3X6). */
static const struct intel_device_info intel_device_info_glk = {
   GFX9_LP_FEATURES_3X6,
   .platform = INTEL_PLATFORM_GLK,
   .l3_banks = 2,
   .simulator_id = 17,
};
780
/* INTEL_PLATFORM_GLK in the 2x6 configuration (GFX9_LP_FEATURES_2X6). */
static const struct intel_device_info intel_device_info_glk_2x6 = {
   GFX9_LP_FEATURES_2X6,
   .platform = INTEL_PLATFORM_GLK,
   .l3_banks = 2,
   .simulator_id = 17,
};
787
788 static const struct intel_device_info intel_device_info_cfl_gt1 = {
789 GFX9_FEATURES,
790 .platform = INTEL_PLATFORM_CFL,
791 .gt = 1,
792
793 .num_slices = 1,
794 .num_subslices = { 2, },
795 .max_eus_per_subslice = 6,
796 .l3_banks = 2,
797 /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
798 * leading to some vertices to go missing if we use too much URB.
799 */
800 .urb.max_entries[MESA_SHADER_VERTEX] = 928,
801 .urb.max_entries[MESA_SHADER_GEOMETRY] = 256,
802 .simulator_id = 24,
803 };
804 static const struct intel_device_info intel_device_info_cfl_gt2 = {
805 GFX9_FEATURES,
806 .platform = INTEL_PLATFORM_CFL,
807 .gt = 2,
808
809 .num_slices = 1,
810 .num_subslices = { 3, },
811 .max_eus_per_subslice = 8,
812 .l3_banks = 4,
813 .simulator_id = 24,
814 };
815
816 static const struct intel_device_info intel_device_info_cfl_gt3 = {
817 GFX9_FEATURES,
818 .platform = INTEL_PLATFORM_CFL,
819 .gt = 3,
820
821 .num_slices = 2,
822 .num_subslices = { 3, 3, },
823 .max_eus_per_subslice = 8,
824 .l3_banks = 8,
825 .simulator_id = 24,
826 };
827
/* Wrap a comma-separated list of subslice counts in braces so the list can
 * be handed to another macro as one argument and used as an array
 * initializer.  Uses the standard C99 __VA_ARGS__ form rather than the GNU
 * named-variadic extension; at least one argument is required.
 */
#define subslices(...) { __VA_ARGS__, }
829
/* ver 11 hardware limits.  Intended to follow GFX8_FEATURES so that the
 * version, has_pln and the thread limits replace the ver 8 values.
 */
#define GFX11_HW_INFO                               \
   .ver = 11,                                       \
   .has_pln = false,                                \
   .max_threads_per_psd = 64,                       \
   .max_cs_threads = 56,                            \
   .max_vs_threads = 364,                           \
   .max_tes_threads = 364,                          \
   .max_tcs_threads = 224,                          \
   .max_gs_threads = 224
839
/* Initializers shared by the ver 11 tables.
 *
 * _gt, _slices, _l3 and _platform are stored in .gt, .num_slices, .l3_banks
 * and .platform; _subslices must be a braced list (see subslices()) and
 * initializes .num_subslices.  GFX8_FEATURES and GFX11_HW_INFO come first so
 * the fields below override them.
 */
#define GFX11_FEATURES(_gt, _slices, _subslices, _l3, _platform)        \
   GFX8_FEATURES,                                                       \
   GFX11_HW_INFO,                                                       \
   .platform = _platform,                                               \
   .gt = _gt,                                                           \
   .num_slices = _slices,                                               \
   .num_subslices = _subslices,                                         \
   .max_eus_per_subslice = 8,                                           \
   .l3_banks = _l3,                                                     \
   .has_illegal_ccs_values = true,                                      \
   .has_sample_with_hiz = false,                                        \
   .has_integer_dword_mul = false,                                      \
   .has_64bit_int = false,                                              \
   .has_64bit_float = false,                                            \
   .cooperative_matrix_configurations = {                               \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                            \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                         \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 },                       \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                            \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                         \
        INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 },                       \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                            \
        INTEL_CMAT_SINT8, INTEL_CMAT_SINT8,                             \
        INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },                         \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                            \
        INTEL_CMAT_UINT8, INTEL_CMAT_UINT8,                             \
        INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },                         \
   }
858
/* Per-stage URB entry bounds for ver 11.  Expands to member initializers,
 * so it must be used inside the braces of a `.urb = { ... }` initializer.
 */
#define GFX11_URB_MIN_MAX_ENTRIES                   \
   .max_entries = {                                 \
      [MESA_SHADER_VERTEX]    = 2384,               \
      [MESA_SHADER_TESS_CTRL] = 1032,               \
      [MESA_SHADER_TESS_EVAL] = 2384,               \
      [MESA_SHADER_GEOMETRY]  = 1032,               \
   },                                               \
   .min_entries = {                                 \
      [MESA_SHADER_VERTEX]    = 64,                 \
      [MESA_SHADER_TESS_EVAL] = 34,                 \
   }
870
/* INTEL_PLATFORM_ICL, gt = 2: one slice with 8 subslices and 8 L3 banks. */
static const struct intel_device_info intel_device_info_icl_gt2 = {
   GFX11_FEATURES(2, 1, subslices(8), 8, INTEL_PLATFORM_ICL),
   .urb = {
      GFX11_URB_MIN_MAX_ENTRIES,
   },
   .simulator_id = 19,
};
878
/* INTEL_PLATFORM_ICL "GT1.5" table: gt = 1, one slice with 6 subslices and
 * 6 L3 banks.
 */
static const struct intel_device_info intel_device_info_icl_gt1_5 = {
   GFX11_FEATURES(1, 1, subslices(6), 6, INTEL_PLATFORM_ICL),
   .urb = {
      GFX11_URB_MIN_MAX_ENTRIES,
   },
   .simulator_id = 19,
};
886
/* INTEL_PLATFORM_ICL, gt = 1: one slice with 4 subslices and 6 L3 banks. */
static const struct intel_device_info intel_device_info_icl_gt1 = {
   GFX11_FEATURES(1, 1, subslices(4), 6, INTEL_PLATFORM_ICL),
   .urb = {
      GFX11_URB_MIN_MAX_ENTRIES,
   },
   .simulator_id = 19,
};
894
/* INTEL_PLATFORM_ICL "GT0.5" table: gt = 1, one slice with 1 subslice and
 * 6 L3 banks.
 */
static const struct intel_device_info intel_device_info_icl_gt0_5 = {
   GFX11_FEATURES(1, 1, subslices(1), 6, INTEL_PLATFORM_ICL),
   .urb = {
      GFX11_URB_MIN_MAX_ENTRIES,
   },
   .simulator_id = 19,
};
902
/* Extra initializers shared by the INTEL_PLATFORM_EHL tables; meant to
 * follow GFX11_FEATURES(...) in the same initializer.
 */
#define GFX11_LP_FEATURES                           \
   .simulator_id = 28,                              \
   .has_illegal_ccs_values = true,                  \
   .disable_ccs_repack = true,                      \
   .urb = {                                         \
      GFX11_URB_MIN_MAX_ENTRIES,                    \
   }
910
/* INTEL_PLATFORM_EHL, 4 subslices; keeps the 8 EUs per subslice that
 * GFX11_FEATURES sets.
 */
static const struct intel_device_info intel_device_info_ehl_4x8 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
};
915
/* INTEL_PLATFORM_EHL, 4 subslices with up to 6 EUs each. */
static const struct intel_device_info intel_device_info_ehl_4x6 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   /* Overrides the 8 set by GFX11_FEATURES. */
   .max_eus_per_subslice = 6,
};
921
/* INTEL_PLATFORM_EHL, 4 subslices with up to 5 EUs each. */
static const struct intel_device_info intel_device_info_ehl_4x5 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   /* Overrides the 8 set by GFX11_FEATURES. */
   .max_eus_per_subslice = 5,
};
927
/* INTEL_PLATFORM_EHL, 4 subslices with up to 4 EUs each. */
static const struct intel_device_info intel_device_info_ehl_4x4 = {
   GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   /* Overrides the 8 set by GFX11_FEATURES. */
   .max_eus_per_subslice = 4,
};
933
/* INTEL_PLATFORM_EHL, 2 subslices; keeps the 8 EUs per subslice that
 * GFX11_FEATURES sets.
 */
static const struct intel_device_info intel_device_info_ehl_2x8 = {
   GFX11_FEATURES(1, 1, subslices(2), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
};
938
/* INTEL_PLATFORM_EHL, 2 subslices with up to 4 EUs each. */
static const struct intel_device_info intel_device_info_ehl_2x4 = {
   GFX11_FEATURES(1, 1, subslices(2), 4, INTEL_PLATFORM_EHL),
   GFX11_LP_FEATURES,
   /* Overrides the 8 set by GFX11_FEATURES. */
   .max_eus_per_subslice = 4,
};
944
/* ver 12 hardware limits and default URB description.  Intended to follow
 * GFX8_FEATURES so that these values replace the ver 8 ones.
 */
#define GFX12_HW_INFO                               \
   .ver = 12,                                       \
   .has_aux_map = true,                             \
   .has_pln = false,                                \
   .has_sample_with_hiz = false,                    \
   .max_threads_per_psd = 64,                       \
   .max_cs_threads = 112, /* threads per DSS */     \
   .max_vs_threads = 546,                           \
   .max_tes_threads = 546,                          \
   .max_tcs_threads = 336,                          \
   .max_gs_threads = 336,                           \
   .urb = {                                         \
      .size = 512, /* For intel_stub_gpu */         \
      .max_entries = {                              \
         [MESA_SHADER_VERTEX]    = 3576,            \
         [MESA_SHADER_TESS_CTRL] = 1548,            \
         [MESA_SHADER_TESS_EVAL] = 3576,            \
         [MESA_SHADER_GEOMETRY]  = 1548,            \
      },                                            \
      .min_entries = {                              \
         [MESA_SHADER_VERTEX]    = 64,              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
      },                                            \
   }
969
/* Initializers shared by the ver 12 tables.
 *
 * _gt, _slices and _l3 are stored in .gt, .num_slices and .l3_banks.
 * GFX8_FEATURES and GFX12_HW_INFO come first so the fields below override
 * them.  .num_subslices is not set here; users add it themselves.
 */
#define GFX12_FEATURES(_gt, _slices, _l3)                               \
   GFX8_FEATURES,                                                       \
   GFX12_HW_INFO,                                                       \
   .gt = _gt,                                                           \
   .num_slices = _slices,                                               \
   .l3_banks = _l3,                                                     \
   .max_eus_per_subslice = 16,                                          \
   .simulator_id = 22,                                                  \
   .has_integer_dword_mul = false,                                      \
   .has_64bit_int = false,                                              \
   .has_64bit_float = false,                                            \
   .pat = {                                                             \
      .cached_coherent = PAT_ENTRY(0, WB, 2WAY),                        \
      .writeback_incoherent = PAT_ENTRY(0, WB, 2WAY),                   \
      .scanout = PAT_ENTRY(1, WC, NONE),                                \
      .writecombining = PAT_ENTRY(1, WC, NONE),                         \
   },                                                                   \
   .cooperative_matrix_configurations = {                               \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                            \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                         \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 },                       \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                            \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                         \
        INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 },                       \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                            \
        INTEL_CMAT_SINT8, INTEL_CMAT_SINT8,                             \
        INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },                         \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                            \
        INTEL_CMAT_UINT8, INTEL_CMAT_UINT8,                             \
        INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },                         \
   }
991
/* Wrap a comma-separated list of dual-subslice counts in braces so it can be
 * used as the initializer for .num_subslices.  Uses the standard C99
 * __VA_ARGS__ form rather than the GNU named-variadic extension; at least
 * one argument is required.
 */
#define dual_subslices(...) { __VA_ARGS__, }
993
/* GFX12_FEATURES with gt = 1, one slice and 4 L3 banks, plus a single
 * dual-subslice.
 */
#define GFX12_GT05_FEATURES                         \
   GFX12_FEATURES(1, 1, 4),                         \
   .num_subslices = dual_subslices(1)
997
998 #define GFX12_GT_FEATURES(_gt) \
999 GFX12_FEATURES(_gt, 1, _gt == 1 ? 4 : 8), \
1000 .num_subslices = dual_subslices(_gt == 1 ? 2 : 6)
1001
/* INTEL_PLATFORM_TGL built on GFX12_GT_FEATURES(1). */
static const struct intel_device_info intel_device_info_tgl_gt1 = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_TGL,
};
1006
/* INTEL_PLATFORM_TGL built on GFX12_GT_FEATURES(2). */
static const struct intel_device_info intel_device_info_tgl_gt2 = {
   GFX12_GT_FEATURES(2),
   .platform = INTEL_PLATFORM_TGL,
};
1011
/* INTEL_PLATFORM_RKL built on GFX12_GT05_FEATURES. */
static const struct intel_device_info intel_device_info_rkl_gt05 = {
   GFX12_GT05_FEATURES,
   .platform = INTEL_PLATFORM_RKL,
};
1016
/* INTEL_PLATFORM_RKL built on GFX12_GT_FEATURES(1). */
static const struct intel_device_info intel_device_info_rkl_gt1 = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_RKL,
};
1021
/* INTEL_PLATFORM_ADL built on GFX12_GT05_FEATURES, with display_ver 13. */
static const struct intel_device_info intel_device_info_adl_gt05 = {
   GFX12_GT05_FEATURES,
   .platform = INTEL_PLATFORM_ADL,
   .display_ver = 13,
};
1027
/* INTEL_PLATFORM_ADL built on GFX12_GT_FEATURES(1), with display_ver 13. */
static const struct intel_device_info intel_device_info_adl_gt1 = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_ADL,
   .display_ver = 13,
};
1033
/* INTEL_PLATFORM_ADL variant flagged with is_adl_n; otherwise the same
 * initializers as the GFX12_GT_FEATURES(1) ADL table.
 */
static const struct intel_device_info intel_device_info_adl_n = {
   GFX12_GT_FEATURES(1),
   .platform = INTEL_PLATFORM_ADL,
   .display_ver = 13,
   .is_adl_n = true,
};
1040
/* INTEL_PLATFORM_ADL built on GFX12_GT_FEATURES(2), with display_ver 13. */
static const struct intel_device_info intel_device_info_adl_gt2 = {
   GFX12_GT_FEATURES(2),
   .platform = INTEL_PLATFORM_ADL,
   .display_ver = 13,
};
1046
/* INTEL_PLATFORM_RPL: gt = 1, one slice, 4 L3 banks and 2 dual-subslices,
 * with display_ver 13.
 */
static const struct intel_device_info intel_device_info_rpl = {
   GFX12_FEATURES(1, 1, 4),
   .num_subslices = dual_subslices(2),
   .platform = INTEL_PLATFORM_RPL,
   .display_ver = 13,
};
1053
/* INTEL_PLATFORM_RPL built on GFX12_GT_FEATURES(2), with display_ver 13. */
static const struct intel_device_info intel_device_info_rpl_p = {
   GFX12_GT_FEATURES(2),
   .platform = INTEL_PLATFORM_RPL,
   .display_ver = 13,
};
1059
/* Initializers shared by the DG1 and SG1 tables; both use
 * INTEL_PLATFORM_DG1.  Built on GFX12_GT_FEATURES(2) with local memory, no
 * LLC, a larger URB size and a different simulator id.
 */
#define GFX12_DG1_SG1_FEATURES                                  \
   GFX12_GT_FEATURES(2),                                        \
   .platform = INTEL_PLATFORM_DG1,                              \
   .simulator_id = 30,                                          \
   .has_local_mem = true,                                       \
   .has_llc = false,                                            \
   .urb.size = 768,                                             \
   /* There is no PAT table for DG1, using TGL one */           \
   .pat = {                                                     \
      .cached_coherent = PAT_ENTRY(0, WB, 2WAY),                \
      .writeback_incoherent = PAT_ENTRY(0, WB, 2WAY),           \
      .scanout = PAT_ENTRY(1, WC, NONE),                        \
      .writecombining = PAT_ENTRY(1, WC, NONE),                 \
   }
1074
/* DG1 table: exactly the shared GFX12_DG1_SG1_FEATURES initializers. */
static const struct intel_device_info intel_device_info_dg1 = {
   GFX12_DG1_SG1_FEATURES,
};
1078
/* SG1 table: exactly the shared GFX12_DG1_SG1_FEATURES initializers, so
 * .platform is INTEL_PLATFORM_DG1 here as well.
 */
static const struct intel_device_info intel_device_info_sg1 = {
   GFX12_DG1_SG1_FEATURES,
};
1082
/* Per-stage URB entry bounds used by XEHP_FEATURES.  Expands to member
 * initializers, so it must be used inside a `.urb = { ... }` initializer.
 */
#define XEHP_URB_MIN_MAX_ENTRIES                                \
   .max_entries = {                                             \
      [MESA_SHADER_VERTEX]    = 3832, /* BSpec 47138 */         \
      [MESA_SHADER_TESS_CTRL] = 1548, /* BSpec 47137 */         \
      [MESA_SHADER_TESS_EVAL] = 3576, /* BSpec 47135 */         \
      [MESA_SHADER_GEOMETRY]  = 1548, /* BSpec 47136 */         \
   },                                                           \
   .min_entries = {                                             \
      [MESA_SHADER_VERTEX]    = 64,                             \
      [MESA_SHADER_TESS_EVAL] = 34,                             \
   }
1094
/* Initializers for verx10 = 125 devices.
 *
 * _gt, _slices and _l3 are stored in .gt, .num_slices and .l3_banks.
 * GFX8_FEATURES comes first; every field after it overrides or extends the
 * ver 8 defaults.
 */
#define XEHP_FEATURES(_gt, _slices, _l3)                                \
   GFX8_FEATURES,                                                       \
   .ver = 12,                                                           \
   .verx10 = 125,                                                       \
   .simulator_id = 29,                                                  \
   .gt = _gt,                                                           \
   .num_slices = _slices,                                               \
   .l3_banks = _l3,                                                     \
   .num_subslices = dual_subslices(1), /* updated by topology */        \
   .max_eus_per_subslice = 16,                                          \
   .num_thread_per_eu = 8 /* BSpec 44472 */,                            \
   .has_64bit_float = false,                                            \
   .has_64bit_int = false,                                              \
   .has_integer_dword_mul = false,                                      \
   .has_pln = false,                                                    \
   .has_sample_with_hiz = false,                                        \
   .has_llc = false,                                                    \
   .has_lsc = true,                                                     \
   .has_local_mem = true,                                               \
   .has_aux_map = false,                                                \
   .max_vs_threads = 546,  /* BSpec 46312 */                            \
   .max_gs_threads = 336,  /* BSpec 46299 */                            \
   .max_tcs_threads = 336, /* BSpec 46300 */                            \
   .max_tes_threads = 546, /* BSpec 46298 */                            \
   .max_threads_per_psd = 64,                                           \
   .max_cs_threads = 112, /* threads per DSS */                         \
   .urb = {                                                             \
      .size = 768, /* For intel_stub_gpu */                             \
      XEHP_URB_MIN_MAX_ENTRIES,                                         \
   },                                                                   \
   .cooperative_matrix_configurations = {                               \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                            \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                         \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16 },                       \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16,                            \
        INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16,                         \
        INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 },                       \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                            \
        INTEL_CMAT_SINT8, INTEL_CMAT_SINT8,                             \
        INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },                         \
      { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32,                            \
        INTEL_CMAT_UINT8, INTEL_CMAT_UINT8,                             \
        INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },                         \
   }
1129
/* Initializer list shared by every DG2 / ATS-M device description.
 *
 * Extends XEHP_FEATURES(0, 1, 0) with the display version, a default
 * revision, hwconfig application, the coarse-pixel / mesh-shading /
 * ray-tracing / flat-CCS feature flags and the PAT entries.  Each user adds
 * only its own .platform value after the macro.
 */
#define DG2_FEATURES                                            \
   /* (Sub)slice info comes from the kernel topology info */    \
   XEHP_FEATURES(0, 1, 0),                                      \
   .display_ver = 13,                                           \
   .revision = 4, /* For offline compiler */                    \
   .apply_hwconfig = true,                                      \
   .has_coarse_pixel_primitive_and_cb = true,                   \
   .has_mesh_shading = true,                                    \
   .has_ray_tracing = true,                                     \
   .has_flat_ccs = true,                                        \
   /* There is no PAT table for DG2, using TGL ones */          \
   .pat = {                                                     \
         .cached_coherent = PAT_ENTRY(0, WB, 1WAY),             \
         .scanout = PAT_ENTRY(1, WC, NONE),                     \
         .writeback_incoherent = PAT_ENTRY(0, WB, 2WAY),        \
         .writecombining = PAT_ENTRY(1, WC, NONE),              \
   }
1147
/* DG2-G10: DG2_FEATURES with only the platform enum added. */
static const struct intel_device_info intel_device_info_dg2_g10 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_DG2_G10,
};
1152
/* DG2-G11: DG2_FEATURES with only the platform enum added. */
static const struct intel_device_info intel_device_info_dg2_g11 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_DG2_G11,
};
1157
/* DG2-G12: DG2_FEATURES with only the platform enum added. */
static const struct intel_device_info intel_device_info_dg2_g12 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_DG2_G12,
};
1162
/* ATS-M G10: shares DG2_FEATURES; only the platform enum differs. */
static const struct intel_device_info intel_device_info_atsm_g10 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_ATSM_G10,
};
1167
/* ATS-M G11: shares DG2_FEATURES; only the platform enum differs. */
static const struct intel_device_info intel_device_info_atsm_g11 = {
   DG2_FEATURES,
   .platform = INTEL_PLATFORM_ATSM_G11,
};
1172
/* Initializer list shared by the MTL and ARL device descriptions.
 *
 * Extends XEHP_FEATURES(0, 1, 0) and then overrides some of its values:
 * local memory is turned off, the aux map and 64-bit float (via the math
 * pipe) are turned on, and hwconfig, coarse-pixel, mesh-shading and
 * ray-tracing support are enabled.  These overrides rely on being listed
 * after XEHP_FEATURES.  Each user adds only its own .platform value.
 */
#define MTL_FEATURES                                            \
   /* (Sub)slice info comes from the kernel topology info */    \
   XEHP_FEATURES(0, 1, 0),                                      \
   .has_local_mem = false,                                      \
   .has_aux_map = true,                                         \
   .apply_hwconfig = true,                                      \
   .has_64bit_float = true,                                     \
   .has_64bit_float_via_math_pipe = true,                       \
   .has_integer_dword_mul = false,                              \
   .has_coarse_pixel_primitive_and_cb = true,                   \
   .has_mesh_shading = true,                                    \
   .has_ray_tracing = true,                                     \
   .pat = {                                                     \
         .cached_coherent = PAT_ENTRY(3, WB, 1WAY),             \
         .scanout = PAT_ENTRY(1, WC, NONE),                     \
         .writeback_incoherent = PAT_ENTRY(0, WB, NONE),        \
         .writecombining = PAT_ENTRY(1, WC, NONE),              \
   }
1191
/* MTL-U: MTL_FEATURES with only the platform enum added. */
static const struct intel_device_info intel_device_info_mtl_u = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_MTL_U,
};
1196
/* MTL-H: MTL_FEATURES with only the platform enum added. */
static const struct intel_device_info intel_device_info_mtl_h = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_MTL_H,
};
1201
/* ARL-U: shares MTL_FEATURES; only the platform enum differs. */
static const struct intel_device_info intel_device_info_arl_u = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_ARL_U,
};
1206
/* ARL-H: shares MTL_FEATURES; only the platform enum differs. */
static const struct intel_device_info intel_device_info_arl_h = {
   MTL_FEATURES,
   .platform = INTEL_PLATFORM_ARL_H,
};
1211
1212 void
intel_device_info_topology_reset_masks(struct intel_device_info * devinfo)1213 intel_device_info_topology_reset_masks(struct intel_device_info *devinfo)
1214 {
1215 devinfo->subslice_slice_stride = 0;
1216 devinfo->eu_subslice_stride = 0;
1217 devinfo->eu_slice_stride = 0;
1218
1219 devinfo->num_slices = 0;
1220 memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
1221
1222 memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
1223 memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
1224 memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
1225 memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices));
1226 }
1227
1228 void
intel_device_info_topology_update_counts(struct intel_device_info * devinfo)1229 intel_device_info_topology_update_counts(struct intel_device_info *devinfo)
1230 {
1231 devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
1232 devinfo->subslice_total = 0;
1233 for (int s = 0; s < devinfo->max_slices; s++) {
1234 if (!intel_device_info_slice_available(devinfo, s))
1235 continue;
1236
1237 for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
1238 devinfo->num_subslices[s] +=
1239 __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]);
1240 }
1241 devinfo->subslice_total += devinfo->num_subslices[s];
1242 }
1243 assert(devinfo->num_slices > 0);
1244 assert(devinfo->subslice_total > 0);
1245 }
1246
1247 void
intel_device_info_update_pixel_pipes(struct intel_device_info * devinfo,uint8_t * subslice_masks)1248 intel_device_info_update_pixel_pipes(struct intel_device_info *devinfo, uint8_t *subslice_masks)
1249 {
1250 if (devinfo->ver < 11)
1251 return;
1252
1253 /* The kernel only reports one slice on all existing ICL+ platforms, even
1254 * if multiple slices are present. The slice mask is allowed to have the
1255 * accurate value greater than 1 on gfx12.5+ platforms though, in order to
1256 * be tolerant with the behavior of our simulation environment.
1257 */
1258 assert(devinfo->slice_masks == 1 || devinfo->verx10 >= 125);
1259
1260 /* Count the number of subslices on each pixel pipe. Assume that every
1261 * contiguous group of 4 subslices in the mask belong to the same pixel
1262 * pipe. However note that on TGL+ the kernel returns a mask of enabled
1263 * *dual* subslices instead of actual subslices somewhat confusingly, so
1264 * each pixel pipe only takes 2 bits in the mask even though it's still 4
1265 * subslices.
1266 */
1267 const unsigned ppipe_bits = devinfo->ver >= 12 ? 2 : 4;
1268 for (unsigned p = 0; p < INTEL_DEVICE_MAX_PIXEL_PIPES; p++) {
1269 const unsigned offset = p * ppipe_bits;
1270 const unsigned subslice_idx = offset /
1271 devinfo->max_subslices_per_slice * devinfo->subslice_slice_stride;
1272 const unsigned ppipe_mask =
1273 BITFIELD_RANGE(offset % devinfo->max_subslices_per_slice, ppipe_bits);
1274
1275 if (subslice_idx < ARRAY_SIZE(devinfo->subslice_masks))
1276 devinfo->ppipe_subslices[p] =
1277 __builtin_popcount(subslice_masks[subslice_idx] & ppipe_mask);
1278 else
1279 devinfo->ppipe_subslices[p] = 0;
1280 }
1281 }
1282
1283 void
intel_device_info_update_l3_banks(struct intel_device_info * devinfo)1284 intel_device_info_update_l3_banks(struct intel_device_info *devinfo)
1285 {
1286 if (devinfo->ver != 12)
1287 return;
1288
1289 if (devinfo->verx10 >= 125) {
1290 if (devinfo->subslice_total > 16) {
1291 assert(devinfo->subslice_total <= 32);
1292 devinfo->l3_banks = 32;
1293 } else if (devinfo->subslice_total > 8) {
1294 devinfo->l3_banks = 16;
1295 } else {
1296 devinfo->l3_banks = 8;
1297 }
1298 } else {
1299 assert(devinfo->num_slices == 1);
1300 if (devinfo->subslice_total >= 6) {
1301 assert(devinfo->subslice_total == 6);
1302 devinfo->l3_banks = 8;
1303 } else if (devinfo->subslice_total > 2) {
1304 devinfo->l3_banks = 6;
1305 } else {
1306 devinfo->l3_banks = 4;
1307 }
1308 }
1309 }
1310
1311 /* Generate mask from the device data. */
1312 static void
fill_masks(struct intel_device_info * devinfo)1313 fill_masks(struct intel_device_info *devinfo)
1314 {
1315 /* All of our internal device descriptions assign the same number of
1316 * subslices for each slice. Just verify that this is true.
1317 */
1318 for (int s = 1; s < devinfo->num_slices; s++)
1319 assert(devinfo->num_subslices[0] == devinfo->num_subslices[s]);
1320
1321 intel_device_info_i915_update_from_masks(devinfo,
1322 (1U << devinfo->num_slices) - 1,
1323 (1U << devinfo->num_subslices[0]) - 1,
1324 devinfo->num_slices * devinfo->num_subslices[0] *
1325 devinfo->max_eus_per_subslice);
1326 }
1327
1328 void
intel_device_info_update_cs_workgroup_threads(struct intel_device_info * devinfo)1329 intel_device_info_update_cs_workgroup_threads(struct intel_device_info *devinfo)
1330 {
1331 /* GPGPU_WALKER::ThreadWidthCounterMaximum is U6-1 so the most threads we
1332 * can program is 64 without going up to a rectangular group. This only
1333 * impacts Haswell and TGL which have higher thread counts.
1334 *
1335 * INTERFACE_DESCRIPTOR_DATA::NumberofThreadsinGPGPUThreadGroup on Xe-HP+
1336 * is 10 bits so we have no such restrictions.
1337 */
1338 devinfo->max_cs_workgroup_threads =
1339 devinfo->verx10 >= 125 ? devinfo->max_cs_threads :
1340 MIN2(devinfo->max_cs_threads, 64);
1341 }
1342
/* Initialize *devinfo from the static description matching pci_id.
 *
 * The PCI id tables are X-macro headers: CHIPSET() is redefined before each
 * #include so that the same table first expands to the `case` labels that
 * copy the right static intel_device_info, and then to the `case` labels that
 * build the human-readable name.
 *
 * Returns false, after logging a warning, when pci_id is not listed in any of
 * the included tables; *devinfo is not written in that case.  Otherwise
 * fills in the name, pci_device_id, topology masks (fill_masks), the
 * max_wm_threads value for ver 9/11/12/20, default verx10 / display_ver, the
 * mesh URB constant size and the CS workgroup thread limit, then returns
 * true.
 */
static bool
intel_device_info_init_common(int pci_id,
                              struct intel_device_info *devinfo)
{
   /* First pass: copy the static device description for this PCI id. */
   switch (pci_id) {
#undef CHIPSET
#define CHIPSET(id, family, fam_str, name) \
      case id: *devinfo = intel_device_info_##family; break;
#include "pci_ids/crocus_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"

   /* The i915 table uses a three-argument CHIPSET(); every id in it maps to
    * the gfx3 description.
    */
#undef CHIPSET
#define CHIPSET(id, fam_str, name) \
      case id: *devinfo = intel_device_info_gfx3; break;
#include "pci_ids/i915_pci_ids.h"

   default:
      mesa_logw("Driver does not support the 0x%x PCI ID.", pci_id);
      return false;
   }

   /* Second pass: build "<name> (<family>)".  Only the crocus and iris
    * tables are included here, so ids from the i915 table take the default
    * and are named "Intel Unknown".
    */
   switch (pci_id) {
#undef CHIPSET
#define CHIPSET(_id, _family, _fam_str, _name) \
   case _id: \
      /* sizeof(str_literal) includes the null */ \
      STATIC_ASSERT(sizeof(_name) + sizeof(_fam_str) + 2 <= \
                    sizeof(devinfo->name)); \
      strncpy(devinfo->name, _name " (" _fam_str ")", sizeof(devinfo->name)); \
      break;
#include "pci_ids/crocus_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"
   default:
      strncpy(devinfo->name, "Intel Unknown", sizeof(devinfo->name));
   }

   devinfo->pci_device_id = pci_id;

   /* Derive the slice/subslice/EU masks from the static counts. */
   fill_masks(devinfo);

   /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
    *
    *    "Scratch Space per slice is computed based on 4 sub-slices.  SW must
    *     allocate scratch space enough so that each slice has 4 slices allowed."
    *
    * The equivalent internal documentation says that this programming note
    * applies to all Gfx9+ platforms.
    *
    * The hardware typically calculates the scratch space pointer by taking
    * the base address, and adding per-thread-scratch-space * thread ID.
    * Extra padding can be necessary depending how the thread IDs are
    * calculated for a particular shader stage.
    */

   switch(devinfo->ver) {
   case 9:
      devinfo->max_wm_threads = 64 /* threads-per-PSD */
                              * devinfo->num_slices
                              * 4; /* effective subslices per slice */
      break;
   case 11:
   case 12:
   case 20:
      devinfo->max_wm_threads = 128 /* threads-per-PSD */
                              * devinfo->num_slices
                              * 8; /* subslices per slice */
      break;
   default:
      /* Older generations keep the max_wm_threads value from their static
       * description.
       */
      assert(devinfo->ver < 9);
      break;
   }

   assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));

   /* Descriptions that leave verx10 / display_ver at zero get them derived
    * from ver.
    */
   if (devinfo->verx10 == 0)
      devinfo->verx10 = devinfo->ver * 10;

   if (devinfo->display_ver == 0)
      devinfo->display_ver = devinfo->ver;

   if (devinfo->has_mesh_shading) {
      /* Half of push constant space matches the size used in the simplest
       * primitive pipeline (VS + FS). Tweaking this affects performance.
       */
      devinfo->mesh_max_constant_urb_size_kb =
         devinfo->max_constant_urb_size_kb / 2;
   }

   intel_device_info_update_cs_workgroup_threads(devinfo);

   return true;
}
1435
1436 static void
intel_device_info_apply_workarounds(struct intel_device_info * devinfo)1437 intel_device_info_apply_workarounds(struct intel_device_info *devinfo)
1438 {
1439 if (intel_needs_workaround(devinfo, 18012660806))
1440 devinfo->urb.max_entries[MESA_SHADER_GEOMETRY] = 1536;
1441
1442 /* Fixes issues with:
1443 * dEQP-GLES31.functional.geometry_shading.layered.render_with_default_layer_cubemap
1444 * when running on GFX12 platforms with small EU count.
1445 */
1446 const uint32_t eu_total = intel_device_info_eu_total(devinfo);
1447 if (devinfo->verx10 == 120 && eu_total <= 32)
1448 devinfo->urb.max_entries[MESA_SHADER_GEOMETRY] = 1024;
1449 }
1450
1451 bool
intel_get_device_info_from_pci_id(int pci_id,struct intel_device_info * devinfo)1452 intel_get_device_info_from_pci_id(int pci_id,
1453 struct intel_device_info *devinfo)
1454 {
1455 intel_device_info_init_common(pci_id, devinfo);
1456
1457 /* This is a placeholder until a proper value is set. */
1458 devinfo->kmd_type = INTEL_KMD_TYPE_I915;
1459
1460 intel_device_info_init_was(devinfo);
1461 intel_device_info_apply_workarounds(devinfo);
1462
1463 return true;
1464 }
1465
1466 bool
intel_device_info_compute_system_memory(struct intel_device_info * devinfo,bool update)1467 intel_device_info_compute_system_memory(struct intel_device_info *devinfo, bool update)
1468 {
1469 if (!update) {
1470 if (!os_get_total_physical_memory(&devinfo->mem.sram.mappable.size))
1471 return false;
1472 }
1473
1474 os_get_available_system_memory(&devinfo->mem.sram.mappable.free);
1475
1476 return true;
1477 }
1478
1479 static void
intel_device_info_adjust_memory(struct intel_device_info * devinfo)1480 intel_device_info_adjust_memory(struct intel_device_info *devinfo)
1481 {
1482 uint64_t available;
1483
1484 /* Applications running without elevated privileges don't report valid
1485 * numbers for free sram
1486 */
1487 if (os_get_available_system_memory(&available)) {
1488 devinfo->mem.sram.mappable.free = MIN3(devinfo->mem.sram.mappable.free,
1489 devinfo->mem.sram.mappable.size,
1490 available);
1491 }
1492 }
1493
1494 static void
init_max_scratch_ids(struct intel_device_info * devinfo)1495 init_max_scratch_ids(struct intel_device_info *devinfo)
1496 {
1497 /* Determine the max number of subslices that potentially might be used in
1498 * scratch space ids.
1499 *
1500 * For, Gfx11+, scratch space allocation is based on the number of threads
1501 * in the base configuration.
1502 *
1503 * For Gfx9, devinfo->subslice_total is the TOTAL number of subslices and
1504 * we wish to view that there are 4 subslices per slice instead of the
1505 * actual number of subslices per slice. The documentation for 3DSTATE_PS
1506 * "Scratch Space Base Pointer" says:
1507 *
1508 * "Scratch Space per slice is computed based on 4 sub-slices. SW
1509 * must allocate scratch space enough so that each slice has 4
1510 * slices allowed."
1511 *
1512 * According to the other driver team, this applies to compute shaders
1513 * as well. This is not currently documented at all.
1514 *
1515 * For Gfx8 and older we user devinfo->subslice_total.
1516 */
1517 unsigned subslices;
1518 if (devinfo->verx10 == 125)
1519 subslices = 32;
1520 else if (devinfo->ver == 12)
1521 subslices = (devinfo->platform == INTEL_PLATFORM_DG1 || devinfo->gt == 2 ? 6 : 2);
1522 else if (devinfo->ver == 11)
1523 subslices = 8;
1524 else if (devinfo->ver >= 9 && devinfo->ver < 11)
1525 subslices = 4 * devinfo->num_slices;
1526 else
1527 subslices = devinfo->subslice_total;
1528 assert(subslices >= devinfo->subslice_total);
1529
1530 unsigned scratch_ids_per_subslice;
1531 if (devinfo->ver >= 12) {
1532 /* Same as ICL below, but with 16 EUs. */
1533 scratch_ids_per_subslice = 16 * 8;
1534 } else if (devinfo->ver >= 11) {
1535 /* The MEDIA_VFE_STATE docs say:
1536 *
1537 * "Starting with this configuration, the Maximum Number of
1538 * Threads must be set to (#EU * 8) for GPGPU dispatches.
1539 *
1540 * Although there are only 7 threads per EU in the configuration,
1541 * the FFTID is calculated as if there are 8 threads per EU,
1542 * which in turn requires a larger amount of Scratch Space to be
1543 * allocated by the driver."
1544 */
1545 scratch_ids_per_subslice = 8 * 8;
1546 } else if (devinfo->platform == INTEL_PLATFORM_HSW) {
1547 /* WaCSScratchSize:hsw
1548 *
1549 * Haswell's scratch space address calculation appears to be sparse
1550 * rather than tightly packed. The Thread ID has bits indicating
1551 * which subslice, EU within a subslice, and thread within an EU it
1552 * is. There's a maximum of two slices and two subslices, so these
1553 * can be stored with a single bit. Even though there are only 10 EUs
1554 * per subslice, this is stored in 4 bits, so there's an effective
1555 * maximum value of 16 EUs. Similarly, although there are only 7
1556 * threads per EU, this is stored in a 3 bit number, giving an
1557 * effective maximum value of 8 threads per EU.
1558 *
1559 * This means that we need to use 16 * 8 instead of 10 * 7 for the
1560 * number of threads per subslice.
1561 */
1562 scratch_ids_per_subslice = 16 * 8;
1563 } else if (devinfo->platform == INTEL_PLATFORM_CHV) {
1564 /* Cherryview devices have either 6 or 8 EUs per subslice, and each
1565 * EU has 7 threads. The 6 EU devices appear to calculate thread IDs
1566 * as if it had 8 EUs.
1567 */
1568 scratch_ids_per_subslice = 8 * 7;
1569 } else {
1570 scratch_ids_per_subslice = devinfo->max_cs_threads;
1571 }
1572
1573 unsigned max_thread_ids = scratch_ids_per_subslice * subslices;
1574
1575 if (devinfo->verx10 >= 125) {
1576 /* On GFX version 12.5, scratch access changed to a surface-based model.
1577 * Instead of each shader type having its own layout based on IDs passed
1578 * from the relevant fixed-function unit, all scratch access is based on
1579 * thread IDs like it always has been for compute.
1580 */
1581 for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++)
1582 devinfo->max_scratch_ids[i] = max_thread_ids;
1583 } else {
1584 unsigned max_scratch_ids[] = {
1585 [MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
1586 [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
1587 [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
1588 [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
1589 [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
1590 [MESA_SHADER_COMPUTE] = max_thread_ids,
1591 };
1592 STATIC_ASSERT(sizeof(devinfo->max_scratch_ids) == sizeof(max_scratch_ids));
1593 memcpy(devinfo->max_scratch_ids, max_scratch_ids,
1594 sizeof(devinfo->max_scratch_ids));
1595 }
1596 }
1597
1598 static unsigned
intel_device_info_calc_engine_prefetch(const struct intel_device_info * devinfo,enum intel_engine_class engine_class)1599 intel_device_info_calc_engine_prefetch(const struct intel_device_info *devinfo,
1600 enum intel_engine_class engine_class)
1601 {
1602 if (devinfo->verx10 >= 200) {
1603 switch (engine_class) {
1604 case INTEL_ENGINE_CLASS_RENDER:
1605 return 4096;
1606 case INTEL_ENGINE_CLASS_COMPUTE:
1607 return 1024;
1608 default:
1609 return 512;
1610 }
1611 }
1612
1613 if (intel_device_info_is_mtl_or_arl(devinfo)) {
1614 switch (engine_class) {
1615 case INTEL_ENGINE_CLASS_RENDER:
1616 return 2048;
1617 case INTEL_ENGINE_CLASS_COMPUTE:
1618 return 1024;
1619 default:
1620 return 512;
1621 }
1622 }
1623
1624 /* DG2 */
1625 if (devinfo->verx10 == 125)
1626 return 1024;
1627
1628 /* Older than DG2/MTL */
1629 return 512;
1630 }
1631
/**
 * Populate \p devinfo for the DRM device open on \p fd.
 *
 * \param fd       Open DRM file descriptor.
 * \param devinfo  Structure to populate.
 * \param min_ver  Lowest acceptable devinfo->ver; values <= 0 disable the
 *                 lower bound.
 * \param max_ver  Highest acceptable devinfo->ver; values <= 0 disable the
 *                 upper bound.
 *
 * \return true on success.  false when the DRM device cannot be queried, the
 *         PCI id is unsupported, the version is outside the requested range
 *         or is 10, the kernel driver type is unknown, the kernel queries
 *         fail, or a local-memory device did not report memory regions.
 *
 * Two paths return true early without running the full sequence below:
 * the stub-GPU path (INTEL_STUB_GPU_JSON set and the stub ioctl succeeds) and
 * the INTEL_NO_HW path, which skips every kernel query as well as the scratch
 * id, prefetch and workaround setup.
 */
bool
intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo, int min_ver, int max_ver)
{
   if (NULL != getenv("INTEL_STUB_GPU_JSON")) {
      /* This call will succeed when shim-drm has been initialized with a
       * serialized intel_device_info structure.
       */
      struct drm_intel_stub_devinfo arg = {
         .addr = (uintptr_t)devinfo,
         .size = sizeof(*devinfo),
      };
      if (0 == intel_ioctl(fd, DRM_IOCTL_INTEL_STUB_DEVINFO, &arg)) {
         intel_device_info_init_was(devinfo);
         intel_device_info_apply_workarounds(devinfo);
         return true;
      }
      /* Otherwise fall through to the regular path. */
   }

   /* Get PCI info.
    *
    * Some callers may already have a valid drm device which holds values of
    * PCI fields queried here prior to calling this function. But making this
    * query optional leads to a more cumbersome implementation. These callers
    * still need to initialize the fields somewhere out of this function and
    * rely on an ioctl to get PCI device id for the next step when skipping
    * this drm query.
    */
   drmDevicePtr drmdev = NULL;
   if (drmGetDevice2(fd, DRM_DEVICE_GET_PCI_REVISION, &drmdev)) {
      mesa_loge("Failed to query drm device.");
      return false;
   }
   /* From here until drmFreeDevice() below, every exit must release drmdev. */
   if (!intel_device_info_init_common(
          drmdev->deviceinfo.pci->device_id, devinfo)) {
      drmFreeDevice(&drmdev);
      return false;
   }

   /* Reject devices outside the caller's version range; a bound that is not
    * positive is ignored.
    */
   if ((min_ver > 0 && devinfo->ver < min_ver) || (max_ver > 0 && devinfo->ver > max_ver)) {
      drmFreeDevice(&drmdev);
      return false;
   }

   devinfo->pci_domain = drmdev->businfo.pci->domain;
   devinfo->pci_bus = drmdev->businfo.pci->bus;
   devinfo->pci_dev = drmdev->businfo.pci->dev;
   devinfo->pci_func = drmdev->businfo.pci->func;
   devinfo->pci_device_id = drmdev->deviceinfo.pci->device_id;
   devinfo->pci_revision_id = drmdev->deviceinfo.pci->revision_id;
   drmFreeDevice(&drmdev);
   devinfo->no_hw = debug_get_bool_option("INTEL_NO_HW", false);

   if (devinfo->ver == 10) {
      mesa_loge("Gfx10 support is redacted.");
      return false;
   }

   devinfo->kmd_type = intel_get_kmd_type(fd);
   if (devinfo->kmd_type == INTEL_KMD_TYPE_INVALID) {
      mesa_loge("Unknown kernel mode driver");
      return false;
   }

   /* remaining initialization queries the kernel for device info */
   if (devinfo->no_hw) {
      /* Provide some sensible values for NO_HW. */
      devinfo->gtt_size =
         devinfo->ver >= 8 ? (1ull << 48) : 2ull * 1024 * 1024 * 1024;
      /* The result of the memory query is not checked on this path. */
      intel_device_info_compute_system_memory(devinfo, false);
      return true;
   }

   /* Hand off to the backend matching the kernel driver in use. */
   bool ret;
   switch (devinfo->kmd_type) {
   case INTEL_KMD_TYPE_I915:
      ret = intel_device_info_i915_get_info_from_fd(fd, devinfo);
      break;
   case INTEL_KMD_TYPE_XE:
      ret = intel_device_info_xe_get_info_from_fd(fd, devinfo);
      break;
   default:
      ret = false;
      unreachable("Missing");
   }
   if (!ret) {
      mesa_logw("Could not get intel_device_info.");
      return false;
   }

   /* region info is required for lmem support */
   if (devinfo->has_local_mem && !devinfo->mem.use_class_instance) {
      mesa_logw("Could not query local memory size.");
      return false;
   }

   intel_device_info_adjust_memory(devinfo);

   /* Gfx7 and older do not support EU/Subslice info */
   assert(devinfo->subslice_total >= 1 || devinfo->ver <= 7);
   devinfo->subslice_total = MAX2(devinfo->subslice_total, 1);

   init_max_scratch_ids(devinfo);

   /* Fill one prefetch value for every entry of engine_class_prefetch[]. */
   for (enum intel_engine_class engine = INTEL_ENGINE_CLASS_RENDER;
        engine < ARRAY_SIZE(devinfo->engine_class_prefetch); engine++)
      devinfo->engine_class_prefetch[engine] =
            intel_device_info_calc_engine_prefetch(devinfo, engine);

   intel_device_info_init_was(devinfo);
   intel_device_info_apply_workarounds(devinfo);

   return true;
}
1745
intel_device_info_update_memory_info(struct intel_device_info * devinfo,int fd)1746 bool intel_device_info_update_memory_info(struct intel_device_info *devinfo, int fd)
1747 {
1748 bool ret;
1749
1750 switch (devinfo->kmd_type) {
1751 case INTEL_KMD_TYPE_I915:
1752 ret = intel_device_info_i915_query_regions(devinfo, fd, true);
1753 break;
1754 case INTEL_KMD_TYPE_XE:
1755 ret = intel_device_info_xe_query_regions(fd, devinfo, true);
1756 break;
1757 default:
1758 ret = false;
1759 }
1760
1761 if (ret)
1762 intel_device_info_adjust_memory(devinfo);
1763 return ret;
1764 }
1765
1766 void
intel_device_info_update_after_hwconfig(struct intel_device_info * devinfo)1767 intel_device_info_update_after_hwconfig(struct intel_device_info *devinfo)
1768 {
1769 /* After applying hwconfig values, some items need to be recalculated. */
1770 devinfo->max_cs_threads =
1771 devinfo->max_eus_per_subslice * devinfo->num_thread_per_eu;
1772
1773 intel_device_info_update_cs_workgroup_threads(devinfo);
1774 }
1775
1776 enum intel_wa_steppings
intel_device_info_wa_stepping(struct intel_device_info * devinfo)1777 intel_device_info_wa_stepping(struct intel_device_info *devinfo)
1778 {
1779 if (intel_device_info_is_mtl(devinfo)) {
1780 if (devinfo->revision < 4)
1781 return INTEL_STEPPING_A0;
1782 return INTEL_STEPPING_B0;
1783 } else if (devinfo->platform == INTEL_PLATFORM_TGL) {
1784 switch (devinfo->revision) {
1785 case 0:
1786 return INTEL_STEPPING_A0;
1787 case 1:
1788 return INTEL_STEPPING_B0;
1789 case 3:
1790 return INTEL_STEPPING_C0;
1791 default:
1792 return INTEL_STEPPING_RELEASE;
1793 }
1794 }
1795
1796 /* all other platforms support only released steppings */
1797 return INTEL_STEPPING_RELEASE;
1798 }
1799
1800