1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include "gen_device_info.h"
31 #include "compiler/shader_enums.h"
32 #include "intel/common/gen_gem.h"
33 #include "util/bitscan.h"
34 #include "util/macros.h"
35 
36 #include "drm-uapi/i915_drm.h"
37 
/* Lookup table pairing each short platform name with the PCI device ID that
 * gen_device_name_to_pci_device_id() returns for it.
 */
static const struct {
   const char *name; /* short platform name, matched exactly */
   int pci_id;       /* PCI device ID returned for that name */
} name_map[] = {
   { .name = "lpt", .pci_id = 0x27a2 },
   { .name = "brw", .pci_id = 0x2a02 },
   { .name = "g4x", .pci_id = 0x2a42 },
   { .name = "ilk", .pci_id = 0x0042 },
   { .name = "snb", .pci_id = 0x0126 },
   { .name = "ivb", .pci_id = 0x016a },
   { .name = "hsw", .pci_id = 0x0d2e },
   { .name = "byt", .pci_id = 0x0f33 },
   { .name = "bdw", .pci_id = 0x162e },
   { .name = "chv", .pci_id = 0x22b3 },
   { .name = "skl", .pci_id = 0x1912 },
   { .name = "bxt", .pci_id = 0x5a85 },
   { .name = "kbl", .pci_id = 0x5912 },
   { .name = "aml", .pci_id = 0x591c },
   { .name = "glk", .pci_id = 0x3185 },
   { .name = "cfl", .pci_id = 0x3e9b },
   { .name = "whl", .pci_id = 0x3ea1 },
   { .name = "cml", .pci_id = 0x9b41 },
   { .name = "icl", .pci_id = 0x8a52 },
   { .name = "ehl", .pci_id = 0x4500 },
   { .name = "jsl", .pci_id = 0x4e71 },
   { .name = "tgl", .pci_id = 0x9a49 },
   { .name = "rkl", .pci_id = 0x4c8a },
   { .name = "dg1", .pci_id = 0x4905 },
   { .name = "adl", .pci_id = 0x4680 },
};
68 
69 /**
70  * Get the PCI ID for the device name.
71  *
72  * Returns -1 if the device is not known.
73  */
74 int
gen_device_name_to_pci_device_id(const char * name)75 gen_device_name_to_pci_device_id(const char *name)
76 {
77    for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {
78       if (!strcmp(name_map[i].name, name))
79          return name_map[i].pci_id;
80    }
81 
82    return -1;
83 }
84 
85 static const struct gen_device_info gen_device_info_gen3 = {
86    .gen = 3,
87    .simulator_id = -1,
88 };
89 
90 static const struct gen_device_info gen_device_info_i965 = {
91    .gen = 4,
92    .has_negative_rhw_bug = true,
93    .num_slices = 1,
94    .num_subslices = { 1, },
95    .num_eu_per_subslice = 8,
96    .num_thread_per_eu = 4,
97    .max_vs_threads = 16,
98    .max_gs_threads = 2,
99    .max_wm_threads = 8 * 4,
100    .urb = {
101       .size = 256,
102    },
103    .timestamp_frequency = 12500000,
104    .simulator_id = -1,
105 };
106 
107 static const struct gen_device_info gen_device_info_g4x = {
108    .gen = 4,
109    .has_pln = true,
110    .has_compr4 = true,
111    .has_surface_tile_offset = true,
112    .is_g4x = true,
113    .num_slices = 1,
114    .num_subslices = { 1, },
115    .num_eu_per_subslice = 10,
116    .num_thread_per_eu = 5,
117    .max_vs_threads = 32,
118    .max_gs_threads = 2,
119    .max_wm_threads = 10 * 5,
120    .urb = {
121       .size = 384,
122    },
123    .timestamp_frequency = 12500000,
124    .simulator_id = -1,
125 };
126 
127 static const struct gen_device_info gen_device_info_ilk = {
128    .gen = 5,
129    .has_pln = true,
130    .has_compr4 = true,
131    .has_surface_tile_offset = true,
132    .num_slices = 1,
133    .num_subslices = { 1, },
134    .num_eu_per_subslice = 12,
135    .num_thread_per_eu = 6,
136    .max_vs_threads = 72,
137    .max_gs_threads = 32,
138    .max_wm_threads = 12 * 6,
139    .urb = {
140       .size = 1024,
141    },
142    .timestamp_frequency = 12500000,
143    .simulator_id = -1,
144 };
145 
146 static const struct gen_device_info gen_device_info_snb_gt1 = {
147    .gen = 6,
148    .gt = 1,
149    .has_hiz_and_separate_stencil = true,
150    .has_llc = true,
151    .has_pln = true,
152    .has_surface_tile_offset = true,
153    .needs_unlit_centroid_workaround = true,
154    .num_slices = 1,
155    .num_subslices = { 1, },
156    .num_eu_per_subslice = 6,
157    .num_thread_per_eu = 6, /* Not confirmed */
158    .max_vs_threads = 24,
159    .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
160    .max_wm_threads = 40,
161    .urb = {
162       .size = 32,
163       .min_entries = {
164          [MESA_SHADER_VERTEX]   = 24,
165       },
166       .max_entries = {
167          [MESA_SHADER_VERTEX]   = 256,
168          [MESA_SHADER_GEOMETRY] = 256,
169       },
170    },
171    .timestamp_frequency = 12500000,
172    .simulator_id = -1,
173 };
174 
175 static const struct gen_device_info gen_device_info_snb_gt2 = {
176    .gen = 6,
177    .gt = 2,
178    .has_hiz_and_separate_stencil = true,
179    .has_llc = true,
180    .has_pln = true,
181    .has_surface_tile_offset = true,
182    .needs_unlit_centroid_workaround = true,
183    .num_slices = 1,
184    .num_subslices = { 1, },
185    .num_eu_per_subslice = 12,
186    .num_thread_per_eu = 6, /* Not confirmed */
187    .max_vs_threads = 60,
188    .max_gs_threads = 60,
189    .max_wm_threads = 80,
190    .urb = {
191       .size = 64,
192       .min_entries = {
193          [MESA_SHADER_VERTEX]   = 24,
194       },
195       .max_entries = {
196          [MESA_SHADER_VERTEX]   = 256,
197          [MESA_SHADER_GEOMETRY] = 256,
198       },
199    },
200    .timestamp_frequency = 12500000,
201    .simulator_id = -1,
202 };
203 
/* Initializer fragment shared by the gen 7 device descriptions (used directly
 * and through HSW_FEATURES).  Fields listed after it in an initializer
 * override the values given here.
 */
#define GEN7_FEATURES                    \
   .gen = 7,                             \
   .timestamp_frequency = 12500000,      \
   .has_64bit_float = true,              \
   .has_hiz_and_separate_stencil = true, \
   .has_llc = true,                      \
   .has_pln = true,                      \
   .has_surface_tile_offset = true,      \
   .must_use_separate_stencil = true
213 
214 static const struct gen_device_info gen_device_info_ivb_gt1 = {
215    GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
216    .num_slices = 1,
217    .num_subslices = { 1, },
218    .num_eu_per_subslice = 6,
219    .num_thread_per_eu = 6,
220    .l3_banks = 2,
221    .max_vs_threads = 36,
222    .max_tcs_threads = 36,
223    .max_tes_threads = 36,
224    .max_gs_threads = 36,
225    .max_wm_threads = 48,
226    .max_cs_threads = 36,
227    .urb = {
228       .min_entries = {
229          [MESA_SHADER_VERTEX]    = 32,
230          [MESA_SHADER_TESS_EVAL] = 10,
231       },
232       .max_entries = {
233          [MESA_SHADER_VERTEX]    = 512,
234          [MESA_SHADER_TESS_CTRL] = 32,
235          [MESA_SHADER_TESS_EVAL] = 288,
236          [MESA_SHADER_GEOMETRY]  = 192,
237       },
238    },
239    .simulator_id = 7,
240 };
241 
242 static const struct gen_device_info gen_device_info_ivb_gt2 = {
243    GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
244    .num_slices = 1,
245    .num_subslices = { 1, },
246    .num_eu_per_subslice = 12,
247    .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
248                             * @max_wm_threads ... */
249    .l3_banks = 4,
250    .max_vs_threads = 128,
251    .max_tcs_threads = 128,
252    .max_tes_threads = 128,
253    .max_gs_threads = 128,
254    .max_wm_threads = 172,
255    .max_cs_threads = 64,
256    .urb = {
257       .min_entries = {
258          [MESA_SHADER_VERTEX]    = 32,
259          [MESA_SHADER_TESS_EVAL] = 10,
260       },
261       .max_entries = {
262          [MESA_SHADER_VERTEX]    = 704,
263          [MESA_SHADER_TESS_CTRL] = 64,
264          [MESA_SHADER_TESS_EVAL] = 448,
265          [MESA_SHADER_GEOMETRY]  = 320,
266       },
267    },
268    .simulator_id = 7,
269 };
270 
271 static const struct gen_device_info gen_device_info_byt = {
272    GEN7_FEATURES, .is_baytrail = true, .gt = 1,
273    .num_slices = 1,
274    .num_subslices = { 1, },
275    .num_eu_per_subslice = 4,
276    .num_thread_per_eu = 8,
277    .l3_banks = 1,
278    .has_llc = false,
279    .max_vs_threads = 36,
280    .max_tcs_threads = 36,
281    .max_tes_threads = 36,
282    .max_gs_threads = 36,
283    .max_wm_threads = 48,
284    .max_cs_threads = 32,
285    .urb = {
286       .min_entries = {
287          [MESA_SHADER_VERTEX]    = 32,
288          [MESA_SHADER_TESS_EVAL] = 10,
289       },
290       .max_entries = {
291          [MESA_SHADER_VERTEX]    = 512,
292          [MESA_SHADER_TESS_CTRL] = 32,
293          [MESA_SHADER_TESS_EVAL] = 288,
294          [MESA_SHADER_GEOMETRY]  = 192,
295       },
296    },
297    .simulator_id = 10,
298 };
299 
/* Initializer fragment for the Haswell descriptions: GEN7_FEATURES plus the
 * Haswell-specific flags.
 */
#define HSW_FEATURES                \
   GEN7_FEATURES,                   \
   .is_haswell = true,              \
   .has_resource_streamer = true,   \
   .supports_simd16_3src = true
305 
306 static const struct gen_device_info gen_device_info_hsw_gt1 = {
307    HSW_FEATURES, .gt = 1,
308    .num_slices = 1,
309    .num_subslices = { 1, },
310    .num_eu_per_subslice = 10,
311    .num_thread_per_eu = 7,
312    .l3_banks = 2,
313    .max_vs_threads = 70,
314    .max_tcs_threads = 70,
315    .max_tes_threads = 70,
316    .max_gs_threads = 70,
317    .max_wm_threads = 102,
318    .max_cs_threads = 70,
319    .urb = {
320       .min_entries = {
321          [MESA_SHADER_VERTEX]    = 32,
322          [MESA_SHADER_TESS_EVAL] = 10,
323       },
324       .max_entries = {
325          [MESA_SHADER_VERTEX]    = 640,
326          [MESA_SHADER_TESS_CTRL] = 64,
327          [MESA_SHADER_TESS_EVAL] = 384,
328          [MESA_SHADER_GEOMETRY]  = 256,
329       },
330    },
331    .simulator_id = 9,
332 };
333 
334 static const struct gen_device_info gen_device_info_hsw_gt2 = {
335    HSW_FEATURES, .gt = 2,
336    .num_slices = 1,
337    .num_subslices = { 2, },
338    .num_eu_per_subslice = 10,
339    .num_thread_per_eu = 7,
340    .l3_banks = 4,
341    .max_vs_threads = 280,
342    .max_tcs_threads = 256,
343    .max_tes_threads = 280,
344    .max_gs_threads = 256,
345    .max_wm_threads = 204,
346    .max_cs_threads = 70,
347    .urb = {
348       .min_entries = {
349          [MESA_SHADER_VERTEX]    = 64,
350          [MESA_SHADER_TESS_EVAL] = 10,
351       },
352       .max_entries = {
353          [MESA_SHADER_VERTEX]    = 1664,
354          [MESA_SHADER_TESS_CTRL] = 128,
355          [MESA_SHADER_TESS_EVAL] = 960,
356          [MESA_SHADER_GEOMETRY]  = 640,
357       },
358    },
359    .simulator_id = 9,
360 };
361 
362 static const struct gen_device_info gen_device_info_hsw_gt3 = {
363    HSW_FEATURES, .gt = 3,
364    .num_slices = 2,
365    .num_subslices = { 2, },
366    .num_eu_per_subslice = 10,
367    .num_thread_per_eu = 7,
368    .l3_banks = 8,
369    .max_vs_threads = 280,
370    .max_tcs_threads = 256,
371    .max_tes_threads = 280,
372    .max_gs_threads = 256,
373    .max_wm_threads = 408,
374    .max_cs_threads = 70,
375    .urb = {
376       .min_entries = {
377          [MESA_SHADER_VERTEX]    = 64,
378          [MESA_SHADER_TESS_EVAL] = 10,
379       },
380       .max_entries = {
381          [MESA_SHADER_VERTEX]    = 1664,
382          [MESA_SHADER_TESS_CTRL] = 128,
383          [MESA_SHADER_TESS_EVAL] = 960,
384          [MESA_SHADER_GEOMETRY]  = 640,
385       },
386    },
387    .simulator_id = 9,
388 };
389 
/* Initializer fragment shared by the gen 8 descriptions and reused as the
 * base of the gen 9/11/12 fragments below.
 *
 * It's unclear how well supported sampling from the hiz buffer is on GEN8,
 * so keep things conservative for now and set has_sample_with_hiz = false.
 */
#define GEN8_FEATURES                    \
   .gen = 8,                             \
   .timestamp_frequency = 12500000,      \
   .has_64bit_float = true,              \
   .has_64bit_int = true,                \
   .has_hiz_and_separate_stencil = true, \
   .has_integer_dword_mul = true,        \
   .has_llc = true,                      \
   .has_pln = true,                      \
   .has_resource_streamer = true,        \
   .has_sample_with_hiz = false,         \
   .has_surface_tile_offset = true,      \
   .must_use_separate_stencil = true,    \
   .supports_simd16_3src = true,         \
   .num_thread_per_eu = 7,               \
   .max_vs_threads = 504,                \
   .max_tcs_threads = 504,               \
   .max_tes_threads = 504,               \
   .max_gs_threads = 504,                \
   .max_wm_threads = 384
413 
414 static const struct gen_device_info gen_device_info_bdw_gt1 = {
415    GEN8_FEATURES, .gt = 1,
416    .is_broadwell = true,
417    .num_slices = 1,
418    .num_subslices = { 2, },
419    .num_eu_per_subslice = 6,
420    .l3_banks = 2,
421    .max_cs_threads = 42,
422    .urb = {
423       .min_entries = {
424          [MESA_SHADER_VERTEX]    = 64,
425          [MESA_SHADER_TESS_EVAL] = 34,
426       },
427       .max_entries = {
428          [MESA_SHADER_VERTEX]    = 2560,
429          [MESA_SHADER_TESS_CTRL] = 504,
430          [MESA_SHADER_TESS_EVAL] = 1536,
431          /* Reduced from 960, seems to be similar to the bug on Gen9 GT1. */
432          [MESA_SHADER_GEOMETRY]  = 690,
433       },
434    },
435    .simulator_id = 11,
436 };
437 
438 static const struct gen_device_info gen_device_info_bdw_gt2 = {
439    GEN8_FEATURES, .gt = 2,
440    .is_broadwell = true,
441    .num_slices = 1,
442    .num_subslices = { 3, },
443    .num_eu_per_subslice = 8,
444    .l3_banks = 4,
445    .max_cs_threads = 56,
446    .urb = {
447       .min_entries = {
448          [MESA_SHADER_VERTEX]    = 64,
449          [MESA_SHADER_TESS_EVAL] = 34,
450       },
451       .max_entries = {
452          [MESA_SHADER_VERTEX]    = 2560,
453          [MESA_SHADER_TESS_CTRL] = 504,
454          [MESA_SHADER_TESS_EVAL] = 1536,
455          [MESA_SHADER_GEOMETRY]  = 960,
456       },
457    },
458    .simulator_id = 11,
459 };
460 
461 static const struct gen_device_info gen_device_info_bdw_gt3 = {
462    GEN8_FEATURES, .gt = 3,
463    .is_broadwell = true,
464    .num_slices = 2,
465    .num_subslices = { 3, 3, },
466    .num_eu_per_subslice = 8,
467    .l3_banks = 8,
468    .max_cs_threads = 56,
469    .urb = {
470       .min_entries = {
471          [MESA_SHADER_VERTEX]    = 64,
472          [MESA_SHADER_TESS_EVAL] = 34,
473       },
474       .max_entries = {
475          [MESA_SHADER_VERTEX]    = 2560,
476          [MESA_SHADER_TESS_CTRL] = 504,
477          [MESA_SHADER_TESS_EVAL] = 1536,
478          [MESA_SHADER_GEOMETRY]  = 960,
479       },
480    },
481    .simulator_id = 11,
482 };
483 
484 static const struct gen_device_info gen_device_info_chv = {
485    GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
486    .has_llc = false,
487    .has_integer_dword_mul = false,
488    .num_slices = 1,
489    .num_subslices = { 2, },
490    .num_eu_per_subslice = 8,
491    .l3_banks = 2,
492    .max_vs_threads = 80,
493    .max_tcs_threads = 80,
494    .max_tes_threads = 80,
495    .max_gs_threads = 80,
496    .max_wm_threads = 128,
497    .max_cs_threads = 6 * 7,
498    .urb = {
499       .min_entries = {
500          [MESA_SHADER_VERTEX]    = 34,
501          [MESA_SHADER_TESS_EVAL] = 34,
502       },
503       .max_entries = {
504          [MESA_SHADER_VERTEX]    = 640,
505          [MESA_SHADER_TESS_CTRL] = 80,
506          [MESA_SHADER_TESS_EVAL] = 384,
507          [MESA_SHADER_GEOMETRY]  = 256,
508       },
509    },
510    .simulator_id = 13,
511 };
512 
/* Gen 9 hardware limits: generation number, per-stage thread limits,
 * timestamp frequency and URB entry limits.  Placed after GEN8_FEATURES so
 * that these values replace the gen 8 ones.
 */
#define GEN9_HW_INFO                        \
   .gen = 9,                                \
   .timestamp_frequency = 12000000,         \
   .max_vs_threads = 336,                   \
   .max_tcs_threads = 336,                  \
   .max_tes_threads = 336,                  \
   .max_gs_threads = 336,                   \
   .max_cs_threads = 56,                    \
   .urb = {                                 \
      .min_entries = {                      \
         [MESA_SHADER_VERTEX]    = 64,      \
         [MESA_SHADER_TESS_EVAL] = 34,      \
      },                                    \
      .max_entries = {                      \
         [MESA_SHADER_VERTEX]    = 1856,    \
         [MESA_SHADER_TESS_CTRL] = 672,     \
         [MESA_SHADER_TESS_EVAL] = 1120,    \
         [MESA_SHADER_GEOMETRY]  = 640,     \
      },                                    \
   }
533 
/* Base fragment for the gen 9 LP descriptions.  Starts from GEN8_FEATURES and
 * GEN9_HW_INFO, then overrides feature flags, thread limits, timestamp
 * frequency and the whole URB entry table.  The overrides must stay after
 * the two base fragments.
 */
#define GEN9_LP_FEATURES                    \
   GEN8_FEATURES,                           \
   GEN9_HW_INFO,                            \
   .gt = 1,                                 \
   .has_integer_dword_mul = false,          \
   .has_llc = false,                        \
   .has_sample_with_hiz = true,             \
   .timestamp_frequency = 19200000,         \
   .num_slices = 1,                         \
   .num_thread_per_eu = 6,                  \
   .max_vs_threads = 112,                   \
   .max_tcs_threads = 112,                  \
   .max_tes_threads = 112,                  \
   .max_gs_threads = 112,                   \
   .max_cs_threads = 6 * 6,                 \
   .urb = {                                 \
      .min_entries = {                      \
         [MESA_SHADER_VERTEX]    = 34,      \
         [MESA_SHADER_TESS_EVAL] = 34,      \
      },                                    \
      .max_entries = {                      \
         [MESA_SHADER_VERTEX]    = 704,     \
         [MESA_SHADER_TESS_CTRL] = 256,     \
         [MESA_SHADER_TESS_EVAL] = 416,     \
         [MESA_SHADER_GEOMETRY]  = 256,     \
      },                                    \
   }
561 
/* Gen 9 LP fragment with one slice of three subslices, six EUs each. */
#define GEN9_LP_FEATURES_3X6     \
   GEN9_LP_FEATURES,             \
   .num_eu_per_subslice = 6,     \
   .num_subslices = { 3 }
566 
/* Gen 9 LP fragment with one slice of two subslices, six EUs each.  The
 * thread limits and URB entry table from GEN9_LP_FEATURES are replaced by
 * the smaller values below.
 */
#define GEN9_LP_FEATURES_2X6                \
   GEN9_LP_FEATURES,                        \
   .num_eu_per_subslice = 6,                \
   .num_subslices = { 2 },                  \
   .max_vs_threads = 56,                    \
   .max_tcs_threads = 56,                   \
   .max_tes_threads = 56,                   \
   .max_gs_threads = 56,                    \
   .max_cs_threads = 6 * 6,                 \
   .urb = {                                 \
      .min_entries = {                      \
         [MESA_SHADER_VERTEX]    = 34,      \
         [MESA_SHADER_TESS_EVAL] = 34,      \
      },                                    \
      .max_entries = {                      \
         [MESA_SHADER_VERTEX]    = 352,     \
         [MESA_SHADER_TESS_CTRL] = 128,     \
         [MESA_SHADER_TESS_EVAL] = 208,     \
         [MESA_SHADER_GEOMETRY]  = 128,     \
      },                                    \
   }
588 
/* Base fragment for the non-LP gen 9 descriptions: the gen 8 feature set with
 * gen 9 limits, and sampling with HiZ turned on.
 */
#define GEN9_FEATURES            \
   GEN8_FEATURES,                \
   GEN9_HW_INFO,                 \
   .has_sample_with_hiz = true
593 
594 static const struct gen_device_info gen_device_info_skl_gt1 = {
595    GEN9_FEATURES, .gt = 1,
596    .is_skylake = true,
597    .num_slices = 1,
598    .num_subslices = { 2, },
599    .num_eu_per_subslice = 6,
600    .l3_banks = 2,
601    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
602     * leading to some vertices to go missing if we use too much URB.
603     */
604    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
605    .simulator_id = 12,
606 };
607 
608 static const struct gen_device_info gen_device_info_skl_gt2 = {
609    GEN9_FEATURES, .gt = 2,
610    .is_skylake = true,
611    .num_slices = 1,
612    .num_subslices = { 3, },
613    .num_eu_per_subslice = 8,
614    .l3_banks = 4,
615    .simulator_id = 12,
616 };
617 
618 static const struct gen_device_info gen_device_info_skl_gt3 = {
619    GEN9_FEATURES, .gt = 3,
620    .is_skylake = true,
621    .num_slices = 2,
622    .num_subslices = { 3, 3, },
623    .num_eu_per_subslice = 8,
624    .l3_banks = 8,
625    .simulator_id = 12,
626 };
627 
628 static const struct gen_device_info gen_device_info_skl_gt4 = {
629    GEN9_FEATURES, .gt = 4,
630    .is_skylake = true,
631    .num_slices = 3,
632    .num_subslices = { 3, 3, 3, },
633    .num_eu_per_subslice = 8,
634    .l3_banks = 12,
635    /* From the "L3 Allocation and Programming" documentation:
636     *
637     * "URB is limited to 1008KB due to programming restrictions.  This is not a
638     * restriction of the L3 implementation, but of the FF and other clients.
639     * Therefore, in a GT4 implementation it is possible for the programmed
640     * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
641     * only 1008KB of this will be used."
642     */
643    .simulator_id = 12,
644 };
645 
646 static const struct gen_device_info gen_device_info_bxt = {
647    GEN9_LP_FEATURES_3X6,
648    .is_broxton = true,
649    .l3_banks = 2,
650    .simulator_id = 14,
651 };
652 
653 static const struct gen_device_info gen_device_info_bxt_2x6 = {
654    GEN9_LP_FEATURES_2X6,
655    .is_broxton = true,
656    .l3_banks = 1,
657    .simulator_id = 14,
658 };
/*
 * Note: for all KBL SKUs, the PRM lists the GS entries under SKL, not SKL+,
 * and has no separate KBL entry.  The default SKL (GEN9) GS entries value is
 * therefore used.
 */
663 
664 static const struct gen_device_info gen_device_info_kbl_gt1 = {
665    GEN9_FEATURES,
666    .is_kabylake = true,
667    .gt = 1,
668 
669    .max_cs_threads = 7 * 6,
670    .num_slices = 1,
671    .num_subslices = { 2, },
672    .num_eu_per_subslice = 6,
673    .l3_banks = 2,
674    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
675     * leading to some vertices to go missing if we use too much URB.
676     */
677    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
678    .simulator_id = 16,
679 };
680 
681 static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
682    GEN9_FEATURES,
683    .is_kabylake = true,
684    .gt = 1,
685 
686    .max_cs_threads = 7 * 6,
687    .num_slices = 1,
688    .num_subslices = { 3, },
689    .num_eu_per_subslice = 6,
690    .l3_banks = 4,
691    .simulator_id = 16,
692 };
693 
694 static const struct gen_device_info gen_device_info_kbl_gt2 = {
695    GEN9_FEATURES,
696    .is_kabylake = true,
697    .gt = 2,
698 
699    .num_slices = 1,
700    .num_subslices = { 3, },
701    .num_eu_per_subslice = 8,
702    .l3_banks = 4,
703    .simulator_id = 16,
704 };
705 
706 static const struct gen_device_info gen_device_info_kbl_gt3 = {
707    GEN9_FEATURES,
708    .is_kabylake = true,
709    .gt = 3,
710 
711    .num_slices = 2,
712    .num_subslices = { 3, 3, },
713    .num_eu_per_subslice = 8,
714    .l3_banks = 8,
715    .simulator_id = 16,
716 };
717 
718 static const struct gen_device_info gen_device_info_kbl_gt4 = {
719    GEN9_FEATURES,
720    .is_kabylake = true,
721    .gt = 4,
722 
723    /*
724     * From the "L3 Allocation and Programming" documentation:
725     *
726     * "URB is limited to 1008KB due to programming restrictions.  This
727     *  is not a restriction of the L3 implementation, but of the FF and
728     *  other clients.  Therefore, in a GT4 implementation it is
729     *  possible for the programmed allocation of the L3 data array to
730     *  provide 3*384KB=1152KB for URB, but only 1008KB of this
731     *  will be used."
732     */
733    .num_slices = 3,
734    .num_subslices = { 3, 3, 3, },
735    .num_eu_per_subslice = 8,
736    .l3_banks = 12,
737    .simulator_id = 16,
738 };
739 
740 static const struct gen_device_info gen_device_info_glk = {
741    GEN9_LP_FEATURES_3X6,
742    .is_geminilake = true,
743    .l3_banks = 2,
744    .simulator_id = 17,
745 };
746 
747 static const struct gen_device_info gen_device_info_glk_2x6 = {
748    GEN9_LP_FEATURES_2X6,
749    .is_geminilake = true,
750    .l3_banks = 2,
751    .simulator_id = 17,
752 };
753 
754 static const struct gen_device_info gen_device_info_cfl_gt1 = {
755    GEN9_FEATURES,
756    .is_coffeelake = true,
757    .gt = 1,
758 
759    .num_slices = 1,
760    .num_subslices = { 2, },
761    .num_eu_per_subslice = 6,
762    .l3_banks = 2,
763    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
764     * leading to some vertices to go missing if we use too much URB.
765     */
766    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
767    .simulator_id = 24,
768 };
769 static const struct gen_device_info gen_device_info_cfl_gt2 = {
770    GEN9_FEATURES,
771    .is_coffeelake = true,
772    .gt = 2,
773 
774    .num_slices = 1,
775    .num_subslices = { 3, },
776    .num_eu_per_subslice = 8,
777    .l3_banks = 4,
778    .simulator_id = 24,
779 };
780 
781 static const struct gen_device_info gen_device_info_cfl_gt3 = {
782    GEN9_FEATURES,
783    .is_coffeelake = true,
784    .gt = 3,
785 
786    .num_slices = 2,
787    .num_subslices = { 3, 3, },
788    .num_eu_per_subslice = 8,
789    .l3_banks = 8,
790    .simulator_id = 24,
791 };
792 
/* Wrap a comma-separated list of per-slice subslice counts in a braced
 * initializer so it can be passed as a single macro argument.  Uses the
 * ISO C99 variadic form rather than the GNU named-variadic extension.
 */
#define subslices(...) { __VA_ARGS__, }
794 
/* Gen 11 hardware limits: generation number, PLN turned off and per-stage
 * thread limits.  Placed after GEN8_FEATURES so these values replace the
 * gen 8 ones.
 */
#define GEN11_HW_INFO            \
   .gen = 11,                    \
   .has_pln = false,             \
   .max_vs_threads = 364,        \
   .max_tcs_threads = 224,       \
   .max_tes_threads = 364,       \
   .max_gs_threads = 224,        \
   .max_cs_threads = 56
803 
/* Base fragment for the gen 11 descriptions.
 *
 * _gt, _slices and _l3 initialize .gt, .num_slices and .l3_banks;
 * _subslices is a braced list (see subslices()) for .num_subslices.
 * num_eu_per_subslice defaults to 8 and may be overridden by a later field.
 */
#define GEN11_FEATURES(_gt, _slices, _subslices, _l3) \
   GEN8_FEATURES,                                     \
   GEN11_HW_INFO,                                     \
   .has_64bit_float = false,                          \
   .has_64bit_int = false,                            \
   .has_integer_dword_mul = false,                    \
   .has_sample_with_hiz = false,                      \
   .gt = _gt,                                         \
   .num_slices = _slices,                             \
   .num_subslices = _subslices,                       \
   .num_eu_per_subslice = 8,                          \
   .l3_banks = _l3
814 
/* Gen 11 per-stage URB entry limits, for use inside a ".urb = { ... }"
 * initializer.
 */
#define GEN11_URB_MIN_MAX_ENTRIES             \
   .min_entries = {                           \
      [MESA_SHADER_VERTEX]    = 64,           \
      [MESA_SHADER_TESS_EVAL] = 34,           \
   },                                         \
   .max_entries = {                           \
      [MESA_SHADER_VERTEX]    = 2384,         \
      [MESA_SHADER_TESS_CTRL] = 1032,         \
      [MESA_SHADER_TESS_EVAL] = 2384,         \
      [MESA_SHADER_GEOMETRY]  = 1032,         \
   }
826 
827 static const struct gen_device_info gen_device_info_icl_gt2 = {
828    GEN11_FEATURES(2, 1, subslices(8), 8),
829    .urb = {
830       GEN11_URB_MIN_MAX_ENTRIES,
831    },
832    .simulator_id = 19,
833 };
834 
835 static const struct gen_device_info gen_device_info_icl_gt1_5 = {
836    GEN11_FEATURES(1, 1, subslices(6), 6),
837    .urb = {
838       GEN11_URB_MIN_MAX_ENTRIES,
839    },
840    .simulator_id = 19,
841 };
842 
843 static const struct gen_device_info gen_device_info_icl_gt1 = {
844    GEN11_FEATURES(1, 1, subslices(4), 6),
845    .urb = {
846       GEN11_URB_MIN_MAX_ENTRIES,
847    },
848    .simulator_id = 19,
849 };
850 
851 static const struct gen_device_info gen_device_info_icl_gt0_5 = {
852    GEN11_FEATURES(1, 1, subslices(1), 6),
853    .urb = {
854       GEN11_URB_MIN_MAX_ENTRIES,
855    },
856    .simulator_id = 19,
857 };
858 
/* Fields common to the Elkhart Lake descriptions, meant to follow a
 * GEN11_FEATURES(...) expansion in the same initializer.
 */
#define GEN11_LP_FEATURES               \
   .is_elkhartlake = true,              \
   .disable_ccs_repack = true,          \
   .simulator_id = 28,                  \
   .urb = {                             \
      GEN11_URB_MIN_MAX_ENTRIES,        \
   }
866 
867 static const struct gen_device_info gen_device_info_ehl_4x8 = {
868    GEN11_FEATURES(1, 1, subslices(4), 4),
869    GEN11_LP_FEATURES,
870 };
871 
872 static const struct gen_device_info gen_device_info_ehl_4x6 = {
873    GEN11_FEATURES(1, 1, subslices(4), 4),
874    GEN11_LP_FEATURES,
875    .num_eu_per_subslice = 6,
876 };
877 
878 static const struct gen_device_info gen_device_info_ehl_4x5 = {
879    GEN11_FEATURES(1, 1, subslices(4), 4),
880    GEN11_LP_FEATURES,
881    .num_eu_per_subslice = 5,
882 };
883 
884 static const struct gen_device_info gen_device_info_ehl_4x4 = {
885    GEN11_FEATURES(1, 1, subslices(4), 4),
886    GEN11_LP_FEATURES,
887    .num_eu_per_subslice = 4,
888 };
889 
890 static const struct gen_device_info gen_device_info_ehl_2x8 = {
891    GEN11_FEATURES(1, 1, subslices(2), 4),
892    GEN11_LP_FEATURES,
893 };
894 
895 static const struct gen_device_info gen_device_info_ehl_2x4 = {
896    GEN11_FEATURES(1, 1, subslices(2), 4),
897    GEN11_LP_FEATURES,
898    .num_eu_per_subslice =4,
899 };
900 
/* Gen 12 per-stage URB entry limits, for use inside a ".urb = { ... }"
 * initializer.
 */
#define GEN12_URB_MIN_MAX_ENTRIES             \
   .min_entries = {                           \
      [MESA_SHADER_VERTEX]    = 64,           \
      [MESA_SHADER_TESS_EVAL] = 34,           \
   },                                         \
   .max_entries = {                           \
      [MESA_SHADER_VERTEX]    = 3576,         \
      [MESA_SHADER_TESS_CTRL] = 1548,         \
      [MESA_SHADER_TESS_EVAL] = 3576,         \
      [MESA_SHADER_GEOMETRY]  = 1548,         \
   }
912 
/* Gen 12 hardware description: generation number, feature flags, per-stage
 * thread limits and URB entry limits.  Placed after GEN8_FEATURES so these
 * values replace the gen 8 ones.
 */
#define GEN12_HW_INFO                               \
   .gen = 12,                                       \
   .has_aux_map = true,                             \
   .has_pln = false,                                \
   .has_sample_with_hiz = false,                    \
   .max_vs_threads = 546,                           \
   .max_tcs_threads = 336,                          \
   .max_tes_threads = 546,                          \
   .max_gs_threads = 336,                           \
   .max_cs_threads = 112, /* threads per DSS */     \
   .urb = {                                         \
      GEN12_URB_MIN_MAX_ENTRIES,                    \
   }
926 
/* Base fragment for the gen 12 descriptions.
 *
 * _gt, _slices and _l3 initialize .gt, .num_slices and .l3_banks.
 * simulator_id (22) and num_eu_per_subslice (16) are defaults that a later
 * field in the same initializer may override.
 */
#define GEN12_FEATURES(_gt, _slices, _l3)     \
   GEN8_FEATURES,                             \
   GEN12_HW_INFO,                             \
   .has_64bit_float = false,                  \
   .has_64bit_int = false,                    \
   .has_integer_dword_mul = false,            \
   .gt = _gt,                                 \
   .num_slices = _slices,                     \
   .num_eu_per_subslice = 16,                 \
   .l3_banks = _l3,                           \
   .simulator_id = 22
936 
/* Wraps its arguments in a brace-enclosed initializer list; used below to
 * initialize .num_subslices.  Relies on the GNU named-variadic macro syntax
 * (args...).
 */
#define dual_subslices(args...) { args, }
938 
/* Gen 12 initializer list with .gt = 1, one slice, 4 L3 banks and a single
 * entry of 1 in .num_subslices.
 */
#define GEN12_GT05_FEATURES                                     \
   GEN12_FEATURES(1, 1, 4),                                     \
   .num_subslices = dual_subslices(1)
942 
/* Gen 12 initializer list for a single-slice part of GT level _gt.
 *
 * _gt == 1 selects 4 L3 banks and 2 in .num_subslices; any other value
 * selects 8 L3 banks and 6 in .num_subslices.
 *
 * _gt is parenthesized wherever it takes part in a comparison so that an
 * argument containing lower-precedence operators still compares as a whole.
 */
#define GEN12_GT_FEATURES(_gt)                                  \
   GEN12_FEATURES(_gt, 1, (_gt) == 1 ? 4 : 8),                  \
   .num_subslices = dual_subslices((_gt) == 1 ? 2 : 6)
946 
/* GEN12_GT_FEATURES(1): .gt = 1, 4 L3 banks, 2 in .num_subslices. */
static const struct gen_device_info gen_device_info_tgl_gt1 = {
   GEN12_GT_FEATURES(1),
};

/* GEN12_GT_FEATURES(2): .gt = 2, 8 L3 banks, 6 in .num_subslices. */
static const struct gen_device_info gen_device_info_tgl_gt2 = {
   GEN12_GT_FEATURES(2),
};

/* GEN12_GT05_FEATURES: .gt = 1, 4 L3 banks, 1 in .num_subslices. */
static const struct gen_device_info gen_device_info_rkl_gt05 = {
   GEN12_GT05_FEATURES,
};

/* Same initializer list as gen_device_info_tgl_gt1. */
static const struct gen_device_info gen_device_info_rkl_gt1 = {
   GEN12_GT_FEATURES(1),
};

/* Same initializer list as gen_device_info_rkl_gt05. */
static const struct gen_device_info gen_device_info_adl_gt05 = {
   GEN12_GT05_FEATURES,
};

/* Same initializer list as gen_device_info_tgl_gt1. */
static const struct gen_device_info gen_device_info_adl_gt1 = {
   GEN12_GT_FEATURES(1),
};
970 
/* Starts from GEN12_GT_FEATURES(2) and then overrides a few fields: sets
 * is_dg1, clears has_llc, sets urb.size to 768 and replaces the
 * simulator_id of 22 from GEN12_FEATURES with 30.
 */
#define GEN12_DG1_FEATURES                      \
   GEN12_GT_FEATURES(2),                        \
   .is_dg1 = true,                              \
   .has_llc = false,                            \
   .urb.size = 768,                             \
   .simulator_id = 30
977 
/* Table entry built from GEN12_DG1_FEATURES.  Tagged UNUSED — presumably
 * because no PCI ID list references it yet; confirm before removing the tag.
 */
UNUSED static const struct gen_device_info gen_device_info_dg1 = {
   GEN12_DG1_FEATURES,
};
981 
982 static void
gen_device_info_set_eu_mask(struct gen_device_info * devinfo,unsigned slice,unsigned subslice,unsigned eu_mask)983 gen_device_info_set_eu_mask(struct gen_device_info *devinfo,
984                             unsigned slice,
985                             unsigned subslice,
986                             unsigned eu_mask)
987 {
988    unsigned subslice_offset = slice * devinfo->eu_slice_stride +
989       subslice * devinfo->eu_subslice_stride;
990 
991    for (unsigned b_eu = 0; b_eu < devinfo->eu_subslice_stride; b_eu++) {
992       devinfo->eu_masks[subslice_offset + b_eu] =
993          (((1U << devinfo->num_eu_per_subslice) - 1) >> (b_eu * 8)) & 0xff;
994    }
995 }
996 
997 /* Generate slice/subslice/eu masks from number of
998  * slices/subslices/eu_per_subslices in the per generation/gt gen_device_info
999  * structure.
1000  *
1001  * These can be overridden with values reported by the kernel either from
1002  * getparam SLICE_MASK/SUBSLICE_MASK values or from the kernel version 4.17+
1003  * through the i915 query uapi.
1004  */
1005 static void
fill_masks(struct gen_device_info * devinfo)1006 fill_masks(struct gen_device_info *devinfo)
1007 {
1008    devinfo->slice_masks = (1U << devinfo->num_slices) - 1;
1009 
1010    /* Subslice masks */
1011    unsigned max_subslices = 0;
1012    for (int s = 0; s < devinfo->num_slices; s++)
1013       max_subslices = MAX2(devinfo->num_subslices[s], max_subslices);
1014    devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8);
1015 
1016    for (int s = 0; s < devinfo->num_slices; s++) {
1017       devinfo->subslice_masks[s * devinfo->subslice_slice_stride] =
1018          (1U << devinfo->num_subslices[s]) - 1;
1019    }
1020 
1021    /* EU masks */
1022    devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8);
1023    devinfo->eu_slice_stride = max_subslices * devinfo->eu_subslice_stride;
1024 
1025    for (int s = 0; s < devinfo->num_slices; s++) {
1026       for (int ss = 0; ss < devinfo->num_subslices[s]; ss++) {
1027          gen_device_info_set_eu_mask(devinfo, s, ss,
1028                                      (1U << devinfo->num_eu_per_subslice) - 1);
1029       }
1030    }
1031 }
1032 
1033 static void
reset_masks(struct gen_device_info * devinfo)1034 reset_masks(struct gen_device_info *devinfo)
1035 {
1036    devinfo->subslice_slice_stride = 0;
1037    devinfo->eu_subslice_stride = 0;
1038    devinfo->eu_slice_stride = 0;
1039 
1040    devinfo->num_slices = 0;
1041    devinfo->num_eu_per_subslice = 0;
1042    memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
1043 
1044    memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
1045    memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
1046    memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
1047    memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices));
1048 }
1049 
1050 static void
update_from_topology(struct gen_device_info * devinfo,const struct drm_i915_query_topology_info * topology)1051 update_from_topology(struct gen_device_info *devinfo,
1052                      const struct drm_i915_query_topology_info *topology)
1053 {
1054    reset_masks(devinfo);
1055 
1056    devinfo->subslice_slice_stride = topology->subslice_stride;
1057 
1058    devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8);
1059    devinfo->eu_slice_stride = topology->max_subslices * devinfo->eu_subslice_stride;
1060 
1061    assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8));
1062    memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8));
1063    devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
1064 
1065    uint32_t subslice_mask_len =
1066       topology->max_slices * topology->subslice_stride;
1067    assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len);
1068    memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset],
1069           subslice_mask_len);
1070 
1071    uint32_t n_subslices = 0;
1072    for (int s = 0; s < topology->max_slices; s++) {
1073       if ((devinfo->slice_masks & (1 << s)) == 0)
1074          continue;
1075 
1076       for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
1077          devinfo->num_subslices[s] +=
1078             __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]);
1079       }
1080       n_subslices += devinfo->num_subslices[s];
1081    }
1082    assert(n_subslices > 0);
1083 
1084    if (devinfo->gen == 11) {
1085       /* On ICL we only have one slice */
1086       assert(devinfo->slice_masks == 1);
1087 
1088       /* Count the number of subslices on each pixel pipe. Assume that
1089        * subslices 0-3 are on pixel pipe 0, and 4-7 are on pixel pipe 1.
1090        */
1091       unsigned subslices = devinfo->subslice_masks[0];
1092       unsigned ss = 0;
1093       while (subslices > 0) {
1094          if (subslices & 1)
1095             devinfo->ppipe_subslices[ss >= 4 ? 1 : 0] += 1;
1096          subslices >>= 1;
1097          ss++;
1098       }
1099    }
1100 
1101    if (devinfo->gen == 12 && devinfo->num_slices == 1) {
1102       if (n_subslices >= 6) {
1103          assert(n_subslices == 6);
1104          devinfo->l3_banks = 8;
1105       } else if (n_subslices > 2) {
1106          devinfo->l3_banks = 6;
1107       } else {
1108          devinfo->l3_banks = 4;
1109       }
1110    }
1111 
1112    uint32_t eu_mask_len =
1113       topology->eu_stride * topology->max_subslices * topology->max_slices;
1114    assert(sizeof(devinfo->eu_masks) >= eu_mask_len);
1115    memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], eu_mask_len);
1116 
1117    uint32_t n_eus = 0;
1118    for (int b = 0; b < eu_mask_len; b++)
1119       n_eus += __builtin_popcount(devinfo->eu_masks[b]);
1120 
1121    devinfo->num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1122 }
1123 
1124 static bool
update_from_masks(struct gen_device_info * devinfo,uint32_t slice_mask,uint32_t subslice_mask,uint32_t n_eus)1125 update_from_masks(struct gen_device_info *devinfo, uint32_t slice_mask,
1126                   uint32_t subslice_mask, uint32_t n_eus)
1127 {
1128    struct drm_i915_query_topology_info *topology;
1129 
1130    assert((slice_mask & 0xff) == slice_mask);
1131 
1132    size_t data_length = 100;
1133 
1134    topology = calloc(1, sizeof(*topology) + data_length);
1135    if (!topology)
1136       return false;
1137 
1138    topology->max_slices = util_last_bit(slice_mask);
1139    topology->max_subslices = util_last_bit(subslice_mask);
1140 
1141    topology->subslice_offset = DIV_ROUND_UP(topology->max_slices, 8);
1142    topology->subslice_stride = DIV_ROUND_UP(topology->max_subslices, 8);
1143 
1144    uint32_t n_subslices = __builtin_popcount(slice_mask) *
1145       __builtin_popcount(subslice_mask);
1146    uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1147    uint32_t eu_mask = (1U << num_eu_per_subslice) - 1;
1148 
1149    topology->eu_offset = topology->subslice_offset +
1150       DIV_ROUND_UP(topology->max_subslices, 8);
1151    topology->eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8);
1152 
1153    /* Set slice mask in topology */
1154    for (int b = 0; b < topology->subslice_offset; b++)
1155       topology->data[b] = (slice_mask >> (b * 8)) & 0xff;
1156 
1157    for (int s = 0; s < topology->max_slices; s++) {
1158 
1159       /* Set subslice mask in topology */
1160       for (int b = 0; b < topology->subslice_stride; b++) {
1161          int subslice_offset = topology->subslice_offset +
1162             s * topology->subslice_stride + b;
1163 
1164          topology->data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff;
1165       }
1166 
1167       /* Set eu mask in topology */
1168       for (int ss = 0; ss < topology->max_subslices; ss++) {
1169          for (int b = 0; b < topology->eu_stride; b++) {
1170             int eu_offset = topology->eu_offset +
1171                (s * topology->max_subslices + ss) * topology->eu_stride + b;
1172 
1173             topology->data[eu_offset] = (eu_mask >> (b * 8)) & 0xff;
1174          }
1175       }
1176    }
1177 
1178    update_from_topology(devinfo, topology);
1179    free(topology);
1180 
1181    return true;
1182 }
1183 
1184 static bool
getparam(int fd,uint32_t param,int * value)1185 getparam(int fd, uint32_t param, int *value)
1186 {
1187    int tmp;
1188 
1189    struct drm_i915_getparam gp = {
1190       .param = param,
1191       .value = &tmp,
1192    };
1193 
1194    int ret = gen_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
1195    if (ret != 0)
1196       return false;
1197 
1198    *value = tmp;
1199    return true;
1200 }
1201 
1202 bool
gen_get_device_info_from_pci_id(int pci_id,struct gen_device_info * devinfo)1203 gen_get_device_info_from_pci_id(int pci_id,
1204                                 struct gen_device_info *devinfo)
1205 {
1206    switch (pci_id) {
1207 #undef CHIPSET
1208 #define CHIPSET(id, family, fam_str, name) \
1209       case id: *devinfo = gen_device_info_##family; break;
1210 #include "pci_ids/i965_pci_ids.h"
1211 #include "pci_ids/iris_pci_ids.h"
1212 
1213 #undef CHIPSET
1214 #define CHIPSET(id, fam_str, name) \
1215       case id: *devinfo = gen_device_info_gen3; break;
1216 #include "pci_ids/i915_pci_ids.h"
1217 
1218    default:
1219       fprintf(stderr, "Driver does not support the 0x%x PCI ID.\n", pci_id);
1220       return false;
1221    }
1222 
1223    fill_masks(devinfo);
1224 
1225    /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
1226     *
1227     * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
1228     *  allocate scratch space enough so that each slice has 4 slices allowed."
1229     *
1230     * The equivalent internal documentation says that this programming note
1231     * applies to all Gen9+ platforms.
1232     *
1233     * The hardware typically calculates the scratch space pointer by taking
1234     * the base address, and adding per-thread-scratch-space * thread ID.
1235     * Extra padding can be necessary depending how the thread IDs are
1236     * calculated for a particular shader stage.
1237     */
1238 
1239    switch(devinfo->gen) {
1240    case 9:
1241       devinfo->max_wm_threads = 64 /* threads-per-PSD */
1242                               * devinfo->num_slices
1243                               * 4; /* effective subslices per slice */
1244       break;
1245    case 11:
1246    case 12:
1247       devinfo->max_wm_threads = 128 /* threads-per-PSD */
1248                               * devinfo->num_slices
1249                               * 8; /* subslices per slice */
1250       break;
1251    default:
1252       assert(devinfo->gen < 9);
1253       break;
1254    }
1255 
1256    assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));
1257 
1258    devinfo->chipset_id = pci_id;
1259    return true;
1260 }
1261 
1262 const char *
gen_get_device_name(int devid)1263 gen_get_device_name(int devid)
1264 {
1265    switch (devid) {
1266 #undef CHIPSET
1267 #define CHIPSET(id, family, fam_str, name) case id: return name " (" fam_str ")"; break;
1268 #include "pci_ids/i965_pci_ids.h"
1269 #include "pci_ids/iris_pci_ids.h"
1270    default:
1271       return NULL;
1272    }
1273 }
1274 
1275 /**
1276  * for gen8/gen9, SLICE_MASK/SUBSLICE_MASK can be used to compute the topology
1277  * (kernel 4.13+)
1278  */
1279 static bool
getparam_topology(struct gen_device_info * devinfo,int fd)1280 getparam_topology(struct gen_device_info *devinfo, int fd)
1281 {
1282    int slice_mask = 0;
1283    if (!getparam(fd, I915_PARAM_SLICE_MASK, &slice_mask))
1284       return false;
1285 
1286    int n_eus;
1287    if (!getparam(fd, I915_PARAM_EU_TOTAL, &n_eus))
1288       return false;
1289 
1290    int subslice_mask = 0;
1291    if (!getparam(fd, I915_PARAM_SUBSLICE_MASK, &subslice_mask))
1292       return false;
1293 
1294    return update_from_masks(devinfo, slice_mask, subslice_mask, n_eus);
1295 }
1296 
1297 /**
1298  * preferred API for updating the topology in devinfo (kernel 4.17+)
1299  */
1300 static bool
query_topology(struct gen_device_info * devinfo,int fd)1301 query_topology(struct gen_device_info *devinfo, int fd)
1302 {
1303    struct drm_i915_query_item item = {
1304       .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
1305    };
1306    struct drm_i915_query query = {
1307       .num_items = 1,
1308       .items_ptr = (uintptr_t) &item,
1309    };
1310 
1311    if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query))
1312       return false;
1313 
1314    if (item.length < 0)
1315       return false;
1316 
1317    struct drm_i915_query_topology_info *topo_info =
1318       (struct drm_i915_query_topology_info *) calloc(1, item.length);
1319    item.data_ptr = (uintptr_t) topo_info;
1320 
1321    if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query) ||
1322        item.length <= 0)
1323       return false;
1324 
1325    update_from_topology(devinfo, topo_info);
1326 
1327    free(topo_info);
1328 
1329    return true;
1330 
1331 }
1332 
1333 int
gen_get_aperture_size(int fd,uint64_t * size)1334 gen_get_aperture_size(int fd, uint64_t *size)
1335 {
1336    struct drm_i915_gem_get_aperture aperture = { 0 };
1337 
1338    int ret = gen_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
1339    if (ret == 0 && size)
1340       *size = aperture.aper_size;
1341 
1342    return ret;
1343 }
1344 
1345 static bool
gen_has_get_tiling(int fd)1346 gen_has_get_tiling(int fd)
1347 {
1348    int ret;
1349 
1350    struct drm_i915_gem_create gem_create = {
1351       .size = 4096,
1352    };
1353 
1354    if (gen_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) {
1355       unreachable("Failed to create GEM BO");
1356       return false;
1357    }
1358 
1359    struct drm_i915_gem_get_tiling get_tiling = {
1360       .handle = gem_create.handle,
1361    };
1362    ret = gen_ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &get_tiling);
1363 
1364    struct drm_gem_close close = {
1365       .handle = gem_create.handle,
1366    };
1367    gen_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
1368 
1369    return ret == 0;
1370 }
1371 
1372 bool
gen_get_device_info_from_fd(int fd,struct gen_device_info * devinfo)1373 gen_get_device_info_from_fd(int fd, struct gen_device_info *devinfo)
1374 {
1375    int devid = 0;
1376 
1377    const char *devid_override = getenv("INTEL_DEVID_OVERRIDE");
1378    if (devid_override && strlen(devid_override) > 0) {
1379       if (geteuid() == getuid()) {
1380          devid = gen_device_name_to_pci_device_id(devid_override);
1381          /* Fallback to PCI ID. */
1382          if (devid <= 0)
1383             devid = strtol(devid_override, NULL, 0);
1384          if (devid <= 0) {
1385             fprintf(stderr, "Invalid INTEL_DEVID_OVERRIDE=\"%s\". "
1386                     "Use a valid numeric PCI ID or one of the supported "
1387                     "platform names: %s", devid_override, name_map[0].name);
1388             for (unsigned i = 1; i < ARRAY_SIZE(name_map); i++)
1389                fprintf(stderr, ", %s", name_map[i].name);
1390             fprintf(stderr, "\n");
1391             return false;
1392          }
1393       } else {
1394          fprintf(stderr, "Ignoring INTEL_DEVID_OVERRIDE=\"%s\" because "
1395                  "real and effective user ID don't match.\n", devid_override);
1396       }
1397    }
1398 
1399    if (devid > 0) {
1400       if (!gen_get_device_info_from_pci_id(devid, devinfo))
1401          return false;
1402       devinfo->no_hw = true;
1403    } else {
1404       /* query the device id */
1405       if (!getparam(fd, I915_PARAM_CHIPSET_ID, &devid))
1406          return false;
1407       if (!gen_get_device_info_from_pci_id(devid, devinfo))
1408          return false;
1409       devinfo->no_hw = false;
1410    }
1411 
1412    if (devinfo->gen == 10) {
1413       fprintf(stderr, "Gen10 support is redacted.\n");
1414       return false;
1415    }
1416 
1417    /* remaining initializion queries the kernel for device info */
1418    if (devinfo->no_hw)
1419       return true;
1420 
1421    int timestamp_frequency;
1422    if (getparam(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY,
1423                 ×tamp_frequency))
1424       devinfo->timestamp_frequency = timestamp_frequency;
1425    else if (devinfo->gen >= 10)
1426       /* gen10 and later requires the timestamp_frequency to be updated */
1427       return false;
1428 
1429    if (!getparam(fd, I915_PARAM_REVISION, &devinfo->revision))
1430       devinfo->revision = 0;
1431 
1432    if (!query_topology(devinfo, fd)) {
1433       if (devinfo->gen >= 10) {
1434          /* topology uAPI required for CNL+ (kernel 4.17+) */
1435          return false;
1436       }
1437 
1438       /* else use the kernel 4.13+ api for gen8+.  For older kernels, topology
1439        * will be wrong, affecting GPU metrics. In this case, fail silently.
1440        */
1441       getparam_topology(devinfo, fd);
1442    }
1443 
1444    gen_get_aperture_size(fd, &devinfo->aperture_bytes);
1445    devinfo->has_tiling_uapi = gen_has_get_tiling(fd);
1446 
1447    return true;
1448 }
1449