1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include "gen_device_info.h"
28 #include "compiler/shader_enums.h"
29 #include "util/macros.h"
30
31 static const struct gen_device_info gen_device_info_i965 = {
32 .gen = 4,
33 .has_negative_rhw_bug = true,
34 .num_slices = 1,
35 .num_subslices = { 1, },
36 .num_thread_per_eu = 4,
37 .max_vs_threads = 16,
38 .max_gs_threads = 2,
39 .max_wm_threads = 8 * 4,
40 .urb = {
41 .size = 256,
42 },
43 .timestamp_frequency = 12500000,
44 };
45
46 static const struct gen_device_info gen_device_info_g4x = {
47 .gen = 4,
48 .has_pln = true,
49 .has_compr4 = true,
50 .has_surface_tile_offset = true,
51 .is_g4x = true,
52 .num_slices = 1,
53 .num_subslices = { 1, },
54 .num_thread_per_eu = 5,
55 .max_vs_threads = 32,
56 .max_gs_threads = 2,
57 .max_wm_threads = 10 * 5,
58 .urb = {
59 .size = 384,
60 },
61 .timestamp_frequency = 12500000,
62 };
63
64 static const struct gen_device_info gen_device_info_ilk = {
65 .gen = 5,
66 .has_pln = true,
67 .has_compr4 = true,
68 .has_surface_tile_offset = true,
69 .num_slices = 1,
70 .num_subslices = { 1, },
71 .num_thread_per_eu = 6,
72 .max_vs_threads = 72,
73 .max_gs_threads = 32,
74 .max_wm_threads = 12 * 6,
75 .urb = {
76 .size = 1024,
77 },
78 .timestamp_frequency = 12500000,
79 };
80
81 static const struct gen_device_info gen_device_info_snb_gt1 = {
82 .gen = 6,
83 .gt = 1,
84 .has_hiz_and_separate_stencil = true,
85 .has_llc = true,
86 .has_pln = true,
87 .has_surface_tile_offset = true,
88 .needs_unlit_centroid_workaround = true,
89 .num_slices = 1,
90 .num_subslices = { 1, },
91 .num_thread_per_eu = 6, /* Not confirmed */
92 .max_vs_threads = 24,
93 .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
94 .max_wm_threads = 40,
95 .urb = {
96 .size = 32,
97 .min_entries = {
98 [MESA_SHADER_VERTEX] = 24,
99 },
100 .max_entries = {
101 [MESA_SHADER_VERTEX] = 256,
102 [MESA_SHADER_GEOMETRY] = 256,
103 },
104 },
105 .timestamp_frequency = 12500000,
106 };
107
108 static const struct gen_device_info gen_device_info_snb_gt2 = {
109 .gen = 6,
110 .gt = 2,
111 .has_hiz_and_separate_stencil = true,
112 .has_llc = true,
113 .has_pln = true,
114 .has_surface_tile_offset = true,
115 .needs_unlit_centroid_workaround = true,
116 .num_slices = 1,
117 .num_subslices = { 1, },
118 .num_thread_per_eu = 6, /* Not confirmed */
119 .max_vs_threads = 60,
120 .max_gs_threads = 60,
121 .max_wm_threads = 80,
122 .urb = {
123 .size = 64,
124 .min_entries = {
125 [MESA_SHADER_VERTEX] = 24,
126 },
127 .max_entries = {
128 [MESA_SHADER_VERTEX] = 256,
129 [MESA_SHADER_GEOMETRY] = 256,
130 },
131 },
132 .timestamp_frequency = 12500000,
133 };
134
/* Designated initializers shared by the gen 7 tables in this file.
 *
 * Expands to a comma-separated list without a trailing comma, so a user
 * writes "GEN7_FEATURES, ...".  A table can override one of these fields by
 * naming it again after the macro (gen_device_info_byt does so for has_llc).
 */
#define GEN7_FEATURES                               \
   .gen = 7,                                        \
   .timestamp_frequency = 12500000,                 \
   .has_surface_tile_offset = true,                 \
   .has_pln = true,                                 \
   .has_llc = true,                                 \
   .must_use_separate_stencil = true,               \
   .has_hiz_and_separate_stencil = true
143
144 static const struct gen_device_info gen_device_info_ivb_gt1 = {
145 GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
146 .num_slices = 1,
147 .num_subslices = { 1, },
148 .num_thread_per_eu = 6,
149 .l3_banks = 2,
150 .max_vs_threads = 36,
151 .max_tcs_threads = 36,
152 .max_tes_threads = 36,
153 .max_gs_threads = 36,
154 .max_wm_threads = 48,
155 .max_cs_threads = 36,
156 .urb = {
157 .size = 128,
158 .min_entries = {
159 [MESA_SHADER_VERTEX] = 32,
160 [MESA_SHADER_TESS_EVAL] = 10,
161 },
162 .max_entries = {
163 [MESA_SHADER_VERTEX] = 512,
164 [MESA_SHADER_TESS_CTRL] = 32,
165 [MESA_SHADER_TESS_EVAL] = 288,
166 [MESA_SHADER_GEOMETRY] = 192,
167 },
168 },
169 };
170
171 static const struct gen_device_info gen_device_info_ivb_gt2 = {
172 GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
173 .num_slices = 1,
174 .num_subslices = { 1, },
175 .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
176 * @max_wm_threads ... */
177 .l3_banks = 4,
178 .max_vs_threads = 128,
179 .max_tcs_threads = 128,
180 .max_tes_threads = 128,
181 .max_gs_threads = 128,
182 .max_wm_threads = 172,
183 .max_cs_threads = 64,
184 .urb = {
185 .size = 256,
186 .min_entries = {
187 [MESA_SHADER_VERTEX] = 32,
188 [MESA_SHADER_TESS_EVAL] = 10,
189 },
190 .max_entries = {
191 [MESA_SHADER_VERTEX] = 704,
192 [MESA_SHADER_TESS_CTRL] = 64,
193 [MESA_SHADER_TESS_EVAL] = 448,
194 [MESA_SHADER_GEOMETRY] = 320,
195 },
196 },
197 };
198
199 static const struct gen_device_info gen_device_info_byt = {
200 GEN7_FEATURES, .is_baytrail = true, .gt = 1,
201 .num_slices = 1,
202 .num_subslices = { 1, },
203 .num_thread_per_eu = 8,
204 .l3_banks = 1,
205 .has_llc = false,
206 .max_vs_threads = 36,
207 .max_tcs_threads = 36,
208 .max_tes_threads = 36,
209 .max_gs_threads = 36,
210 .max_wm_threads = 48,
211 .max_cs_threads = 32,
212 .urb = {
213 .size = 128,
214 .min_entries = {
215 [MESA_SHADER_VERTEX] = 32,
216 [MESA_SHADER_TESS_EVAL] = 10,
217 },
218 .max_entries = {
219 [MESA_SHADER_VERTEX] = 512,
220 [MESA_SHADER_TESS_CTRL] = 32,
221 [MESA_SHADER_TESS_EVAL] = 288,
222 [MESA_SHADER_GEOMETRY] = 192,
223 },
224 },
225 };
226
/* Designated initializers shared by the Haswell tables: everything in
 * GEN7_FEATURES plus the Haswell-only flags.  Like GEN7_FEATURES, the
 * expansion has no trailing comma. */
#define HSW_FEATURES                    \
   GEN7_FEATURES,                       \
   .has_resource_streamer = true,       \
   .supports_simd16_3src = true,        \
   .is_haswell = true
232
233 static const struct gen_device_info gen_device_info_hsw_gt1 = {
234 HSW_FEATURES, .gt = 1,
235 .num_slices = 1,
236 .num_subslices = { 1, },
237 .num_thread_per_eu = 7,
238 .l3_banks = 2,
239 .max_vs_threads = 70,
240 .max_tcs_threads = 70,
241 .max_tes_threads = 70,
242 .max_gs_threads = 70,
243 .max_wm_threads = 102,
244 .max_cs_threads = 70,
245 .urb = {
246 .size = 128,
247 .min_entries = {
248 [MESA_SHADER_VERTEX] = 32,
249 [MESA_SHADER_TESS_EVAL] = 10,
250 },
251 .max_entries = {
252 [MESA_SHADER_VERTEX] = 640,
253 [MESA_SHADER_TESS_CTRL] = 64,
254 [MESA_SHADER_TESS_EVAL] = 384,
255 [MESA_SHADER_GEOMETRY] = 256,
256 },
257 },
258 };
259
260 static const struct gen_device_info gen_device_info_hsw_gt2 = {
261 HSW_FEATURES, .gt = 2,
262 .num_slices = 1,
263 .num_subslices = { 2, },
264 .num_thread_per_eu = 7,
265 .l3_banks = 4,
266 .max_vs_threads = 280,
267 .max_tcs_threads = 256,
268 .max_tes_threads = 280,
269 .max_gs_threads = 256,
270 .max_wm_threads = 204,
271 .max_cs_threads = 70,
272 .urb = {
273 .size = 256,
274 .min_entries = {
275 [MESA_SHADER_VERTEX] = 64,
276 [MESA_SHADER_TESS_EVAL] = 10,
277 },
278 .max_entries = {
279 [MESA_SHADER_VERTEX] = 1664,
280 [MESA_SHADER_TESS_CTRL] = 128,
281 [MESA_SHADER_TESS_EVAL] = 960,
282 [MESA_SHADER_GEOMETRY] = 640,
283 },
284 },
285 };
286
287 static const struct gen_device_info gen_device_info_hsw_gt3 = {
288 HSW_FEATURES, .gt = 3,
289 .num_slices = 2,
290 .num_subslices = { 2, },
291 .num_thread_per_eu = 7,
292 .l3_banks = 8,
293 .max_vs_threads = 280,
294 .max_tcs_threads = 256,
295 .max_tes_threads = 280,
296 .max_gs_threads = 256,
297 .max_wm_threads = 408,
298 .max_cs_threads = 70,
299 .urb = {
300 .size = 512,
301 .min_entries = {
302 [MESA_SHADER_VERTEX] = 64,
303 [MESA_SHADER_TESS_EVAL] = 10,
304 },
305 .max_entries = {
306 [MESA_SHADER_VERTEX] = 1664,
307 [MESA_SHADER_TESS_CTRL] = 128,
308 [MESA_SHADER_TESS_EVAL] = 960,
309 [MESA_SHADER_GEOMETRY] = 640,
310 },
311 },
312 };
313
/* Designated initializers shared by the gen 8 tables; the gen 9 and gen 10
 * feature macros also start from this list and then override parts of it.
 * The expansion has no trailing comma.  Fields named again after the macro
 * replace the values given here (gen_device_info_chv does this for has_llc
 * and the thread limits). */
#define GEN8_FEATURES                               \
   .gen = 8,                                        \
   .timestamp_frequency = 12500000,                 \
   /* Feature flags. */                             \
   .has_surface_tile_offset = true,                 \
   .supports_simd16_3src = true,                    \
   .has_pln = true,                                 \
   .has_llc = true,                                 \
   .must_use_separate_stencil = true,               \
   .has_resource_streamer = true,                   \
   .has_hiz_and_separate_stencil = true,            \
   /* Per-stage thread limits. */                   \
   .max_wm_threads = 384,                           \
   .max_gs_threads = 504,                           \
   .max_tes_threads = 504,                          \
   .max_tcs_threads = 504,                          \
   .max_vs_threads = 504
329
330 static const struct gen_device_info gen_device_info_bdw_gt1 = {
331 GEN8_FEATURES, .gt = 1,
332 .is_broadwell = true,
333 .num_slices = 1,
334 .num_subslices = { 2, },
335 .num_thread_per_eu = 7,
336 .l3_banks = 2,
337 .max_cs_threads = 42,
338 .urb = {
339 .size = 192,
340 .min_entries = {
341 [MESA_SHADER_VERTEX] = 64,
342 [MESA_SHADER_TESS_EVAL] = 34,
343 },
344 .max_entries = {
345 [MESA_SHADER_VERTEX] = 2560,
346 [MESA_SHADER_TESS_CTRL] = 504,
347 [MESA_SHADER_TESS_EVAL] = 1536,
348 [MESA_SHADER_GEOMETRY] = 960,
349 },
350 }
351 };
352
353 static const struct gen_device_info gen_device_info_bdw_gt2 = {
354 GEN8_FEATURES, .gt = 2,
355 .is_broadwell = true,
356 .num_slices = 1,
357 .num_subslices = { 3, },
358 .num_thread_per_eu = 7,
359 .l3_banks = 4,
360 .max_cs_threads = 56,
361 .urb = {
362 .size = 384,
363 .min_entries = {
364 [MESA_SHADER_VERTEX] = 64,
365 [MESA_SHADER_TESS_EVAL] = 34,
366 },
367 .max_entries = {
368 [MESA_SHADER_VERTEX] = 2560,
369 [MESA_SHADER_TESS_CTRL] = 504,
370 [MESA_SHADER_TESS_EVAL] = 1536,
371 [MESA_SHADER_GEOMETRY] = 960,
372 },
373 }
374 };
375
376 static const struct gen_device_info gen_device_info_bdw_gt3 = {
377 GEN8_FEATURES, .gt = 3,
378 .is_broadwell = true,
379 .num_slices = 2,
380 .num_subslices = { 3, 3, },
381 .num_thread_per_eu = 7,
382 .l3_banks = 8,
383 .max_cs_threads = 56,
384 .urb = {
385 .size = 384,
386 .min_entries = {
387 [MESA_SHADER_VERTEX] = 64,
388 [MESA_SHADER_TESS_EVAL] = 34,
389 },
390 .max_entries = {
391 [MESA_SHADER_VERTEX] = 2560,
392 [MESA_SHADER_TESS_CTRL] = 504,
393 [MESA_SHADER_TESS_EVAL] = 1536,
394 [MESA_SHADER_GEOMETRY] = 960,
395 },
396 }
397 };
398
399 static const struct gen_device_info gen_device_info_chv = {
400 GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
401 .has_llc = false,
402 .num_slices = 1,
403 .num_subslices = { 2, },
404 .num_thread_per_eu = 7,
405 .l3_banks = 2,
406 .max_vs_threads = 80,
407 .max_tcs_threads = 80,
408 .max_tes_threads = 80,
409 .max_gs_threads = 80,
410 .max_wm_threads = 128,
411 .max_cs_threads = 6 * 7,
412 .urb = {
413 .size = 192,
414 .min_entries = {
415 [MESA_SHADER_VERTEX] = 34,
416 [MESA_SHADER_TESS_EVAL] = 34,
417 },
418 .max_entries = {
419 [MESA_SHADER_VERTEX] = 640,
420 [MESA_SHADER_TESS_CTRL] = 80,
421 [MESA_SHADER_TESS_EVAL] = 384,
422 [MESA_SHADER_GEOMETRY] = 256,
423 },
424 }
425 };
426
/* Gen 9 hardware limits.  Used after GEN8_FEATURES in the gen 9 feature
 * macros, so the gen, thread-limit and timestamp values here replace the
 * gen 8 ones.  The expansion has no trailing comma. */
#define GEN9_HW_INFO                                \
   .gen = 9,                                        \
   .timestamp_frequency = 12000000,                 \
   .max_cs_threads = 56,                            \
   .max_tes_threads = 336,                          \
   .max_tcs_threads = 336,                          \
   .max_gs_threads = 336,                           \
   .max_vs_threads = 336,                           \
   .urb = {                                         \
      .size = 384,                                  \
      .max_entries = {                              \
         [MESA_SHADER_GEOMETRY]  = 640,             \
         [MESA_SHADER_TESS_EVAL] = 1120,            \
         [MESA_SHADER_TESS_CTRL] = 672,             \
         [MESA_SHADER_VERTEX]    = 1856,            \
      },                                            \
      .min_entries = {                              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
         [MESA_SHADER_VERTEX]    = 64,              \
      },                                            \
   }
448
/* Initializers shared by the gen 9 "LP" tables (used for the bxt and glk
 * families below).
 *
 * GEN8_FEATURES and GEN9_HW_INFO must come first: every field after them
 * that they also set (has_llc, thread limits, timestamp_frequency, urb)
 * replaces the earlier value.  The expansion has no trailing comma. */
#define GEN9_LP_FEATURES                            \
   GEN8_FEATURES,                                   \
   GEN9_HW_INFO,                                    \
   .gt = 1,                                         \
   .has_llc = false,                                \
   .timestamp_frequency = 19200000,                 \
   .num_slices = 1,                                 \
   .num_thread_per_eu = 6,                          \
   .max_cs_threads = 6 * 6,                         \
   .max_gs_threads = 112,                           \
   .max_tes_threads = 112,                          \
   .max_tcs_threads = 112,                          \
   .max_vs_threads = 112,                           \
   .urb = {                                         \
      .size = 192,                                  \
      .max_entries = {                              \
         [MESA_SHADER_GEOMETRY]  = 256,             \
         [MESA_SHADER_TESS_EVAL] = 416,             \
         [MESA_SHADER_TESS_CTRL] = 256,             \
         [MESA_SHADER_VERTEX]    = 704,             \
      },                                            \
      .min_entries = {                              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
         [MESA_SHADER_VERTEX]    = 34,              \
      },                                            \
   }
475
/* GEN9_LP_FEATURES for the variant with three subslices in its single
 * slice.  The expansion has no trailing comma. */
#define GEN9_LP_FEATURES_3X6            \
   GEN9_LP_FEATURES,                    \
   .num_subslices = { 3, }
479
/* GEN9_LP_FEATURES for the variant with two subslices in its single slice.
 * The thread limits and the urb block named after GEN9_LP_FEATURES replace
 * the values that macro supplies.  The expansion has no trailing comma. */
#define GEN9_LP_FEATURES_2X6                        \
   GEN9_LP_FEATURES,                                \
   .num_subslices = { 2, },                         \
   .max_cs_threads = 6 * 6,                         \
   .max_gs_threads = 56,                            \
   .max_tes_threads = 56,                           \
   .max_tcs_threads = 56,                           \
   .max_vs_threads = 56,                            \
   .urb = {                                         \
      .size = 128,                                  \
      .max_entries = {                              \
         [MESA_SHADER_GEOMETRY]  = 128,             \
         [MESA_SHADER_TESS_EVAL] = 208,             \
         [MESA_SHADER_TESS_CTRL] = 128,             \
         [MESA_SHADER_VERTEX]    = 352,             \
      },                                            \
      .min_entries = {                              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
         [MESA_SHADER_VERTEX]    = 34,              \
      },                                            \
   }
501
/* Initializers shared by the non-LP gen 9 tables (skl, kbl and cfl below):
 * the gen 8 feature set, overridden by the gen 9 hardware limits, with seven
 * threads per EU.  The expansion has no trailing comma. */
#define GEN9_FEATURES                   \
   GEN8_FEATURES,                       \
   GEN9_HW_INFO,                        \
   .num_thread_per_eu = 7
506
507 static const struct gen_device_info gen_device_info_skl_gt1 = {
508 GEN9_FEATURES, .gt = 1,
509 .is_skylake = true,
510 .num_slices = 1,
511 .num_subslices = { 2, },
512 .l3_banks = 2,
513 .urb.size = 192,
514 };
515
516 static const struct gen_device_info gen_device_info_skl_gt2 = {
517 GEN9_FEATURES, .gt = 2,
518 .is_skylake = true,
519 .num_slices = 1,
520 .num_subslices = { 3, },
521 .l3_banks = 4,
522 };
523
524 static const struct gen_device_info gen_device_info_skl_gt3 = {
525 GEN9_FEATURES, .gt = 3,
526 .is_skylake = true,
527 .num_slices = 2,
528 .num_subslices = { 3, 3, },
529 .l3_banks = 8,
530 };
531
532 static const struct gen_device_info gen_device_info_skl_gt4 = {
533 GEN9_FEATURES, .gt = 4,
534 .is_skylake = true,
535 .num_slices = 3,
536 .num_subslices = { 3, 3, 3, },
537 .l3_banks = 12,
538 /* From the "L3 Allocation and Programming" documentation:
539 *
540 * "URB is limited to 1008KB due to programming restrictions. This is not a
541 * restriction of the L3 implementation, but of the FF and other clients.
542 * Therefore, in a GT4 implementation it is possible for the programmed
543 * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
544 * only 1008KB of this will be used."
545 */
546 .urb.size = 1008 / 3,
547 };
548
549 static const struct gen_device_info gen_device_info_bxt = {
550 GEN9_LP_FEATURES_3X6,
551 .is_broxton = true,
552 .l3_banks = 2,
553 };
554
555 static const struct gen_device_info gen_device_info_bxt_2x6 = {
556 GEN9_LP_FEATURES_2X6,
557 .is_broxton = true,
558 .l3_banks = 1,
559 };
/*
 * Note: for all KBL SKUs, the PRM lists the GS entries under SKL rather than
 * SKL+, and it has no KBL-specific entry.  The KBL tables below therefore use
 * the default SKL (GEN9) GS entries value.
 */
564
565 static const struct gen_device_info gen_device_info_kbl_gt1 = {
566 GEN9_FEATURES,
567 .is_kabylake = true,
568 .gt = 1,
569
570 .max_cs_threads = 7 * 6,
571 .urb.size = 192,
572 .num_slices = 1,
573 .num_subslices = { 2, },
574 .l3_banks = 2,
575 };
576
577 static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
578 GEN9_FEATURES,
579 .is_kabylake = true,
580 .gt = 1,
581
582 .max_cs_threads = 7 * 6,
583 .num_slices = 1,
584 .num_subslices = { 3, },
585 .l3_banks = 4,
586 };
587
588 static const struct gen_device_info gen_device_info_kbl_gt2 = {
589 GEN9_FEATURES,
590 .is_kabylake = true,
591 .gt = 2,
592
593 .num_slices = 1,
594 .num_subslices = { 3, },
595 .l3_banks = 4,
596 };
597
598 static const struct gen_device_info gen_device_info_kbl_gt3 = {
599 GEN9_FEATURES,
600 .is_kabylake = true,
601 .gt = 3,
602
603 .num_slices = 2,
604 .num_subslices = { 3, 3, },
605 .l3_banks = 8,
606 };
607
608 static const struct gen_device_info gen_device_info_kbl_gt4 = {
609 GEN9_FEATURES,
610 .is_kabylake = true,
611 .gt = 4,
612
613 /*
614 * From the "L3 Allocation and Programming" documentation:
615 *
616 * "URB is limited to 1008KB due to programming restrictions. This
617 * is not a restriction of the L3 implementation, but of the FF and
618 * other clients. Therefore, in a GT4 implementation it is
619 * possible for the programmed allocation of the L3 data array to
620 * provide 3*384KB=1152KB for URB, but only 1008KB of this
621 * will be used."
622 */
623 .urb.size = 1008 / 3,
624 .num_slices = 3,
625 .num_subslices = { 3, 3, 3, },
626 .l3_banks = 12,
627 };
628
629 static const struct gen_device_info gen_device_info_glk = {
630 GEN9_LP_FEATURES_3X6,
631 .is_geminilake = true,
632 .l3_banks = 2,
633 };
634
635 /*TODO: Initialize l3_banks when we know the number. */
636 static const struct gen_device_info gen_device_info_glk_2x6 = {
637 GEN9_LP_FEATURES_2X6,
638 .is_geminilake = true,
639 };
640
641 static const struct gen_device_info gen_device_info_cfl_gt1 = {
642 GEN9_FEATURES,
643 .is_coffeelake = true,
644 .gt = 1,
645
646 .num_slices = 1,
647 .num_subslices = { 2, },
648 .l3_banks = 2,
649 };
650 static const struct gen_device_info gen_device_info_cfl_gt2 = {
651 GEN9_FEATURES,
652 .is_coffeelake = true,
653 .gt = 2,
654
655 .num_slices = 1,
656 .num_subslices = { 3, },
657 .l3_banks = 4,
658 };
659
660 static const struct gen_device_info gen_device_info_cfl_gt3 = {
661 GEN9_FEATURES,
662 .is_coffeelake = true,
663 .gt = 3,
664
665 .num_slices = 2,
666 .num_subslices = { 3, 3, },
667 .l3_banks = 8,
668 };
669
/* Gen 10 hardware limits.  Used after GEN8_FEATURES in GEN10_FEATURES, so
 * the gen, thread-limit and timestamp values here replace the gen 8 ones.
 * The expansion has no trailing comma. */
#define GEN10_HW_INFO                               \
   .gen = 10,                                       \
   .timestamp_frequency = 19200000,                 \
   .num_thread_per_eu = 7,                          \
   .max_cs_threads = 56,                            \
   .max_tes_threads = 624,                          \
   .max_tcs_threads = 432,                          \
   .max_gs_threads = 432,                           \
   .max_vs_threads = 728,                           \
   .urb = {                                         \
      .size = 256,                                  \
      .max_entries = {                              \
         [MESA_SHADER_GEOMETRY]  = 832,             \
         [MESA_SHADER_TESS_EVAL] = 2064,            \
         [MESA_SHADER_TESS_CTRL] = 896,             \
         [MESA_SHADER_VERTEX]    = 3936,            \
      },                                            \
      .min_entries = {                              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
         [MESA_SHADER_VERTEX]    = 64,              \
      },                                            \
   }
692
/* Wrap a comma-separated list of per-slice subslice counts in braces so the
 * whole list can be passed as ONE macro argument (see GEN10_FEATURES).
 * Written with the standard C99 __VA_ARGS__ form rather than the GNU-only
 * named variadic parameter; the expansion is unchanged: { a, b, ..., } */
#define subslices(...) { __VA_ARGS__, }
694
/* Initializers for a gen 10 table.
 *
 *   gt_level       value for .gt
 *   slice_count    value for .num_slices
 *   subslice_init  braced initializer for .num_subslices; build it with
 *                  subslices() so its commas do not split the argument
 *   l3_count       value for .l3_banks
 *
 * GEN8_FEATURES comes first so that GEN10_HW_INFO replaces the gen 8 values
 * it also sets.  The expansion has no trailing comma. */
#define GEN10_FEATURES(gt_level, slice_count, subslice_init, l3_count) \
   GEN8_FEATURES,                                                      \
   GEN10_HW_INFO,                                                      \
   .l3_banks = l3_count,                                               \
   .num_subslices = subslice_init,                                     \
   .num_slices = slice_count,                                          \
   .gt = gt_level
702
703 static const struct gen_device_info gen_device_info_cnl_2x8 = {
704 /* GT0.5 */
705 GEN10_FEATURES(1, 1, subslices(2), 2),
706 .is_cannonlake = true,
707 };
708
709 static const struct gen_device_info gen_device_info_cnl_3x8 = {
710 /* GT1 */
711 GEN10_FEATURES(1, 1, subslices(3), 3),
712 .is_cannonlake = true,
713 };
714
715 static const struct gen_device_info gen_device_info_cnl_4x8 = {
716 /* GT 1.5 */
717 GEN10_FEATURES(1, 2, subslices(2, 2), 6),
718 .is_cannonlake = true,
719 };
720
721 static const struct gen_device_info gen_device_info_cnl_5x8 = {
722 /* GT2 */
723 GEN10_FEATURES(2, 2, subslices(3, 2), 6),
724 .is_cannonlake = true,
725 };
726
727 bool
gen_get_device_info(int devid,struct gen_device_info * devinfo)728 gen_get_device_info(int devid, struct gen_device_info *devinfo)
729 {
730 switch (devid) {
731 #undef CHIPSET
732 #define CHIPSET(id, family, name) \
733 case id: *devinfo = gen_device_info_##family; break;
734 #include "pci_ids/i965_pci_ids.h"
735 default:
736 fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid);
737 return false;
738 }
739
740 /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
741 *
742 * "Scratch Space per slice is computed based on 4 sub-slices. SW must
743 * allocate scratch space enough so that each slice has 4 slices allowed."
744 *
745 * The equivalent internal documentation says that this programming note
746 * applies to all Gen9+ platforms.
747 *
748 * The hardware typically calculates the scratch space pointer by taking
749 * the base address, and adding per-thread-scratch-space * thread ID.
750 * Extra padding can be necessary depending how the thread IDs are
751 * calculated for a particular shader stage.
752 */
753 if (devinfo->gen >= 9) {
754 devinfo->max_wm_threads = 64 /* threads-per-PSD */
755 * devinfo->num_slices
756 * 4; /* effective subslices per slice */
757 }
758
759 assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));
760
761 return true;
762 }
763
764 const char *
gen_get_device_name(int devid)765 gen_get_device_name(int devid)
766 {
767 switch (devid) {
768 #undef CHIPSET
769 #define CHIPSET(id, family, name) case id: return name;
770 #include "pci_ids/i965_pci_ids.h"
771 default:
772 return NULL;
773 }
774 }
775