• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include "gen_device_info.h"
27 #include "compiler/shader_enums.h"
28 
29 static const struct gen_device_info gen_device_info_i965 = {
30    .gen = 4,
31    .has_negative_rhw_bug = true,
32    .num_slices = 1,
33    .max_vs_threads = 16,
34    .max_gs_threads = 2,
35    .max_wm_threads = 8 * 4,
36    .urb = {
37       .size = 256,
38    },
39 };
40 
41 static const struct gen_device_info gen_device_info_g4x = {
42    .gen = 4,
43    .has_pln = true,
44    .has_compr4 = true,
45    .has_surface_tile_offset = true,
46    .is_g4x = true,
47    .num_slices = 1,
48    .max_vs_threads = 32,
49    .max_gs_threads = 2,
50    .max_wm_threads = 10 * 5,
51    .urb = {
52       .size = 384,
53    },
54 };
55 
56 static const struct gen_device_info gen_device_info_ilk = {
57    .gen = 5,
58    .has_pln = true,
59    .has_compr4 = true,
60    .has_surface_tile_offset = true,
61    .num_slices = 1,
62    .max_vs_threads = 72,
63    .max_gs_threads = 32,
64    .max_wm_threads = 12 * 6,
65    .urb = {
66       .size = 1024,
67    },
68 };
69 
70 static const struct gen_device_info gen_device_info_snb_gt1 = {
71    .gen = 6,
72    .gt = 1,
73    .has_hiz_and_separate_stencil = true,
74    .has_llc = true,
75    .has_pln = true,
76    .has_surface_tile_offset = true,
77    .needs_unlit_centroid_workaround = true,
78    .num_slices = 1,
79    .max_vs_threads = 24,
80    .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
81    .max_wm_threads = 40,
82    .urb = {
83       .size = 32,
84       .min_entries = {
85          [MESA_SHADER_VERTEX]   = 24,
86       },
87       .max_entries = {
88          [MESA_SHADER_VERTEX]   = 256,
89          [MESA_SHADER_GEOMETRY] = 256,
90       },
91    },
92 };
93 
94 static const struct gen_device_info gen_device_info_snb_gt2 = {
95    .gen = 6,
96    .gt = 2,
97    .has_hiz_and_separate_stencil = true,
98    .has_llc = true,
99    .has_pln = true,
100    .has_surface_tile_offset = true,
101    .needs_unlit_centroid_workaround = true,
102    .num_slices = 1,
103    .max_vs_threads = 60,
104    .max_gs_threads = 60,
105    .max_wm_threads = 80,
106    .urb = {
107       .size = 64,
108       .min_entries = {
109          [MESA_SHADER_VERTEX]   = 24,
110       },
111       .max_entries = {
112          [MESA_SHADER_VERTEX]   = 256,
113          [MESA_SHADER_GEOMETRY] = 256,
114       },
115    },
116 };
117 
/*
 * Designated-initializer fragment shared by the Gen7 tables.  It expands
 * without a trailing comma, so a user writes "GEN7_FEATURES, ..." and may
 * override any of these fields with a later initializer.
 */
#define GEN7_FEATURES                     \
   .gen = 7,                              \
   .has_hiz_and_separate_stencil = true,  \
   .must_use_separate_stencil = true,     \
   .has_llc = true,                       \
   .has_pln = true,                       \
   .has_surface_tile_offset = true
125 
126 static const struct gen_device_info gen_device_info_ivb_gt1 = {
127    GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
128    .num_slices = 1,
129    .max_vs_threads = 36,
130    .max_tcs_threads = 36,
131    .max_tes_threads = 36,
132    .max_gs_threads = 36,
133    .max_wm_threads = 48,
134    .max_cs_threads = 36,
135    .urb = {
136       .size = 128,
137       .min_entries = {
138          [MESA_SHADER_VERTEX]    = 32,
139          [MESA_SHADER_TESS_EVAL] = 10,
140       },
141       .max_entries = {
142          [MESA_SHADER_VERTEX]    = 512,
143          [MESA_SHADER_TESS_CTRL] = 32,
144          [MESA_SHADER_TESS_EVAL] = 288,
145          [MESA_SHADER_GEOMETRY]  = 192,
146       },
147    },
148 };
149 
150 static const struct gen_device_info gen_device_info_ivb_gt2 = {
151    GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
152    .num_slices = 1,
153    .max_vs_threads = 128,
154    .max_tcs_threads = 128,
155    .max_tes_threads = 128,
156    .max_gs_threads = 128,
157    .max_wm_threads = 172,
158    .max_cs_threads = 64,
159    .urb = {
160       .size = 256,
161       .min_entries = {
162          [MESA_SHADER_VERTEX]    = 32,
163          [MESA_SHADER_TESS_EVAL] = 10,
164       },
165       .max_entries = {
166          [MESA_SHADER_VERTEX]    = 704,
167          [MESA_SHADER_TESS_CTRL] = 64,
168          [MESA_SHADER_TESS_EVAL] = 448,
169          [MESA_SHADER_GEOMETRY]  = 320,
170       },
171    },
172 };
173 
174 static const struct gen_device_info gen_device_info_byt = {
175    GEN7_FEATURES, .is_baytrail = true, .gt = 1,
176    .num_slices = 1,
177    .has_llc = false,
178    .max_vs_threads = 36,
179    .max_tcs_threads = 36,
180    .max_tes_threads = 36,
181    .max_gs_threads = 36,
182    .max_wm_threads = 48,
183    .max_cs_threads = 32,
184    .urb = {
185       .size = 128,
186       .min_entries = {
187          [MESA_SHADER_VERTEX]    = 32,
188          [MESA_SHADER_TESS_EVAL] = 10,
189       },
190       .max_entries = {
191          [MESA_SHADER_VERTEX]    = 512,
192          [MESA_SHADER_TESS_CTRL] = 32,
193          [MESA_SHADER_TESS_EVAL] = 288,
194          [MESA_SHADER_GEOMETRY]  = 192,
195       },
196    },
197 };
198 
/*
 * Initializer fragment for the Haswell tables: everything in GEN7_FEATURES
 * plus the flags below.  Expands without a trailing comma.
 */
#define HSW_FEATURES                  \
   GEN7_FEATURES,                     \
   .is_haswell = true,                \
   .supports_simd16_3src = true,      \
   .has_resource_streamer = true
204 
205 static const struct gen_device_info gen_device_info_hsw_gt1 = {
206    HSW_FEATURES, .gt = 1,
207    .num_slices = 1,
208    .max_vs_threads = 70,
209    .max_tcs_threads = 70,
210    .max_tes_threads = 70,
211    .max_gs_threads = 70,
212    .max_wm_threads = 102,
213    .max_cs_threads = 70,
214    .urb = {
215       .size = 128,
216       .min_entries = {
217          [MESA_SHADER_VERTEX]    = 32,
218          [MESA_SHADER_TESS_EVAL] = 10,
219       },
220       .max_entries = {
221          [MESA_SHADER_VERTEX]    = 640,
222          [MESA_SHADER_TESS_CTRL] = 64,
223          [MESA_SHADER_TESS_EVAL] = 384,
224          [MESA_SHADER_GEOMETRY]  = 256,
225       },
226    },
227 };
228 
229 static const struct gen_device_info gen_device_info_hsw_gt2 = {
230    HSW_FEATURES, .gt = 2,
231    .num_slices = 1,
232    .max_vs_threads = 280,
233    .max_tcs_threads = 256,
234    .max_tes_threads = 280,
235    .max_gs_threads = 256,
236    .max_wm_threads = 204,
237    .max_cs_threads = 70,
238    .urb = {
239       .size = 256,
240       .min_entries = {
241          [MESA_SHADER_VERTEX]    = 64,
242          [MESA_SHADER_TESS_EVAL] = 10,
243       },
244       .max_entries = {
245          [MESA_SHADER_VERTEX]    = 1664,
246          [MESA_SHADER_TESS_CTRL] = 128,
247          [MESA_SHADER_TESS_EVAL] = 960,
248          [MESA_SHADER_GEOMETRY]  = 640,
249       },
250    },
251 };
252 
253 static const struct gen_device_info gen_device_info_hsw_gt3 = {
254    HSW_FEATURES, .gt = 3,
255    .num_slices = 2,
256    .max_vs_threads = 280,
257    .max_tcs_threads = 256,
258    .max_tes_threads = 280,
259    .max_gs_threads = 256,
260    .max_wm_threads = 408,
261    .max_cs_threads = 70,
262    .urb = {
263       .size = 512,
264       .min_entries = {
265          [MESA_SHADER_VERTEX]    = 64,
266          [MESA_SHADER_TESS_EVAL] = 10,
267       },
268       .max_entries = {
269          [MESA_SHADER_VERTEX]    = 1664,
270          [MESA_SHADER_TESS_CTRL] = 128,
271          [MESA_SHADER_TESS_EVAL] = 960,
272          [MESA_SHADER_GEOMETRY]  = 640,
273       },
274    },
275 };
276 
/*
 * Initializer fragment shared by the Gen8 tables: feature flags plus default
 * per-stage thread limits.  Expands without a trailing comma; a table may
 * override any field with a later initializer.
 */
#define GEN8_FEATURES                     \
   .gen = 8,                              \
   .has_hiz_and_separate_stencil = true,  \
   .must_use_separate_stencil = true,     \
   .has_resource_streamer = true,         \
   .has_llc = true,                       \
   .has_pln = true,                       \
   .has_surface_tile_offset = true,       \
   .supports_simd16_3src = true,          \
   .max_vs_threads = 504,                 \
   .max_tcs_threads = 504,                \
   .max_tes_threads = 504,                \
   .max_gs_threads = 504,                 \
   .max_wm_threads = 384
291 
292 static const struct gen_device_info gen_device_info_bdw_gt1 = {
293    GEN8_FEATURES, .gt = 1,
294    .num_slices = 1,
295    .max_cs_threads = 42,
296    .urb = {
297       .size = 192,
298       .min_entries = {
299          [MESA_SHADER_VERTEX]    = 64,
300          [MESA_SHADER_TESS_EVAL] = 34,
301       },
302       .max_entries = {
303          [MESA_SHADER_VERTEX]    = 2560,
304          [MESA_SHADER_TESS_CTRL] = 504,
305          [MESA_SHADER_TESS_EVAL] = 1536,
306          [MESA_SHADER_GEOMETRY]  = 960,
307       },
308    }
309 };
310 
311 static const struct gen_device_info gen_device_info_bdw_gt2 = {
312    GEN8_FEATURES, .gt = 2,
313    .num_slices = 1,
314    .max_cs_threads = 56,
315    .urb = {
316       .size = 384,
317       .min_entries = {
318          [MESA_SHADER_VERTEX]    = 64,
319          [MESA_SHADER_TESS_EVAL] = 34,
320       },
321       .max_entries = {
322          [MESA_SHADER_VERTEX]    = 2560,
323          [MESA_SHADER_TESS_CTRL] = 504,
324          [MESA_SHADER_TESS_EVAL] = 1536,
325          [MESA_SHADER_GEOMETRY]  = 960,
326       },
327    }
328 };
329 
330 static const struct gen_device_info gen_device_info_bdw_gt3 = {
331    GEN8_FEATURES, .gt = 3,
332    .num_slices = 2,
333    .max_cs_threads = 56,
334    .urb = {
335       .size = 384,
336       .min_entries = {
337          [MESA_SHADER_VERTEX]    = 64,
338          [MESA_SHADER_TESS_EVAL] = 34,
339       },
340       .max_entries = {
341          [MESA_SHADER_VERTEX]    = 2560,
342          [MESA_SHADER_TESS_CTRL] = 504,
343          [MESA_SHADER_TESS_EVAL] = 1536,
344          [MESA_SHADER_GEOMETRY]  = 960,
345       },
346    }
347 };
348 
349 static const struct gen_device_info gen_device_info_chv = {
350    GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
351    .has_llc = false,
352    .num_slices = 1,
353    .max_vs_threads = 80,
354    .max_tcs_threads = 80,
355    .max_tes_threads = 80,
356    .max_gs_threads = 80,
357    .max_wm_threads = 128,
358    .max_cs_threads = 6 * 7,
359    .urb = {
360       .size = 192,
361       .min_entries = {
362          [MESA_SHADER_VERTEX]    = 34,
363          [MESA_SHADER_TESS_EVAL] = 34,
364       },
365       .max_entries = {
366          [MESA_SHADER_VERTEX]    = 640,
367          [MESA_SHADER_TESS_CTRL] = 80,
368          [MESA_SHADER_TESS_EVAL] = 384,
369          [MESA_SHADER_GEOMETRY]  = 256,
370       },
371    }
372 };
373 
/*
 * Initializer fragment shared by the Gen9 tables: feature flags, default
 * per-stage thread limits and a default URB description.  Expands without a
 * trailing comma; tables override individual fields (e.g. .urb.size) with
 * later initializers.
 */
#define GEN9_FEATURES                        \
   .gen = 9,                                 \
   .has_hiz_and_separate_stencil = true,     \
   .must_use_separate_stencil = true,        \
   .has_resource_streamer = true,            \
   .has_llc = true,                          \
   .has_pln = true,                          \
   .has_surface_tile_offset = true,          \
   .supports_simd16_3src = true,             \
   .max_vs_threads = 336,                    \
   .max_tcs_threads = 336,                   \
   .max_tes_threads = 336,                   \
   .max_gs_threads = 336,                    \
   .max_cs_threads = 56,                     \
   .urb = {                                  \
      .size = 384,                           \
      .min_entries = {                       \
         [MESA_SHADER_VERTEX]    = 64,       \
         [MESA_SHADER_TESS_EVAL] = 34,       \
      },                                     \
      .max_entries = {                       \
         [MESA_SHADER_VERTEX]    = 1856,     \
         [MESA_SHADER_TESS_CTRL] = 672,      \
         [MESA_SHADER_TESS_EVAL] = 1120,     \
         [MESA_SHADER_GEOMETRY]  = 640,      \
      },                                     \
   }
401 
/*
 * Complete initializer list for the tables that use it on its own (see the
 * bxt and glk tables).  Starts from GEN9_FEATURES and then overrides the LLC
 * flag, the thread limits and the whole .urb description; the overrides must
 * therefore stay after GEN9_FEATURES.  Expands without a trailing comma.
 */
#define GEN9_LP_FEATURES                     \
   GEN9_FEATURES,                            \
   .is_broxton = 1,                          \
   .gt = 1,                                  \
   .num_slices = 1,                          \
   .has_llc = false,                         \
   .max_vs_threads = 112,                    \
   .max_tcs_threads = 112,                   \
   .max_tes_threads = 112,                   \
   .max_gs_threads = 112,                    \
   .max_cs_threads = 6 * 6,                  \
   .urb = {                                  \
      .size = 192,                           \
      .min_entries = {                       \
         [MESA_SHADER_VERTEX]    = 34,       \
         [MESA_SHADER_TESS_EVAL] = 34,       \
      },                                     \
      .max_entries = {                       \
         [MESA_SHADER_VERTEX]    = 704,      \
         [MESA_SHADER_TESS_CTRL] = 256,      \
         [MESA_SHADER_TESS_EVAL] = 416,      \
         [MESA_SHADER_GEOMETRY]  = 256,      \
      },                                     \
   }
426 
/*
 * Variant of GEN9_LP_FEATURES with lower thread limits and a smaller .urb
 * description.  The overrides must stay after GEN9_LP_FEATURES.  Expands
 * without a trailing comma.
 */
#define GEN9_LP_FEATURES_2X6                 \
   GEN9_LP_FEATURES,                         \
   .max_vs_threads = 56,                     \
   .max_tcs_threads = 56,                    \
   .max_tes_threads = 56,                    \
   .max_gs_threads = 56,                     \
   .max_cs_threads = 6 * 6,                  \
   .urb = {                                  \
      .size = 128,                           \
      .min_entries = {                       \
         [MESA_SHADER_VERTEX]    = 34,       \
         [MESA_SHADER_TESS_EVAL] = 34,       \
      },                                     \
      .max_entries = {                       \
         [MESA_SHADER_VERTEX]    = 352,      \
         [MESA_SHADER_TESS_CTRL] = 128,      \
         [MESA_SHADER_TESS_EVAL] = 208,      \
         [MESA_SHADER_GEOMETRY]  = 128,      \
      },                                     \
   }
447 
448 static const struct gen_device_info gen_device_info_skl_gt1 = {
449    GEN9_FEATURES, .gt = 1,
450    .num_slices = 1,
451    .urb.size = 192,
452 };
453 
454 static const struct gen_device_info gen_device_info_skl_gt2 = {
455    GEN9_FEATURES, .gt = 2,
456    .num_slices = 1,
457 };
458 
459 static const struct gen_device_info gen_device_info_skl_gt3 = {
460    GEN9_FEATURES, .gt = 3,
461    .num_slices = 2,
462 };
463 
464 static const struct gen_device_info gen_device_info_skl_gt4 = {
465    GEN9_FEATURES, .gt = 4,
466    .num_slices = 3,
467    /* From the "L3 Allocation and Programming" documentation:
468     *
469     * "URB is limited to 1008KB due to programming restrictions.  This is not a
470     * restriction of the L3 implementation, but of the FF and other clients.
471     * Therefore, in a GT4 implementation it is possible for the programmed
472     * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
473     * only 1008KB of this will be used."
474     */
475    .urb.size = 1008 / 3,
476 };
477 
478 static const struct gen_device_info gen_device_info_bxt = {
479    GEN9_LP_FEATURES
480 };
481 
482 static const struct gen_device_info gen_device_info_bxt_2x6 = {
483    GEN9_LP_FEATURES_2X6
484 };
485 /*
486  * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
487  * There's no KBL entry. Using the default SKL (GEN9) GS entries value.
488  */
489 
490 static const struct gen_device_info gen_device_info_kbl_gt1 = {
491    GEN9_FEATURES,
492    .is_kabylake = true,
493    .gt = 1,
494 
495    .max_cs_threads = 7 * 6,
496    .urb.size = 192,
497    .num_slices = 1,
498 };
499 
500 static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
501    GEN9_FEATURES,
502    .is_kabylake = true,
503    .gt = 1,
504 
505    .max_cs_threads = 7 * 6,
506    .num_slices = 1,
507 };
508 
509 static const struct gen_device_info gen_device_info_kbl_gt2 = {
510    GEN9_FEATURES,
511    .is_kabylake = true,
512    .gt = 2,
513 
514    .num_slices = 1,
515 };
516 
517 static const struct gen_device_info gen_device_info_kbl_gt3 = {
518    GEN9_FEATURES,
519    .is_kabylake = true,
520    .gt = 3,
521 
522    .num_slices = 2,
523 };
524 
525 static const struct gen_device_info gen_device_info_kbl_gt4 = {
526    GEN9_FEATURES,
527    .is_kabylake = true,
528    .gt = 4,
529 
530    /*
531     * From the "L3 Allocation and Programming" documentation:
532     *
533     * "URB is limited to 1008KB due to programming restrictions.  This
534     *  is not a restriction of the L3 implementation, but of the FF and
535     *  other clients.  Therefore, in a GT4 implementation it is
536     *  possible for the programmed allocation of the L3 data array to
537     *  provide 3*384KB=1152KB for URB, but only 1008KB of this
538     *  will be used."
539     */
540    .urb.size = 1008 / 3,
541    .num_slices = 3,
542 };
543 
544 static const struct gen_device_info gen_device_info_glk = {
545    GEN9_LP_FEATURES
546 };
547 
548 static const struct gen_device_info gen_device_info_glk_2x6 = {
549    GEN9_LP_FEATURES_2X6
550 };
551 
552 bool
gen_get_device_info(int devid,struct gen_device_info * devinfo)553 gen_get_device_info(int devid, struct gen_device_info *devinfo)
554 {
555    switch (devid) {
556 #undef CHIPSET
557 #define CHIPSET(id, family, name) \
558       case id: *devinfo = gen_device_info_##family; break;
559 #include "pci_ids/i965_pci_ids.h"
560    default:
561       fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid);
562       return false;
563    }
564 
565    /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
566     *
567     * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
568     *  allocate scratch space enough so that each slice has 4 slices allowed."
569     *
570     * The equivalent internal documentation says that this programming note
571     * applies to all Gen9+ platforms.
572     *
573     * The hardware typically calculates the scratch space pointer by taking
574     * the base address, and adding per-thread-scratch-space * thread ID.
575     * Extra padding can be necessary depending how the thread IDs are
576     * calculated for a particular shader stage.
577     */
578    if (devinfo->gen >= 9) {
579       devinfo->max_wm_threads = 64 /* threads-per-PSD */
580                               * devinfo->num_slices
581                               * 4; /* effective subslices per slice */
582    }
583 
584    return true;
585 }
586 
587 const char *
gen_get_device_name(int devid)588 gen_get_device_name(int devid)
589 {
590    switch (devid) {
591 #undef CHIPSET
592 #define CHIPSET(id, family, name) case id: return name;
593 #include "pci_ids/i965_pci_ids.h"
594    default:
595       return NULL;
596    }
597 }
598