/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "xe/intel_device_info.h"

#include "common/intel_gem.h"
#include "dev/intel_device_info.h"
#include "dev/intel_hwconfig.h"

#include "util/log.h"

#include "drm-uapi/xe_drm.h"

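/* On graphics version 20+ the IP version and stepping come from the GT
 * list query (ip_ver_major/minor/rev, i.e. GMD_ID) rather than from the
 * PCI revision reported by the config query.
 */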
static inline bool
has_gmd_ip_version(const struct intel_device_info *devinfo)
{
   return devinfo->verx10 >= 200;
}

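/* Issue DRM_IOCTL_XE_DEVICE_QUERY twice: first with no data pointer to
 * learn the required buffer size, then again to fill the allocated buffer.
 * Returns a buffer the caller must free (and optionally its length), or
 * NULL on failure.
 */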
static void *
xe_query_alloc_fetch(int fd, uint32_t query_id, int32_t *len)
{
   struct drm_xe_device_query query = {
      .query = query_id,
   };
   if (intel_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
      return NULL;

   void *data = calloc(1, query.size);
   if (!data)
      return NULL;

   query.data = (uintptr_t)data;
   if (intel_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
      goto data_query_failed;

   if (len)
      *len = query.size;
   return data;

data_query_failed:
   free(data);
   return NULL;
}

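/* DRM_XE_DEVICE_QUERY_CONFIG: VRAM presence, PCI revision (upper 16 bits
 * of REV_AND_DEVICE_ID), virtual address range and minimum buffer
 * alignment.
 */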
static bool
xe_query_config(int fd, struct intel_device_info *devinfo)
{
   struct drm_xe_query_config *config;
   config = xe_query_alloc_fetch(fd, DRM_XE_DEVICE_QUERY_CONFIG, NULL);
   if (!config)
      return false;

   if (config->info[DRM_XE_QUERY_CONFIG_FLAGS] & DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM)
      devinfo->has_local_mem = true;

   if (!has_gmd_ip_version(devinfo))
      devinfo->revision = (config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16) & 0xFFFF;
   devinfo->gtt_size = 1ull << config->info[DRM_XE_QUERY_CONFIG_VA_BITS];
   devinfo->mem_alignment = config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT];

   free(config);
   return true;
}

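/* Fill or refresh the memory region information.  On the first call
 * (update == false) the class/instance pairs and sizes are recorded; on
 * later calls they are only re-validated and the free amounts are
 * refreshed.
 */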
bool
intel_device_info_xe_query_regions(int fd, struct intel_device_info *devinfo,
                                   bool update)
{
   struct drm_xe_query_mem_regions *regions;
   regions = xe_query_alloc_fetch(fd, DRM_XE_DEVICE_QUERY_MEM_REGIONS, NULL);
   if (!regions)
      return false;

   for (int i = 0; i < regions->num_mem_regions; i++) {
      struct drm_xe_mem_region *region = &regions->mem_regions[i];

      switch (region->mem_class) {
      case DRM_XE_MEM_REGION_CLASS_SYSMEM: {
         if (!update) {
            devinfo->mem.sram.mem.klass = region->mem_class;
            devinfo->mem.sram.mem.instance = region->instance;
            devinfo->mem.sram.mappable.size = region->total_size;
         } else {
            assert(devinfo->mem.sram.mem.klass == region->mem_class);
            assert(devinfo->mem.sram.mem.instance == region->instance);
            assert(devinfo->mem.sram.mappable.size == region->total_size);
         }
         /* if running without elevated privileges Xe reports used == 0 */
         devinfo->mem.sram.mappable.free = region->total_size - region->used;
         break;
      }
      case DRM_XE_MEM_REGION_CLASS_VRAM: {
         if (!update) {
            devinfo->mem.vram.mem.klass = region->mem_class;
            devinfo->mem.vram.mem.instance = region->instance;
            devinfo->mem.vram.mappable.size = region->cpu_visible_size;
            devinfo->mem.vram.unmappable.size = region->total_size - region->cpu_visible_size;
         } else {
            assert(devinfo->mem.vram.mem.klass == region->mem_class);
            assert(devinfo->mem.vram.mem.instance == region->instance);
            assert(devinfo->mem.vram.mappable.size == region->cpu_visible_size);
            assert(devinfo->mem.vram.unmappable.size == (region->total_size - region->cpu_visible_size));
         }
         devinfo->mem.vram.mappable.free = devinfo->mem.vram.mappable.size - region->cpu_visible_used;
         devinfo->mem.vram.unmappable.free = devinfo->mem.vram.unmappable.size - (region->used - region->cpu_visible_used);
         break;
      }
      default:
         mesa_loge("Unhandled Xe memory class");
         break;
      }
   }

   devinfo->mem.use_class_instance = true;
   free(regions);
   return true;
}

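/* Read the timestamp frequency (and, on GMD_ID platforms, the graphics IP
 * version and stepping) from the main GT.
 */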
static bool
xe_query_gts(int fd, struct intel_device_info *devinfo)
{
   struct drm_xe_query_gt_list *gt_list;
   gt_list = xe_query_alloc_fetch(fd, DRM_XE_DEVICE_QUERY_GT_LIST, NULL);
   if (!gt_list)
      return false;

   for (uint32_t i = 0; i < gt_list->num_gt; i++) {
      if (gt_list->gt_list[i].type == DRM_XE_QUERY_GT_TYPE_MAIN) {
         devinfo->timestamp_frequency = gt_list->gt_list[i].reference_clock;

         if (has_gmd_ip_version(devinfo)) {
            devinfo->gfx_ip_ver = GFX_IP_VER(gt_list->gt_list[i].ip_ver_major,
                                             gt_list->gt_list[i].ip_ver_minor);
            devinfo->revision = gt_list->gt_list[i].ip_ver_rev;
         }
         break;
      }
   }

   free(gt_list);
   return true;
}

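/* Fetch the raw hwconfig table from the kernel; the caller owns the
 * returned buffer.
 */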
void *
intel_device_info_xe_query_hwconfig(int fd, int32_t *len)
{
   return xe_query_alloc_fetch(fd, DRM_XE_DEVICE_QUERY_HWCONFIG, len);
}

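/* Fetch the hwconfig table and apply it to devinfo through
 * intel_hwconfig_process_table().
 */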
static bool
xe_query_process_hwconfig(int fd, struct intel_device_info *devinfo)
{
   int32_t len;
   void *data = intel_device_info_xe_query_hwconfig(fd, &len);

   if (!data)
      return false;

   bool ret = intel_hwconfig_process_table(devinfo, data, len);
   free(data);
   return ret;
}

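/* Build the slice/subslice/EU masks in devinfo from the geometry DSS mask
 * and the per-DSS EU mask returned by the topology query.  Xe does not
 * report slices directly, so DSS bits are grouped into slices using the
 * per-platform max_slices/max_subslices_per_slice layout chosen below.
 */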
static void
xe_compute_topology(struct intel_device_info * devinfo,
                    const uint8_t *geo_dss_mask,
                    const uint32_t geo_dss_num_bytes,
                    const uint64_t eu_per_dss_mask,
                    const unsigned l3_banks)
{
   intel_device_info_topology_reset_masks(devinfo);
   /* TGL/DG1/ADL-P: 1 slice x 6 dual sub slices
    * RKL/ADL-S: 1 slice x 2 dual sub slices
    * DG2: 8 slices x 4 dual sub slices
    */
   if (devinfo->verx10 >= 125) {
      devinfo->max_slices = 8;
      devinfo->max_subslices_per_slice = 4;
   } else {
      devinfo->max_slices = 1;
      devinfo->max_subslices_per_slice = 6;
   }
   devinfo->max_eus_per_subslice = __builtin_popcount(eu_per_dss_mask);
   devinfo->subslice_slice_stride = DIV_ROUND_UP(devinfo->max_slices, 8);
   devinfo->eu_slice_stride = DIV_ROUND_UP(devinfo->max_eus_per_subslice * devinfo->max_subslices_per_slice, 8);
   devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->max_eus_per_subslice, 8);

   assert((sizeof(uint32_t) * 8) >= devinfo->max_subslices_per_slice);
   assert((sizeof(uint32_t) * 8) >= devinfo->max_eus_per_subslice);

   const uint32_t dss_mask_in_slice = (1u << devinfo->max_subslices_per_slice) - 1;
   struct slice {
      uint32_t dss_mask;
      struct {
         bool enabled;
         uint64_t eu_mask;
      } dual_subslice[INTEL_DEVICE_MAX_SUBSLICES];
   } slices[INTEL_DEVICE_MAX_SLICES] = {};

   /* Compute and fill slices */
   for (unsigned s = 0; s < devinfo->max_slices; s++) {
      const unsigned first_bit = s * devinfo->max_subslices_per_slice;
      const unsigned dss_index = first_bit / 8;
      const unsigned shift = first_bit % 8;

      assert(geo_dss_num_bytes > dss_index);

      const uint32_t *dss_mask_ptr = (const uint32_t *)&geo_dss_mask[dss_index];
      uint32_t dss_mask = *dss_mask_ptr;
      dss_mask >>= shift;
      dss_mask &= dss_mask_in_slice;

      if (dss_mask) {
         slices[s].dss_mask = dss_mask;
         for (uint32_t dss = 0; dss < devinfo->max_subslices_per_slice; dss++) {
            if ((1u << dss) & slices[s].dss_mask) {
               slices[s].dual_subslice[dss].enabled = true;
               slices[s].dual_subslice[dss].eu_mask = eu_per_dss_mask;
            }
         }
      }
   }

   /* Set devinfo masks */
   for (unsigned s = 0; s < devinfo->max_slices; s++) {
      if (!slices[s].dss_mask)
         continue;

      devinfo->slice_masks |= (1u << s);

      for (unsigned ss = 0; ss < devinfo->max_subslices_per_slice; ss++) {
         if (!slices[s].dual_subslice[ss].eu_mask)
            continue;

         devinfo->subslice_masks[s * devinfo->subslice_slice_stride +
                                 ss / 8] |= (1u << (ss % 8));

         for (unsigned eu = 0; eu < devinfo->max_eus_per_subslice; eu++) {
            if (!(slices[s].dual_subslice[ss].eu_mask & (1ULL << eu)))
               continue;

            devinfo->eu_masks[s * devinfo->eu_slice_stride +
                              ss * devinfo->eu_subslice_stride +
                              eu / 8] |= (1u << (eu % 8));
         }
      }
   }

   intel_device_info_topology_update_counts(devinfo);
   intel_device_info_update_pixel_pipes(devinfo, devinfo->subslice_masks);
   if (devinfo->ver != 12)
      devinfo->l3_banks = l3_banks;
   else
      intel_device_info_update_l3_banks(devinfo);
}

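/* DRM_XE_DEVICE_QUERY_GT_TOPOLOGY returns a variable-length list of mask
 * entries; walk it for GT 0 and collect the geometry DSS mask, the
 * (SIMD16) EU per-DSS mask and the L3 bank count.
 */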
static bool
xe_query_topology(int fd, struct intel_device_info *devinfo)
{
   struct drm_xe_query_topology_mask *topology;
   int32_t len;
   topology = xe_query_alloc_fetch(fd, DRM_XE_DEVICE_QUERY_GT_TOPOLOGY, &len);
   if (!topology)
      return false;

   uint64_t eu_per_dss_mask = 0;
   uint32_t geo_dss_num_bytes = 0;
   uint8_t *geo_dss_mask = NULL, *tmp;
   unsigned l3_banks = 0;
   const struct drm_xe_query_topology_mask *head = topology;

   tmp = (uint8_t *)topology + len;
   const struct drm_xe_query_topology_mask *end = (struct drm_xe_query_topology_mask *)tmp;

   while (topology < end) {
      if (topology->gt_id == 0) {
         switch (topology->type) {
         case DRM_XE_TOPO_DSS_GEOMETRY:
            geo_dss_mask = topology->mask;
            geo_dss_num_bytes = topology->num_bytes;
            break;
         case DRM_XE_TOPO_L3_BANK:
            for (int i = 0; i < topology->num_bytes; i++)
               l3_banks += util_bitcount(topology->mask[i]);
            break;
         case DRM_XE_TOPO_EU_PER_DSS:
         case DRM_XE_TOPO_SIMD16_EU_PER_DSS:
            assert(topology->num_bytes <= sizeof(eu_per_dss_mask));
            for (int i = 0; i < topology->num_bytes; i++)
               eu_per_dss_mask |= ((uint64_t)topology->mask[i]) << (8 * i);
            break;
         }
      }

      topology = (struct drm_xe_query_topology_mask *)&topology->mask[topology->num_bytes];
   }

   bool ret = true;
   if (!geo_dss_num_bytes || !geo_dss_mask || !eu_per_dss_mask) {
      ret = false;
      goto parse_failed;
   }

   xe_compute_topology(devinfo, geo_dss_mask, geo_dss_num_bytes,
                       eu_per_dss_mask, l3_banks);

parse_failed:
   free((void *)head);
   return ret;
}

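/* Populate intel_device_info for an Xe KMD device: run the queries above,
 * then let the hwconfig table refine the defaults.
 */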
bool
intel_device_info_xe_get_info_from_fd(int fd, struct intel_device_info *devinfo)
{
   if (!intel_device_info_xe_query_regions(fd, devinfo, false))
      return false;

   if (!xe_query_config(fd, devinfo))
      return false;

   if (!xe_query_gts(fd, devinfo))
      return false;

   if (!xe_query_topology(fd, devinfo))
      return false;

   if (xe_query_process_hwconfig(fd, devinfo))
      intel_device_info_update_after_hwconfig(devinfo);

   devinfo->has_context_isolation = true;
   devinfo->has_mmap_offset = true;
   devinfo->has_caching_uapi = false;
   devinfo->has_set_pat_uapi = true;

   return true;
}