/*
 * Copyright 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "perf/xe/intel_perf.h"

#include <fcntl.h>
#include <sys/stat.h>

#include "perf/intel_perf.h"
#include "intel_perf_common.h"
#include "intel/common/intel_gem.h"
#include "intel/common/xe/intel_device_query.h"
#include "intel/common/xe/intel_queue.h"

#include "drm-uapi/xe_drm.h"

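/* Place _val in the bit field selected by _mask (userspace counterpart of the
 * kernel's FIELD_PREP()).
 */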
#define FIELD_PREP_ULL(_mask, _val) (((_val) << (ffsll(_mask) - 1)) & (_mask))

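/* Return the DRM_XE_OA_PROPERTY_OA_FORMAT value describing the OA report
 * format used on this platform: PEC64u64 on Xe2+ (verx10 >= 200), the OAG
 * format otherwise.
 */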
uint64_t xe_perf_get_oa_format(struct intel_perf_config *perf)
{
   uint64_t fmt;

   if (perf->devinfo->verx10 >= 200) {
      /* BSpec: 60942
       * PEC64u64
       */
      fmt = FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, DRM_XE_OA_FMT_TYPE_PEC);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, 1);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, 1);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_BC_REPORT, 0);
   } else {
      /* BSpec: 52198
       * same as I915_OA_FORMAT_A24u40_A14u32_B8_C8 and
       * I915_OA_FORMAT_A32u40_A4u32_B8_C8 returned for gfx 125+ and gfx 120
       * respectively.
       */
      fmt = FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, DRM_XE_OA_FMT_TYPE_OAG);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, 5);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, 0);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_BC_REPORT, 0);
   }

   return fmt;
}

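/* Check whether OA metrics are usable with this Xe KMD: the observation
 * interface must be present and this process must be allowed to use it.
 * Also records which optional features (preemption hold, metric syncs) are
 * available.
 */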
bool
xe_oa_metrics_available(struct intel_perf_config *perf, int fd, bool use_register_snapshots)
{
   struct drm_xe_query_oa_units *oa_units;
   bool perf_oa_available = false;
   struct stat sb;

   /* The existence of this file implies that this Xe KMD version supports
    * the observation interface.
    */
   if (stat("/proc/sys/dev/xe/observation_paranoid", &sb) == 0) {
      uint64_t paranoid = 1;

      /* Now check whether the application has privileges to access the
       * observation interface.
       *
       * TODO: this approach does not take into account applications running
       * with CAP_PERFMON privileges.
       */
      read_file_uint64("/proc/sys/dev/xe/observation_paranoid", &paranoid);
      if (paranoid == 0 || geteuid() == 0)
         perf_oa_available = true;
   }

   if (!perf_oa_available)
      return perf_oa_available;

   perf->features_supported |= INTEL_PERF_FEATURE_HOLD_PREEMPTION;

   oa_units = xe_device_query_alloc_fetch(fd, DRM_XE_DEVICE_QUERY_OA_UNITS, NULL);
   if (oa_units) {
      uint8_t *poau;
      uint32_t i;

      poau = (uint8_t *)oa_units->oa_units;
      for (i = 0; i < oa_units->num_oa_units; i++) {
         struct drm_xe_oa_unit *oa_unit = (struct drm_xe_oa_unit *)poau;
         uint32_t engine_i;
         bool render_found = false;

         for (engine_i = 0; engine_i < oa_unit->num_engines; engine_i++) {
            if (oa_unit->eci[engine_i].engine_class == DRM_XE_ENGINE_CLASS_RENDER) {
               render_found = true;
               break;
            }
         }

         if (!render_found)
            continue;

         if (oa_unit->capabilities & DRM_XE_OA_CAPS_SYNCS) {
            perf->features_supported |= INTEL_PERF_FEATURE_METRIC_SYNC;
            break;
         }
         poau += sizeof(*oa_unit) + oa_unit->num_engines * sizeof(oa_unit->eci[0]);
      }

      free(oa_units);
   }

   return perf_oa_available;
}

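/* Register an OA metric configuration (GUID plus mux/boolean/flex register
 * writes) with the Xe KMD.  Returns the new configuration id, or 0 on
 * failure.
 */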
uint64_t
xe_add_config(struct intel_perf_config *perf, int fd,
              const struct intel_perf_registers *config,
              const char *guid)
{
   struct drm_xe_oa_config xe_config = {};
   struct drm_xe_observation_param observation_param = {
      .observation_type = DRM_XE_OBSERVATION_TYPE_OA,
      .observation_op = DRM_XE_OBSERVATION_OP_ADD_CONFIG,
      .param = (uintptr_t)&xe_config,
   };
   uint32_t *regs;
   int ret;

   memcpy(xe_config.uuid, guid, sizeof(xe_config.uuid));

   xe_config.n_regs = config->n_mux_regs + config->n_b_counter_regs + config->n_flex_regs;
   assert(xe_config.n_regs > 0);

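   /* Each register write is a (mmio offset, value) pair packed in a uint64_t;
    * regs is a uint32_t pointer, so advancing it by two entries skips one
    * packed register.
    */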
   regs = malloc(sizeof(uint64_t) * xe_config.n_regs);
   xe_config.regs_ptr = (uintptr_t)regs;

   memcpy(regs, config->mux_regs, config->n_mux_regs * sizeof(uint64_t));
   regs += 2 * config->n_mux_regs;
   memcpy(regs, config->b_counter_regs, config->n_b_counter_regs * sizeof(uint64_t));
   regs += 2 * config->n_b_counter_regs;
   memcpy(regs, config->flex_regs, config->n_flex_regs * sizeof(uint64_t));

   ret = intel_ioctl(fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
   free((void*)(uintptr_t)xe_config.regs_ptr);
   return ret > 0 ? ret : 0;
}

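/* Remove an OA metric configuration previously registered with
 * xe_add_config().
 */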
void
xe_remove_config(struct intel_perf_config *perf, int fd, uint64_t config_id)
{
   struct drm_xe_observation_param observation_param = {
      .observation_type = DRM_XE_OBSERVATION_TYPE_OA,
      .observation_op = DRM_XE_OBSERVATION_OP_REMOVE_CONFIG,
      .param = (uintptr_t)&config_id,
   };

   intel_ioctl(fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
}

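/* Append a DRM_XE_OA_EXTENSION_SET_PROPERTY entry to props and chain it to
 * the previous entry through base.next_extension.
 */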
static void
oa_prop_set(struct drm_xe_ext_set_property *props, uint32_t *index,
            enum drm_xe_oa_property_id prop_id, uint64_t value)
{
   if (*index > 0)
      props[*index - 1].base.next_extension = (uintptr_t)&props[*index];

   props[*index].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
   props[*index].property = prop_id;
   props[*index].value = value;
   *index = *index + 1;
}

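/* Open an OA stream for the given metric set, report format and sampling
 * period.  When a bind timeline is provided, the open is ordered against it
 * with a timeline syncobj.  Returns the stream fd (set non-blocking) or a
 * negative value on error.
 */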
int
xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
                    uint32_t exec_id, uint64_t metrics_set_id,
                    uint64_t report_format, uint64_t period_exponent,
                    bool hold_preemption, bool enable,
                    struct intel_bind_timeline *timeline)
{
   struct drm_xe_ext_set_property props[DRM_XE_OA_PROPERTY_NO_PREEMPT + 1] = {};
   struct drm_xe_observation_param observation_param = {
      .observation_type = DRM_XE_OBSERVATION_TYPE_OA,
      .observation_op = DRM_XE_OBSERVATION_OP_STREAM_OPEN,
      .param = (uintptr_t)&props,
   };
   struct drm_xe_sync sync = {
      .type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
   };
   uint32_t i = 0;
   int fd, flags;

   if (exec_id)
      oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, exec_id);
   oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_DISABLED, !enable);
   oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_SAMPLE_OA, true);
   oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_METRIC_SET, metrics_set_id);
   oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_FORMAT, report_format);
   oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, period_exponent);
   if (hold_preemption)
      oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_NO_PREEMPT, hold_preemption);

   if (timeline && intel_bind_timeline_get_syncobj(timeline)) {
      oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_NUM_SYNCS, 1);
      oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_SYNCS, (uintptr_t)&sync);

      sync.handle = intel_bind_timeline_get_syncobj(timeline);
      sync.timeline_value = intel_bind_timeline_bind_begin(timeline);
      fd = intel_ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
      intel_bind_timeline_bind_end(timeline);
   } else {
      fd = intel_ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
   }

   if (fd < 0)
      return fd;

   flags = fcntl(fd, F_GETFL, 0);
   flags |= O_CLOEXEC | O_NONBLOCK;
   if (fcntl(fd, F_SETFL, flags)) {
      close(fd);
      return -1;
   }

   return fd;
}

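/* Enable or disable an already opened OA stream. */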
int
xe_perf_stream_set_state(int perf_stream_fd, bool enable)
{
   unsigned long uapi = enable ? DRM_XE_OBSERVATION_IOCTL_ENABLE :
                                 DRM_XE_OBSERVATION_IOCTL_DISABLE;

   return intel_ioctl(perf_stream_fd, uapi, 0);
}

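/* Switch an open OA stream to another metric set.  When a bind timeline is
 * provided, the reconfiguration waits for the exec queue to go idle and for
 * the previous reconfiguration to finish, then signals a new timeline point.
 */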
int
xe_perf_stream_set_metrics_id(int perf_stream_fd, int drm_fd,
                              uint32_t exec_queue, uint64_t metrics_set_id,
                              struct intel_bind_timeline *timeline)
{
   struct drm_xe_ext_set_property prop[3] = {};
   uint32_t index = 0;
   int ret;

   oa_prop_set(prop, &index, DRM_XE_OA_PROPERTY_OA_METRIC_SET,
               metrics_set_id);

   if (timeline && intel_bind_timeline_get_syncobj(timeline)) {
      struct drm_xe_sync xe_syncs[3] = {};
      uint32_t syncobj;
      int ret2;

      oa_prop_set(prop, &index, DRM_XE_OA_PROPERTY_NUM_SYNCS, ARRAY_SIZE(xe_syncs));
      oa_prop_set(prop, &index, DRM_XE_OA_PROPERTY_SYNCS, (uintptr_t)xe_syncs);

      /* Wait for all previous execs in the queue to complete */
      ret = xe_queue_get_syncobj_for_idle(drm_fd, exec_queue, &syncobj);
      if (ret)
         return ret;
      xe_syncs[0].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
      xe_syncs[0].flags = 0; /* wait */
      xe_syncs[0].handle = syncobj;

      /* Wait for the previous set_metrics_id to complete */
      xe_syncs[1].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
      xe_syncs[1].flags = 0; /* wait */
      xe_syncs[1].handle = intel_bind_timeline_get_syncobj(timeline);
      xe_syncs[1].timeline_value = intel_bind_timeline_get_last_point(timeline);

      /* Signal completion */
      xe_syncs[2].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
      xe_syncs[2].flags = DRM_XE_SYNC_FLAG_SIGNAL;
      xe_syncs[2].handle = intel_bind_timeline_get_syncobj(timeline);
      xe_syncs[2].timeline_value = intel_bind_timeline_bind_begin(timeline);

      ret = intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_CONFIG,
                        (void *)(uintptr_t)&prop);
      intel_bind_timeline_bind_end(timeline);

      /* It looks safe to destroy the syncobj here, as the Xe KMD should hold
       * its own reference for as long as it is still using it.
       */
      struct drm_syncobj_destroy syncobj_destroy = {
         .handle = syncobj,
      };
      ret2 = intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_DESTROY, &syncobj_destroy);
      assert(ret2 == 0);
   } else {
      ret = intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_CONFIG,
                        (void *)(uintptr_t)&prop);
   }

   return ret;
}

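/* Called when a stream read fails with EIO: query the OA stream status and
 * turn it into a single synthetic record describing the error condition.
 */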
static int
xe_perf_stream_read_error(int perf_stream_fd, uint8_t *buffer, size_t buffer_len)
{
   struct drm_xe_oa_stream_status status = {};
   struct intel_perf_record_header *header;
   int ret;

   ret = intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_STATUS, &status);
   if (ret)
      return -errno;

   header = (struct intel_perf_record_header *)buffer;
   header->pad = 0;
   header->type = 0;
   header->size = sizeof(*header);
   ret = header->size;

   if (status.oa_status & DRM_XE_OASTATUS_BUFFER_FULL)
      header->type = INTEL_PERF_RECORD_TYPE_OA_BUFFER_LOST;
   else if (status.oa_status & DRM_XE_OASTATUS_REPORT_LOST)
      header->type = INTEL_PERF_RECORD_TYPE_OA_REPORT_LOST;
   else if (status.oa_status & DRM_XE_OASTATUS_COUNTER_OVERFLOW)
      header->type = INTEL_PERF_RECORD_TYPE_COUNTER_OVERFLOW;
   else if (status.oa_status & DRM_XE_OASTATUS_MMIO_TRG_Q_FULL)
      header->type = INTEL_PERF_RECORD_TYPE_MMIO_TRG_Q_FULL;
   else
      unreachable("missing");

   return header->type ? header->size : -1;
}

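/* Read raw OA reports from the stream and repack them in place so that each
 * report is preceded by a struct intel_perf_record_header.  Returns the
 * number of bytes written to buffer, 0 when there is nothing to read, or a
 * negative errno.
 */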
int
xe_perf_stream_read_samples(struct intel_perf_config *perf_config, int perf_stream_fd,
                            uint8_t *buffer, size_t buffer_len)
{
   const size_t sample_size = perf_config->oa_sample_size;
   const size_t sample_header_size = sample_size + sizeof(struct intel_perf_record_header);
   uint32_t num_samples = buffer_len / sample_header_size;
   const size_t max_bytes_read = num_samples * sample_size;
   uint8_t *offset, *offset_samples;
   int len, i;

   if (buffer_len < sample_header_size)
      return -ENOSPC;

   do {
      len = read(perf_stream_fd, buffer, max_bytes_read);
   } while (len < 0 && errno == EINTR);

   if (len <= 0) {
      if (errno == EIO)
         return xe_perf_stream_read_error(perf_stream_fd, buffer, buffer_len);

      return len < 0 ? -errno : 0;
   }

   num_samples = len / sample_size;
   offset = buffer;
   offset_samples = buffer + (buffer_len - len);
   /* Move all samples to the end of the buffer */
   memmove(offset_samples, buffer, len);

   /* Set up each header, then copy the sample back from the end of the buffer */
   for (i = 0; i < num_samples; i++) {
      struct intel_perf_record_header *header = (struct intel_perf_record_header *)offset;

      /* TODO: also append REPORT_LOST and BUFFER_LOST */
      header->type = INTEL_PERF_RECORD_TYPE_SAMPLE;
      header->pad = 0;
      header->size = sample_header_size;
      offset += sizeof(*header);

      memmove(offset, offset_samples, sample_size);
      offset += sample_size;
      offset_samples += sample_size;
   }

   return offset - buffer;
}