• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* SPDX-License-Identifier: MIT */
2 /*
3  * Copyright © 2019 Intel Corporation
4  */
5 
6 #ifndef _I915_PERF_TYPES_H_
7 #define _I915_PERF_TYPES_H_
8 
9 #include <linux/atomic.h>
10 #include <linux/device.h>
11 #include <linux/hrtimer.h>
12 #include <linux/llist.h>
13 #include <linux/poll.h>
14 #include <linux/sysfs.h>
15 #include <linux/types.h>
16 #include <linux/uuid.h>
17 #include <linux/wait.h>
18 
19 #include "gt/intel_sseu.h"
20 #include "i915_reg.h"
21 #include "intel_wakeref.h"
22 
23 struct drm_i915_private;
24 struct file;
25 struct i915_active;
26 struct i915_gem_context;
27 struct i915_perf;
28 struct i915_vma;
29 struct intel_context;
30 struct intel_engine_cs;
31 
/**
 * struct i915_oa_format - pairs an OA format value with a size
 *
 * NOTE(review): presumably @format is the hardware report-format selector
 * and @size the size of one report in that format — confirm against the
 * users of the oa_formats table in struct i915_perf.
 */
32 struct i915_oa_format {
33 	u32 format;
34 	int size;
35 };
36 
/**
 * struct i915_oa_reg - a register paired with a 32-bit value
 *
 * Arrays of these are referenced by struct i915_oa_config (mux, b_counter
 * and flex register tables).
 *
 * NOTE(review): presumably @value is what gets programmed into @addr when
 * a configuration is applied — confirm in the code consuming these tables.
 */
37 struct i915_oa_reg {
38 	i915_reg_t addr;
39 	u32 value;
40 };
41 
/**
 * struct i915_oa_config - one OA configuration
 *
 * Bundles three register tables (mux, b_counter and flex), each held as a
 * pointer to an array of struct i915_oa_reg plus a matching ``*_len``
 * field, together with an id, a uuid string, sysfs attribute storage, a
 * kref and an RCU head.
 */
42 struct i915_oa_config {
	/* i915_perf backpointer */
43 	struct i915_perf *perf;
44 
	/* UUID_STRING_LEN characters plus one extra byte (room for a NUL) */
45 	char uuid[UUID_STRING_LEN + 1];
	/* NOTE(review): presumably the key used in perf->metrics_idr — confirm */
46 	int id;
47 
	/*
	 * Register tables. NOTE(review): each *_len is presumably the number
	 * of entries (not bytes) in the array next to it — confirm.
	 */
48 	const struct i915_oa_reg *mux_regs;
49 	u32 mux_regs_len;
50 	const struct i915_oa_reg *b_counter_regs;
51 	u32 b_counter_regs_len;
52 	const struct i915_oa_reg *flex_regs;
53 	u32 flex_regs_len;
54 
	/*
	 * sysfs storage. NOTE(review): attrs[] has two slots, presumably one
	 * attribute plus a NULL terminator — confirm where it is filled in.
	 */
55 	struct attribute_group sysfs_metric;
56 	struct attribute *attrs[2];
57 	struct kobj_attribute sysfs_metric_id;
58 
	/*
	 * Reference count and RCU head. NOTE(review): presumably the final
	 * free is deferred through @rcu — confirm in the release path.
	 */
59 	struct kref ref;
60 	struct rcu_head rcu;
61 };
62 
63 struct i915_perf_stream;
64 
65 /**
66  * struct i915_perf_stream_ops - the OPs to support a specific stream type
 *
 * A table of callbacks; every callback takes the struct i915_perf_stream
 * it operates on as its first argument.
67  */
68 struct i915_perf_stream_ops {
69 	/**
70 	 * @enable: Enables the collection of HW samples, either in response to
71 	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
72 	 * without `I915_PERF_FLAG_DISABLED`.
73 	 */
74 	void (*enable)(struct i915_perf_stream *stream);
75 
76 	/**
77 	 * @disable: Disables the collection of HW samples, either in response
78 	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
79 	 * the stream.
80 	 */
81 	void (*disable)(struct i915_perf_stream *stream);
82 
83 	/**
84 	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
85 	 * once there is something ready to read() for the stream.
86 	 */
87 	void (*poll_wait)(struct i915_perf_stream *stream,
88 			  struct file *file,
89 			  poll_table *wait);
90 
91 	/**
92 	 * @wait_unlocked: For handling a blocking read, wait until there is
93 	 * something ready to read() for the stream. E.g. wait on the same
94 	 * wait queue that would be passed to poll_wait().
95 	 */
96 	int (*wait_unlocked)(struct i915_perf_stream *stream);
97 
98 	/**
99 	 * @read: Copy buffered metrics as records to userspace
100 	 * **buf**: the userspace, destination buffer
101 	 * **count**: the number of bytes to copy, requested by userspace
102 	 * **offset**: zero at the start of the read, updated as the read
103 	 * proceeds, it represents how many bytes have been copied so far and
104 	 * the buffer offset for copying the next record.
105 	 *
106 	 * Copy as many buffered i915 perf samples and records for this stream
107 	 * to userspace as will fit in the given buffer.
108 	 *
109 	 * Only write complete records; returning -%ENOSPC if there isn't room
110 	 * for a complete record.
111 	 *
112 	 * Return any error condition that results in a short read such as
113 	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
114 	 * returning to userspace.
115 	 */
116 	int (*read)(struct i915_perf_stream *stream,
117 		    char __user *buf,
118 		    size_t count,
119 		    size_t *offset);
120 
121 	/**
122 	 * @destroy: Cleanup any stream specific resources.
123 	 *
124 	 * The stream will always be disabled before this is called.
125 	 */
126 	void (*destroy)(struct i915_perf_stream *stream);
127 };
128 
129 /**
130  * struct i915_perf_stream - state for a single open stream FD
131  */
132 struct i915_perf_stream {
133 	/**
134 	 * @perf: i915_perf backpointer
135 	 */
136 	struct i915_perf *perf;
137 
138 	/**
139 	 * @uncore: mmio access path
140 	 */
141 	struct intel_uncore *uncore;
142 
143 	/**
144 	 * @engine: Engine associated with this performance stream.
145 	 */
146 	struct intel_engine_cs *engine;
147 
148 	/**
149 	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
150 	 * properties given when opening a stream, representing the contents
151 	 * of a single sample as read() by userspace.
152 	 */
153 	u32 sample_flags;
154 
155 	/**
156 	 * @sample_size: Considering the configured contents of a sample
157 	 * combined with the required header size, this is the total size
158 	 * of a single sample record.
159 	 */
160 	int sample_size;
161 
162 	/**
163 	 * @ctx: %NULL if measuring system-wide across all contexts or a
164 	 * specific context that is being monitored.
165 	 */
166 	struct i915_gem_context *ctx;
167 
168 	/**
169 	 * @enabled: Whether the stream is currently enabled, considering
170 	 * whether the stream was opened in a disabled state and based
171 	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
172 	 */
173 	bool enabled;
174 
175 	/**
176 	 * @hold_preemption: Whether preemption is put on hold for command
177 	 * submissions done on the @ctx. This is useful for some drivers that
178 	 * cannot easily post process the OA buffer context to subtract delta
179 	 * of performance counters not associated with @ctx.
180 	 */
181 	bool hold_preemption;
182 
183 	/**
184 	 * @ops: The callbacks providing the implementation of this specific
185 	 * type of configured stream.
186 	 */
187 	const struct i915_perf_stream_ops *ops;
188 
189 	/**
190 	 * @oa_config: The OA configuration used by the stream.
191 	 */
192 	struct i915_oa_config *oa_config;
193 
194 	/**
195 	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
196 	 * each time @oa_config changes.
197 	 */
198 	struct llist_head oa_config_bos;
199 
200 	/**
201 	 * @pinned_ctx: The OA context specific information.
202 	 */
203 	struct intel_context *pinned_ctx;
204 
205 	/**
206 	 * @specific_ctx_id: The id of the specific context.
207 	 */
208 	u32 specific_ctx_id;
209 
210 	/**
211 	 * @specific_ctx_id_mask: The mask used to mask the @specific_ctx_id bits.
212 	 */
213 	u32 specific_ctx_id_mask;
214 
215 	/**
216 	 * @poll_check_timer: High resolution timer that will periodically
217 	 * check for data in the circular OA buffer for notifying userspace
218 	 * (e.g. during a read() or poll()).
219 	 */
220 	struct hrtimer poll_check_timer;
221 
222 	/**
223 	 * @poll_wq: The wait queue that hrtimer callback wakes when it
224 	 * sees data ready to read in the circular OA buffer.
225 	 */
226 	wait_queue_head_t poll_wq;
227 
228 	/**
229 	 * @pollin: Whether there is data available to read.
230 	 */
231 	bool pollin;
232 
233 	/**
234 	 * @periodic: Whether periodic sampling is currently enabled.
235 	 */
236 	bool periodic;
237 
238 	/**
239 	 * @period_exponent: The OA unit sampling frequency is derived from this.
240 	 */
241 	int period_exponent;
242 
243 	/**
244 	 * @oa_buffer: State of the OA buffer.
245 	 */
246 	struct {
		/*
		 * NOTE(review): the six members below carry no documentation
		 * in this header. Presumably @vma/@vaddr are the buffer
		 * object and its CPU mapping, and @format/@format_size
		 * mirror a struct i915_oa_format entry — confirm against
		 * the code that initialises them.
		 */
247 		struct i915_vma *vma;
248 		u8 *vaddr;
249 		u32 last_ctx_id;
250 		int format;
251 		int format_size;
252 		int size_exponent;
253 
254 		/**
255 		 * @ptr_lock: Locks reads and writes to all head/tail state
256 		 *
257 		 * Consider: the head and tail pointer state needs to be read
258 		 * consistently from a hrtimer callback (atomic context) and
259 		 * read() fop (user context) with tail pointer updates happening
260 		 * in atomic context and head updates in user context and the
261 		 * (unlikely) possibility of read() errors needing to reset all
262 		 * head/tail state.
263 		 *
264 		 * Note: Contention/performance aren't currently a significant
265 		 * concern here considering the relatively low frequency of
266 		 * hrtimer callbacks (5ms period) and that reads typically only
267 		 * happen in response to a hrtimer event and likely complete
268 		 * before the next callback.
269 		 *
270 		 * Note: This lock is not held *while* reading and copying data
271 		 * to userspace so the value of head observed in hrtimer
272 		 * callbacks won't represent any partial consumption of data.
273 		 */
274 		spinlock_t ptr_lock;
275 
276 		/**
277 		 * @aging_tail: The last HW tail reported by HW. The data
278 		 * might not have made it to memory yet though.
279 		 */
280 		u32 aging_tail;
281 
282 		/**
283 		 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
284 		 * was read; used to determine when it is old enough to trust.
285 		 */
286 		u64 aging_timestamp;
287 
288 		/**
289 		 * @head: Although we can always read back the head pointer register,
290 		 * we prefer to avoid trusting the HW state, just to avoid any
291 		 * risk that some hardware condition could somehow bump the
292 		 * head pointer unpredictably and cause us to forward the wrong
293 		 * OA buffer data to userspace.
294 		 */
295 		u32 head;
296 
297 		/**
298 		 * @tail: The last verified tail that can be read by userspace.
299 		 */
300 		u32 tail;
301 	} oa_buffer;
302 
303 	/**
304 	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
305 	 * reprogrammed.
306 	 */
307 	struct i915_vma *noa_wait;
308 
309 	/**
310 	 * @poll_oa_period: The period in nanoseconds at which the OA
311 	 * buffer should be checked for available data.
312 	 */
313 	u64 poll_oa_period;
314 };
315 
316 /**
317  * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 *
 * A table of callbacks. The three ``is_valid_*_reg`` checks take the
 * struct i915_perf and a register address; every other callback takes the
 * struct i915_perf_stream it operates on.
318  */
319 struct i915_oa_ops {
320 	/**
321 	 * @is_valid_b_counter_reg: Validates register's address for
322 	 * programming boolean counters for a particular platform.
	 *
	 * NOTE(review): presumably returns true when @addr is acceptable —
	 * confirm in the implementations (same for the two checks below).
323 	 */
324 	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
325 
326 	/**
327 	 * @is_valid_mux_reg: Validates register's address for programming mux
328 	 * for a particular platform.
329 	 */
330 	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
331 
332 	/**
333 	 * @is_valid_flex_reg: Validates register's address for programming
334 	 * flex EU filtering for a particular platform.
335 	 */
336 	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
337 
338 	/**
339 	 * @enable_metric_set: Selects and applies any MUX configuration to set
340 	 * up the Boolean and Custom (B/C) counters that are part of the
341 	 * counter reports being sampled. May apply system constraints such as
342 	 * disabling EU clock gating as required.
343 	 */
344 	int (*enable_metric_set)(struct i915_perf_stream *stream,
345 				 struct i915_active *active);
346 
347 	/**
348 	 * @disable_metric_set: Remove system constraints associated with using
349 	 * the OA unit.
350 	 */
351 	void (*disable_metric_set)(struct i915_perf_stream *stream);
352 
353 	/**
354 	 * @oa_enable: Enable periodic sampling
355 	 */
356 	void (*oa_enable)(struct i915_perf_stream *stream);
357 
358 	/**
359 	 * @oa_disable: Disable periodic sampling
360 	 */
361 	void (*oa_disable)(struct i915_perf_stream *stream);
362 
363 	/**
364 	 * @read: Copy data from the circular OA buffer into a given userspace
365 	 * buffer.
	 *
	 * Takes the same arguments (stream, buf, count, offset) and has the
	 * same return type as the read callback in
	 * struct i915_perf_stream_ops.
366 	 */
367 	int (*read)(struct i915_perf_stream *stream,
368 		    char __user *buf,
369 		    size_t count,
370 		    size_t *offset);
371 
372 	/**
373 	 * @oa_hw_tail_read: read the OA tail pointer register
374 	 *
375 	 * In particular this enables us to share all the fiddly code for
376 	 * handling the OA unit tail pointer race that affects multiple
377 	 * generations.
378 	 */
379 	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
380 };
381 
/*
 * struct i915_perf - top-level i915 perf state
 *
 * Holds the backpointer to the owning drm_i915_private, the set of OA
 * configurations (metrics_idr, guarded by metrics_lock), the stream
 * currently using the OA unit, and the generation specific callbacks and
 * format table.
 */
382 struct i915_perf {
383 	struct drm_i915_private *i915;
384 
385 	struct kobject *metrics_kobj;
386 
387 	/*
388 	 * Lock associated with adding/modifying/removing OA configs
389 	 * in perf->metrics_idr.
390 	 */
391 	struct mutex metrics_lock;
392 
393 	/*
394 	 * List of dynamic configurations (struct i915_oa_config), you
395 	 * need to hold perf->metrics_lock to access it.
396 	 */
397 	struct idr metrics_idr;
398 
399 	/*
400 	 * Lock associated with anything below within this structure
401 	 * except exclusive_stream.
402 	 */
403 	struct mutex lock;
404 
405 	/*
406 	 * The stream currently using the OA unit.
407 	 *
408 	 * NOTE(review): the original sentence here ("If accessed outside a
	 * syscall associated to its file descriptor") was left unfinished,
	 * so the locking rule for such accesses is not stated in this
	 * header. The comment on @lock explicitly excludes this member —
	 * confirm the required protection in the users of this field.
409 	 */
410 	struct i915_perf_stream *exclusive_stream;
411 
412 	/**
413 	 * @sseu: sseu configuration selected to run while perf is active,
414 	 * applies to all contexts.
415 	 */
416 	struct intel_sseu sseu;
417 
418 	/**
419 	 * @spurious_report_rs: For rate limiting any notifications of
420 	 * spurious invalid OA reports
421 	 */
422 	struct ratelimit_state spurious_report_rs;
423 
424 	/**
425 	 * @tail_pointer_race: For rate limiting any notifications of tail
426 	 * pointer race.
427 	 */
428 	struct ratelimit_state tail_pointer_race;
429 
430 	u32 gen7_latched_oastatus1;
431 	u32 ctx_oactxctrl_offset;
432 	u32 ctx_flexeu0_offset;
433 
434 	/**
435 	 * @gen8_valid_ctx_bit: The RPT_ID/reason field for Gen8+ includes
436 	 * a bit to determine if the CTX ID in the report is valid
437 	 * but the specific bit differs between Gen 8 and 9
438 	 */
439 	u32 gen8_valid_ctx_bit;
440 
	/* Generation specific callbacks and the table of OA formats. */
441 	struct i915_oa_ops ops;
442 	const struct i915_oa_format *oa_formats;
443 
444 	atomic64_t noa_programming_delay;
445 };
446 
447 #endif /* _I915_PERF_TYPES_H_ */
448