1 /* SPDX-License-Identifier: MIT */ 2 /* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6 #ifndef _I915_PERF_TYPES_H_ 7 #define _I915_PERF_TYPES_H_ 8 9 #include <linux/atomic.h> 10 #include <linux/device.h> 11 #include <linux/hrtimer.h> 12 #include <linux/llist.h> 13 #include <linux/poll.h> 14 #include <linux/sysfs.h> 15 #include <linux/types.h> 16 #include <linux/uuid.h> 17 #include <linux/wait.h> 18 19 #include "gt/intel_sseu.h" 20 #include "i915_reg.h" 21 #include "intel_wakeref.h" 22 23 struct drm_i915_private; 24 struct file; 25 struct i915_active; 26 struct i915_gem_context; 27 struct i915_perf; 28 struct i915_vma; 29 struct intel_context; 30 struct intel_engine_cs; 31 32 struct i915_oa_format { 33 u32 format; 34 int size; 35 }; 36 37 struct i915_oa_reg { 38 i915_reg_t addr; 39 u32 value; 40 }; 41 42 struct i915_oa_config { 43 struct i915_perf *perf; 44 45 char uuid[UUID_STRING_LEN + 1]; 46 int id; 47 48 const struct i915_oa_reg *mux_regs; 49 u32 mux_regs_len; 50 const struct i915_oa_reg *b_counter_regs; 51 u32 b_counter_regs_len; 52 const struct i915_oa_reg *flex_regs; 53 u32 flex_regs_len; 54 55 struct attribute_group sysfs_metric; 56 struct attribute *attrs[2]; 57 struct kobj_attribute sysfs_metric_id; 58 59 struct kref ref; 60 struct rcu_head rcu; 61 }; 62 63 struct i915_perf_stream; 64 65 /** 66 * struct i915_perf_stream_ops - the OPs to support a specific stream type 67 */ 68 struct i915_perf_stream_ops { 69 /** 70 * @enable: Enables the collection of HW samples, either in response to 71 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened 72 * without `I915_PERF_FLAG_DISABLED`. 73 */ 74 void (*enable)(struct i915_perf_stream *stream); 75 76 /** 77 * @disable: Disables the collection of HW samples, either in response 78 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying 79 * the stream. 80 */ 81 void (*disable)(struct i915_perf_stream *stream); 82 83 /** 84 * @poll_wait: Call poll_wait, passing a wait queue that will be woken 85 * once there is something ready to read() for the stream 86 */ 87 void (*poll_wait)(struct i915_perf_stream *stream, 88 struct file *file, 89 poll_table *wait); 90 91 /** 92 * @wait_unlocked: For handling a blocking read, wait until there is 93 * something to ready to read() for the stream. E.g. wait on the same 94 * wait queue that would be passed to poll_wait(). 95 */ 96 int (*wait_unlocked)(struct i915_perf_stream *stream); 97 98 /** 99 * @read: Copy buffered metrics as records to userspace 100 * **buf**: the userspace, destination buffer 101 * **count**: the number of bytes to copy, requested by userspace 102 * **offset**: zero at the start of the read, updated as the read 103 * proceeds, it represents how many bytes have been copied so far and 104 * the buffer offset for copying the next record. 105 * 106 * Copy as many buffered i915 perf samples and records for this stream 107 * to userspace as will fit in the given buffer. 108 * 109 * Only write complete records; returning -%ENOSPC if there isn't room 110 * for a complete record. 111 * 112 * Return any error condition that results in a short read such as 113 * -%ENOSPC or -%EFAULT, even though these may be squashed before 114 * returning to userspace. 115 */ 116 int (*read)(struct i915_perf_stream *stream, 117 char __user *buf, 118 size_t count, 119 size_t *offset); 120 121 /** 122 * @destroy: Cleanup any stream specific resources. 123 * 124 * The stream will always be disabled before this is called. 125 */ 126 void (*destroy)(struct i915_perf_stream *stream); 127 }; 128 129 /** 130 * struct i915_perf_stream - state for a single open stream FD 131 */ 132 struct i915_perf_stream { 133 /** 134 * @perf: i915_perf backpointer 135 */ 136 struct i915_perf *perf; 137 138 /** 139 * @uncore: mmio access path 140 */ 141 struct intel_uncore *uncore; 142 143 /** 144 * @engine: Engine associated with this performance stream. 145 */ 146 struct intel_engine_cs *engine; 147 148 /** 149 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` 150 * properties given when opening a stream, representing the contents 151 * of a single sample as read() by userspace. 152 */ 153 u32 sample_flags; 154 155 /** 156 * @sample_size: Considering the configured contents of a sample 157 * combined with the required header size, this is the total size 158 * of a single sample record. 159 */ 160 int sample_size; 161 162 /** 163 * @ctx: %NULL if measuring system-wide across all contexts or a 164 * specific context that is being monitored. 165 */ 166 struct i915_gem_context *ctx; 167 168 /** 169 * @enabled: Whether the stream is currently enabled, considering 170 * whether the stream was opened in a disabled state and based 171 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. 172 */ 173 bool enabled; 174 175 /** 176 * @hold_preemption: Whether preemption is put on hold for command 177 * submissions done on the @ctx. This is useful for some drivers that 178 * cannot easily post process the OA buffer context to subtract delta 179 * of performance counters not associated with @ctx. 180 */ 181 bool hold_preemption; 182 183 /** 184 * @ops: The callbacks providing the implementation of this specific 185 * type of configured stream. 186 */ 187 const struct i915_perf_stream_ops *ops; 188 189 /** 190 * @oa_config: The OA configuration used by the stream. 191 */ 192 struct i915_oa_config *oa_config; 193 194 /** 195 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily 196 * each time @oa_config changes. 197 */ 198 struct llist_head oa_config_bos; 199 200 /** 201 * @pinned_ctx: The OA context specific information. 202 */ 203 struct intel_context *pinned_ctx; 204 205 /** 206 * @specific_ctx_id: The id of the specific context. 207 */ 208 u32 specific_ctx_id; 209 210 /** 211 * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits. 212 */ 213 u32 specific_ctx_id_mask; 214 215 /** 216 * @poll_check_timer: High resolution timer that will periodically 217 * check for data in the circular OA buffer for notifying userspace 218 * (e.g. during a read() or poll()). 219 */ 220 struct hrtimer poll_check_timer; 221 222 /** 223 * @poll_wq: The wait queue that hrtimer callback wakes when it 224 * sees data ready to read in the circular OA buffer. 225 */ 226 wait_queue_head_t poll_wq; 227 228 /** 229 * @pollin: Whether there is data available to read. 230 */ 231 bool pollin; 232 233 /** 234 * @periodic: Whether periodic sampling is currently enabled. 235 */ 236 bool periodic; 237 238 /** 239 * @period_exponent: The OA unit sampling frequency is derived from this. 240 */ 241 int period_exponent; 242 243 /** 244 * @oa_buffer: State of the OA buffer. 245 */ 246 struct { 247 struct i915_vma *vma; 248 u8 *vaddr; 249 u32 last_ctx_id; 250 int format; 251 int format_size; 252 int size_exponent; 253 254 /** 255 * @ptr_lock: Locks reads and writes to all head/tail state 256 * 257 * Consider: the head and tail pointer state needs to be read 258 * consistently from a hrtimer callback (atomic context) and 259 * read() fop (user context) with tail pointer updates happening 260 * in atomic context and head updates in user context and the 261 * (unlikely) possibility of read() errors needing to reset all 262 * head/tail state. 263 * 264 * Note: Contention/performance aren't currently a significant 265 * concern here considering the relatively low frequency of 266 * hrtimer callbacks (5ms period) and that reads typically only 267 * happen in response to a hrtimer event and likely complete 268 * before the next callback. 269 * 270 * Note: This lock is not held *while* reading and copying data 271 * to userspace so the value of head observed in htrimer 272 * callbacks won't represent any partial consumption of data. 273 */ 274 spinlock_t ptr_lock; 275 276 /** 277 * @aging_tail: The last HW tail reported by HW. The data 278 * might not have made it to memory yet though. 279 */ 280 u32 aging_tail; 281 282 /** 283 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer 284 * was read; used to determine when it is old enough to trust. 285 */ 286 u64 aging_timestamp; 287 288 /** 289 * @head: Although we can always read back the head pointer register, 290 * we prefer to avoid trusting the HW state, just to avoid any 291 * risk that some hardware condition could * somehow bump the 292 * head pointer unpredictably and cause us to forward the wrong 293 * OA buffer data to userspace. 294 */ 295 u32 head; 296 297 /** 298 * @tail: The last verified tail that can be read by userspace. 299 */ 300 u32 tail; 301 } oa_buffer; 302 303 /** 304 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be 305 * reprogrammed. 306 */ 307 struct i915_vma *noa_wait; 308 309 /** 310 * @poll_oa_period: The period in nanoseconds at which the OA 311 * buffer should be checked for available data. 312 */ 313 u64 poll_oa_period; 314 }; 315 316 /** 317 * struct i915_oa_ops - Gen specific implementation of an OA unit stream 318 */ 319 struct i915_oa_ops { 320 /** 321 * @is_valid_b_counter_reg: Validates register's address for 322 * programming boolean counters for a particular platform. 323 */ 324 bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); 325 326 /** 327 * @is_valid_mux_reg: Validates register's address for programming mux 328 * for a particular platform. 329 */ 330 bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); 331 332 /** 333 * @is_valid_flex_reg: Validates register's address for programming 334 * flex EU filtering for a particular platform. 335 */ 336 bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); 337 338 /** 339 * @enable_metric_set: Selects and applies any MUX configuration to set 340 * up the Boolean and Custom (B/C) counters that are part of the 341 * counter reports being sampled. May apply system constraints such as 342 * disabling EU clock gating as required. 343 */ 344 int (*enable_metric_set)(struct i915_perf_stream *stream, 345 struct i915_active *active); 346 347 /** 348 * @disable_metric_set: Remove system constraints associated with using 349 * the OA unit. 350 */ 351 void (*disable_metric_set)(struct i915_perf_stream *stream); 352 353 /** 354 * @oa_enable: Enable periodic sampling 355 */ 356 void (*oa_enable)(struct i915_perf_stream *stream); 357 358 /** 359 * @oa_disable: Disable periodic sampling 360 */ 361 void (*oa_disable)(struct i915_perf_stream *stream); 362 363 /** 364 * @read: Copy data from the circular OA buffer into a given userspace 365 * buffer. 366 */ 367 int (*read)(struct i915_perf_stream *stream, 368 char __user *buf, 369 size_t count, 370 size_t *offset); 371 372 /** 373 * @oa_hw_tail_read: read the OA tail pointer register 374 * 375 * In particular this enables us to share all the fiddly code for 376 * handling the OA unit tail pointer race that affects multiple 377 * generations. 378 */ 379 u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); 380 }; 381 382 struct i915_perf { 383 struct drm_i915_private *i915; 384 385 struct kobject *metrics_kobj; 386 387 /* 388 * Lock associated with adding/modifying/removing OA configs 389 * in perf->metrics_idr. 390 */ 391 struct mutex metrics_lock; 392 393 /* 394 * List of dynamic configurations (struct i915_oa_config), you 395 * need to hold perf->metrics_lock to access it. 396 */ 397 struct idr metrics_idr; 398 399 /* 400 * Lock associated with anything below within this structure 401 * except exclusive_stream. 402 */ 403 struct mutex lock; 404 405 /* 406 * The stream currently using the OA unit. If accessed 407 * outside a syscall associated to its file 408 * descriptor. 409 */ 410 struct i915_perf_stream *exclusive_stream; 411 412 /** 413 * @sseu: sseu configuration selected to run while perf is active, 414 * applies to all contexts. 415 */ 416 struct intel_sseu sseu; 417 418 /** 419 * For rate limiting any notifications of spurious 420 * invalid OA reports 421 */ 422 struct ratelimit_state spurious_report_rs; 423 424 /** 425 * For rate limiting any notifications of tail pointer 426 * race. 427 */ 428 struct ratelimit_state tail_pointer_race; 429 430 u32 gen7_latched_oastatus1; 431 u32 ctx_oactxctrl_offset; 432 u32 ctx_flexeu0_offset; 433 434 /** 435 * The RPT_ID/reason field for Gen8+ includes a bit 436 * to determine if the CTX ID in the report is valid 437 * but the specific bit differs between Gen 8 and 9 438 */ 439 u32 gen8_valid_ctx_bit; 440 441 struct i915_oa_ops ops; 442 const struct i915_oa_format *oa_formats; 443 444 atomic64_t noa_programming_delay; 445 }; 446 447 #endif /* _I915_PERF_TYPES_H_ */ 448