/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef __INTEL_ENGINE_TYPES_H__
#define __INTEL_ENGINE_TYPES_H__

#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>

#include "i915_gem.h"
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "intel_engine_pool_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"

/* Legacy HW Engine ID */

#define RCS0_HW 0
#define VCS0_HW 1
#define BCS0_HW 2
#define VECS0_HW 3
#define VCS1_HW 4
#define VCS2_HW 6
#define VCS3_HW 7
#define VECS1_HW 12

/* Gen11+ HW Engine class + instance */
#define RENDER_CLASS 0
#define VIDEO_DECODE_CLASS 1
#define VIDEO_ENHANCEMENT_CLASS 2
#define COPY_ENGINE_CLASS 3
#define OTHER_CLASS 4
#define MAX_ENGINE_CLASS 4
#define MAX_ENGINE_INSTANCE 3

#define I915_MAX_SLICES 3
#define I915_MAX_SUBSLICES 8

#define I915_CMD_HASH_ORDER 9

struct dma_fence;
struct drm_i915_gem_object;
struct drm_i915_reg_table;
struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct intel_gt;
struct intel_uncore;

typedef u8 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *addr;
};

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

struct intel_engine_hangcheck {
	u64 acthd;
	u32 last_ring;
	u32 last_head;
	unsigned long action_timestamp;
	struct intel_instdone instdone;
};

struct intel_ring {
	struct kref ref;
	struct i915_vma *vma;
	void *vaddr;

	/*
	 * As we have two types of rings, one global to the engine used
	 * by ringbuffer submission and those that are exclusive to a
	 * context used by execlists, we have to play safe and allow
	 * atomic updates to the pin_count. However, the actual pinning
	 * of the context is either done during initialisation for
	 * ringbuffer submission or serialised as part of the context
	 * pinning for execlists, and so we do not need a mutex ourselves
	 * to serialise intel_ring_pin/intel_ring_unpin.
	 */
	atomic_t pin_count;

	u32 head;
	u32 tail;
	u32 emit;

	u32 space;
	u32 size;
	u32 effective_size;
};

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are referred to in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful in case
 *  we want to have multiple batches at different offsets based on
 *  some criteria. It is not a requirement at the moment but provides
 *  an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};
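
/*
 * For example, a batch placed at byte offset 256 within the workaround
 * page is recorded as offset == 256 / sizeof(u32) == 64 dwords, and a
 * 32-byte batch as size == 8 dwords.
 */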

#define I915_MAX_VCS 4
#define I915_MAX_VECS 2

/*
 * Engine ID definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS0 = 0,
	BCS0,
	VCS0,
	VCS1,
	VCS2,
	VCS3,
#define _VCS(n) (VCS0 + (n))
	VECS0,
	VECS1,
#define _VECS(n) (VECS0 + (n))
	I915_NUM_ENGINES
};
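
/*
 * The helper macros above map an instance number onto its enum value,
 * e.g. _VCS(2) == VCS2 and _VECS(1) == VECS1.
 */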

struct st_preempt_hang {
	struct completion completion;
	unsigned int count;
	bool inject_hang;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state
 * of the driver and the hardware for the execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @timer: kick the current context if its timeslice expires
	 */
	struct timer_list timer;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

#define EXECLIST_MAX_PORTS 2
	/**
	 * @active: the currently known context executing on HW
	 */
	struct i915_request * const *active;
	/**
	 * @inflight: the set of contexts submitted and acknowledged by HW
	 *
	 * The set of inflight contexts is managed by reading CS events
	 * from the HW. On a context-switch event (not preemption), we
	 * know the HW has transitioned from port0 to port1, and we
	 * advance our inflight/active tracking accordingly.
	 */
	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
	/**
	 * @pending: the next set of contexts submitted to ELSP
	 *
	 * We store the array of contexts that we submit to HW (via ELSP) and
	 * promote them to the inflight array once HW has signaled the
	 * preemption or idle-to-active event.
	 */
	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;
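
	/*
	 * A minimal sketch of how the port arrays above are intended to be
	 * walked (the final slot is a NULL sentinel); process_port() is a
	 * hypothetical consumer:
	 *
	 *	struct i915_request * const *port;
	 *
	 *	for (port = execlists->inflight; *port; port++)
	 *		process_port(*port);
	 *
	 * When both ports are in use, port_mask == EXECLIST_MAX_PORTS - 1,
	 * i.e. the number of usable ports is port_mask + 1.
	 */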

	/**
	 * @switch_priority_hint: Second context priority.
	 *
	 * We submit multiple contexts to the HW simultaneously and would
	 * like to occasionally switch between them to emulate timeslicing.
	 * To know when timeslicing is suitable, we track the priority of
	 * the context submitted second.
	 */
	int switch_priority_hint;

	/**
	 * @queue_priority_hint: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 * However, since we may have recorded the priority of an inflight
	 * request that we wanted to preempt but which has since completed,
	 * by the time of dequeuing the priority hint may no longer match
	 * the highest available request priority.
	 */
	int queue_priority_hint;
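
	/*
	 * Sketch of the intended use of the hint (an assumption; the real
	 * submission paths live outside this header): a newly queued
	 * request, of priority prio, only needs to kick the submission
	 * tasklet when it outranks everything already pending:
	 *
	 *	if (prio > execlists->queue_priority_hint) {
	 *		execlists->queue_priority_hint = prio;
	 *		tasklet_hi_schedule(&execlists->tasklet);
	 *	}
	 */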

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;
	struct rb_root_cached virtual;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u32 *csb_status;

	/**
	 * @csb_size: context status buffer FIFO size
	 */
	u8 csb_size;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;
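
	/*
	 * A rough sketch of how the CSB fields above cooperate (an
	 * assumption about the consumer, which lives outside this header):
	 * the bottom half reads the write pointer and then drains status
	 * entries from csb_head onwards, wrapping at csb_size:
	 *
	 *	u8 head = execlists->csb_head;
	 *	u8 tail = READ_ONCE(*execlists->csb_write);
	 *
	 *	while (head != tail) {
	 *		if (++head == execlists->csb_size)
	 *			head = 0;
	 *		handle_csb_entry(execlists->csb_status[head]);
	 *	}
	 *	execlists->csb_head = head;
	 *
	 * handle_csb_entry() is a hypothetical helper.
	 */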

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	struct intel_gt *gt;
	struct intel_uncore *uncore;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	enum intel_engine_id legacy_idx;

	unsigned int hw_id;
	unsigned int guc_id;

	intel_engine_mask_t mask;

	u8 class;
	u8 instance;

	u16 uabi_class;
	u16 uabi_instance;

	u32 context_size;
	u32 mmio_base;

	u32 uabi_capabilities;

	struct rb_node uabi_node;

	struct intel_sseu sseu;

	struct {
		spinlock_t lock;
		struct list_head requests;
	} active;

	struct llist_head barrier_tasks;

	struct intel_context *kernel_context; /* pinned */

	intel_engine_mask_t saturated; /* submitting semaphores too late? */

	unsigned long serial;

	unsigned long wakeref_serial;
	struct intel_wakeref wakeref;
	struct drm_i915_gem_object *default_state;
	void *pinned_default_state;

	struct {
		struct intel_ring *ring;
		struct intel_timeline *timeline;
	} legacy;

	/*
	 * Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock;
		struct list_head signalers;

		struct irq_work irq_work; /* for use from inside irq_lock */

		unsigned int irq_enabled;

		bool irq_armed;
	} breadcrumbs;

	struct intel_engine_pmu {
		/**
		 * @enable: Bitmask of enable sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
	} pmu;
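
	/*
	 * Sketch of how the PMU bitmask above is meant to be consulted (an
	 * assumption; the real samplers live in the PMU code): a sampler
	 * for event type sample is active when its bit is set:
	 *
	 *	if (engine->pmu.enable & BIT(sample))
	 *		update(&engine->pmu.sample[sample]);
	 *
	 * where sample is an enum drm_i915_pmu_engine_sample value and
	 * update() is a hypothetical accumulator.
	 */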

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct intel_engine_pool pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*resume)(struct intel_engine_cs *engine);

	struct {
		void (*prepare)(struct intel_engine_cs *engine);
		void (*reset)(struct intel_engine_cs *engine, bool stalled);
		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	const struct intel_context_ops *cops;

	int (*request_alloc)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE BIT(0)
#define EMIT_FLUSH BIT(1)
#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
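
	/*
	 * Sketch of how a caller might drive the two emitters above (an
	 * assumption; the real callers are the submission paths): flush
	 * and invalidate, then start the batch:
	 *
	 *	err = engine->emit_flush(rq, EMIT_BARRIER);
	 *	if (err)
	 *		return err;
	 *
	 *	err = engine->emit_bb_start(rq, batch_addr, batch_len,
	 *				    I915_DISPATCH_SECURE);
	 *
	 * batch_addr and batch_len are hypothetical placeholders.
	 */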
	int (*emit_init_breadcrumb)(struct i915_request *rq);
	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, u32 *cs);
	unsigned int emit_fini_breadcrumb_dw;

	/*
	 * Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/*
	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
	 * request down to the bonded pairs.
	 */
	void (*bond_execute)(struct i915_request *rq,
			     struct dma_fence *signal);

	/*
	 * Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	/*
	 * Cancel all requests on the hardware, or queued for execution.
	 * This should only cancel the ready requests that have been
	 * submitted to the engine (via the engine->submit_request callback).
	 * This is called when marking the device as wedged.
	 */
	void (*cancel_requests)(struct intel_engine_cs *engine);

	void (*destroy)(struct intel_engine_cs *engine);

	struct intel_engine_execlists execlists;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);
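
	/*
	 * Sketch of the intended use (an assumption; the real caller is the
	 * command parser): when a command is not found in cmd_hash, its
	 * length field is decoded from the header via this hook:
	 *
	 *	u32 mask = engine->get_cmd_length_mask(cmd_header);
	 *
	 *	if (!mask)
	 *		return -EINVAL;
	 *	length = cmd_header & mask;
	 *
	 * Any bias applied to the decoded length is backend-specific.
	 */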

	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active is active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where the engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;
};

static inline bool
intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_USING_CMD_PARSER;
}

static inline bool
intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool
intel_engine_has_semaphores(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
}

static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
}

static inline bool
intel_engine_is_virtual(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_IS_VIRTUAL;
}

#define instdone_slice_mask(dev_priv__) \
	(IS_GEN(dev_priv__, 7) ? \
	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(IS_GEN(dev_priv__, 7) ? \
	 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	     (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
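
/*
 * Example use (a sketch, not from this header): dumping the per-subslice
 * samples collected in struct intel_instdone; instdone and dev_priv are
 * hypothetical locals:
 *
 *	int slice, subslice;
 *
 *	for_each_instdone_slice_subslice(dev_priv, slice, subslice)
 *		pr_info("sampler[%d][%d] = 0x%08x\n",
 *			slice, subslice, instdone->sampler[slice][subslice]);
 */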

#endif /* __INTEL_ENGINE_TYPES_H__ */