1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Ben Widawsky <ben@bwidawsk.net>
25  *    Michel Thierry <michel.thierry@intel.com>
26  *    Thomas Daniel <thomas.daniel@intel.com>
27  *    Oscar Mateo <oscar.mateo@intel.com>
28  *
29  */
30 
31 /**
32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
33  *
34  * Motivation:
35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36  * These expanded contexts enable a number of new abilities, especially
37  * "Execlists" (also implemented in this file).
38  *
39  * One of the main differences from the legacy HW contexts is that logical
40  * ring contexts incorporate many more things into the context's state, such
41  * as PDPs or ringbuffer control registers:
42  *
43  * The reason why PDPs are included in the context is straightforward: as
44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45  * contained there means you don't need to do a ppgtt->switch_mm yourself;
46  * instead, the GPU will do it for you on the context switch.
47  *
48  * But what about the ringbuffer control registers (head, tail, etc.)?
49  * Shouldn't we just need one set of those per engine command streamer? This is
50  * where the name "Logical Rings" starts to make sense: by virtualizing the
51  * rings, the engine cs shifts to a new "ring buffer" with every context
52  * switch. When you want to submit a workload to the GPU you: A) choose your
53  * context, B) find its appropriate virtualized ring, C) write commands to it
54  * and then, finally, D) tell the GPU to switch to that context.
55  *
56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57  * to a context is via a context execution list, ergo "Execlists".
58  *
59  * LRC implementation:
60  * Regarding the creation of contexts, we have:
61  *
62  * - One global default context.
63  * - One local default context for each opened fd.
64  * - One local extra context for each context create ioctl call.
65  *
66  * Now that ringbuffers belong per-context (and not per-engine, like before)
67  * and that contexts are uniquely tied to a given engine (and not reusable,
68  * like before) we need:
69  *
70  * - One ringbuffer per-engine inside each context.
71  * - One backing object per-engine inside each context.
72  *
73  * The global default context starts its life with these new objects fully
74  * allocated and populated. The local default context for each opened fd is
75  * more complex, because we don't know at creation time which engine is going
76  * to use them. To handle this, we have implemented a deferred creation of LR
77  * contexts:
78  *
79  * The local context starts its life as a hollow or blank holder that only
80  * gets populated for a given engine once we receive an execbuffer. If later
81  * on we receive another execbuffer ioctl for the same context but a different
82  * engine, we allocate/populate a new ringbuffer and context backing object and
83  * so on.
84  *
85  * Finally, regarding local contexts created using the ioctl call: as they are
86  * only allowed with the render ring, we can allocate & populate them right
87  * away (no need to defer anything, at least for now).
88  *
89  * Execlists implementation:
90  * Execlists are the new method by which, on gen8+ hardware, workloads are
91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92  * This method works as follows:
93  *
94  * When a request is committed, its commands (the BB start and any leading or
95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96  * for the appropriate context. The tail pointer in the hardware context is not
97  * updated at this time, but instead, kept by the driver in the ringbuffer
98  * structure. A structure representing this request is added to a request queue
99  * for the appropriate engine: this structure contains a copy of the context's
100  * tail after the request was written to the ring buffer and a pointer to the
101  * context itself.
102  *
103  * If the engine's request queue was empty before the request was added, the
104  * queue is processed immediately. Otherwise the queue will be processed during
105  * a context switch interrupt. In any case, elements on the queue will get sent
106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107  * globally unique 20-bit submission ID.
108  *
109  * When execution of a request completes, the GPU updates the context status
110  * buffer with a context complete event and generates a context switch interrupt.
111  * During the interrupt handling, the driver examines the events in the buffer:
112  * for each context complete event, if the announced ID matches that on the head
113  * of the request queue, then that request is retired and removed from the queue.
114  *
115  * After processing, if any requests were retired and the queue is not empty
116  * then a new execution list can be submitted. The two requests at the front of
117  * the queue are next to be submitted but since a context may not occur twice in
118  * an execution list, if subsequent requests have the same ID as the first then
119  * the two requests must be combined. This is done simply by discarding requests
120  * at the head of the queue until either only one request is left (in which case
121  * we use a NULL second context) or the first two requests have unique IDs.
122  *
123  * By always executing the first two requests in the queue the driver ensures
124  * that the GPU is kept as busy as possible. In the case where a single context
125  * completes but a second context is still executing, the request for this second
126  * context will be at the head of the queue when we remove the first one. This
127  * request will then be resubmitted along with a new request for a different context,
128  * which will cause the hardware to continue executing the second request and queue
129  * the new request (the GPU detects the condition of a context getting preempted
130  * with the same context and optimizes the context switch flow by not doing
131  * preemption, but just sampling the new tail pointer).
132  *
133  */
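
/*
 * Editor's illustrative sketch (not part of the upstream flow): the ELSP
 * pairing rule described above, in pseudo-C. All names here are made up
 * for illustration; the real dequeue logic lives further down this file.
 *
 *	first = head_of_queue(engine);
 *	second = next_in_queue(first);
 *
 *	// A context may not appear twice in one execution list, so fold
 *	// requests that share the first request's context into one slot.
 *	while (second && same_context(first, second))
 *		second = next_in_queue(second);
 *
 *	// Submit the pair; a NULL second slot is allowed.
 *	write_elsp(engine, first, second);
 */
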
134 #include <linux/interrupt.h>
135 
136 #include "i915_drv.h"
137 #include "i915_perf.h"
138 #include "i915_trace.h"
139 #include "i915_vgpu.h"
140 #include "intel_breadcrumbs.h"
141 #include "intel_context.h"
142 #include "intel_engine_pm.h"
143 #include "intel_gt.h"
144 #include "intel_gt_pm.h"
145 #include "intel_gt_requests.h"
146 #include "intel_lrc_reg.h"
147 #include "intel_mocs.h"
148 #include "intel_reset.h"
149 #include "intel_ring.h"
150 #include "intel_workarounds.h"
151 #include "shmem_utils.h"
152 
153 #define RING_EXECLIST_QFULL		(1 << 0x2)
154 #define RING_EXECLIST1_VALID		(1 << 0x3)
155 #define RING_EXECLIST0_VALID		(1 << 0x4)
156 #define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
157 #define RING_EXECLIST1_ACTIVE		(1 << 0x11)
158 #define RING_EXECLIST0_ACTIVE		(1 << 0x12)
159 
160 #define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
161 #define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
162 #define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
163 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
164 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
165 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
166 
167 #define GEN8_CTX_STATUS_COMPLETED_MASK \
168 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
169 
170 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
171 
172 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
173 #define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
174 #define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
175 #define GEN12_IDLE_CTX_ID		0x7FF
176 #define GEN12_CSB_CTX_VALID(csb_dw) \
177 	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
178 
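/*
 * Editor's illustrative sketch (an assumption, not the driver's CSB
 * parser): how the GEN12 helpers above might be applied to one context
 * status buffer event. Which half of the 64-bit event each field lives
 * in follows the comments on the macros; applying the context-valid
 * check to the upper dword is an assumption.
 */
static inline bool __maybe_unused
gen12_csb_example(u32 lower_dw, u32 upper_dw)
{
	/* Lower dword: did this event switch the engine to a new queue? */
	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;

	/* Upper dword: was a valid (non-idle) context reported? */
	bool ctx_valid = GEN12_CSB_CTX_VALID(upper_dw);

	return new_queue && ctx_valid;
}
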
179 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
180 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
181 
182 struct virtual_engine {
183 	struct intel_engine_cs base;
184 	struct intel_context context;
185 	struct rcu_work rcu;
186 
187 	/*
188 	 * We allow only a single request through the virtual engine at a time
189 	 * (each request in the timeline waits for the completion fence of
190 	 * the previous before being submitted). By restricting ourselves to
191 	 * only submitting a single request, each request is placed on to a
192 	 * physical engine to maximise load spreading (by virtue of the late greedy
193 	 * scheduling -- each real engine takes the next available request
194 	 * upon idling).
195 	 */
196 	struct i915_request *request;
197 
198 	/*
199 	 * We keep a rbtree of available virtual engines inside each physical
200 	 * engine, sorted by priority. Here we preallocate the nodes we need
201 	 * for the virtual engine, indexed by physical_engine->id.
202 	 */
203 	struct ve_node {
204 		struct rb_node rb;
205 		int prio;
206 	} nodes[I915_NUM_ENGINES];
207 
208 	/*
209 	 * Keep track of bonded pairs -- restrictions upon our selection
210 	 * of physical engines any particular request may be submitted to.
211 	 * If we receive a submit-fence from a master engine, we will only
212 	 * use one of sibling_mask physical engines.
213 	 */
214 	struct ve_bond {
215 		const struct intel_engine_cs *master;
216 		intel_engine_mask_t sibling_mask;
217 	} *bonds;
218 	unsigned int num_bonds;
219 
220 	/* And finally, which physical engines this virtual engine maps onto. */
221 	unsigned int num_siblings;
222 	struct intel_engine_cs *siblings[];
223 };
224 
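/*
 * Editor's illustrative sketch (an assumption, not a driver helper): how
 * the bonds[] table above could be consulted once a submit-fence from
 * @master signals, narrowing the physical engines a request may use.
 */
static intel_engine_mask_t __maybe_unused
example_bonded_siblings(const struct virtual_engine *ve,
			const struct intel_engine_cs *master)
{
	unsigned int i;

	for (i = 0; i < ve->num_bonds; i++) {
		if (ve->bonds[i].master == master)
			return ve->bonds[i].sibling_mask;
	}

	/* No bond registered for this master: any sibling is allowed. */
	return (intel_engine_mask_t)~0u;
}
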
225 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
226 {
227 	GEM_BUG_ON(!intel_engine_is_virtual(engine));
228 	return container_of(engine, struct virtual_engine, base);
229 }
230 
231 static int __execlists_context_alloc(struct intel_context *ce,
232 				     struct intel_engine_cs *engine);
233 
234 static void execlists_init_reg_state(u32 *reg_state,
235 				     const struct intel_context *ce,
236 				     const struct intel_engine_cs *engine,
237 				     const struct intel_ring *ring,
238 				     bool close);
239 static void
240 __execlists_update_reg_state(const struct intel_context *ce,
241 			     const struct intel_engine_cs *engine,
242 			     u32 head);
243 
244 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
245 {
246 	if (INTEL_GEN(engine->i915) >= 12)
247 		return 0x60;
248 	else if (INTEL_GEN(engine->i915) >= 9)
249 		return 0x54;
250 	else if (engine->class == RENDER_CLASS)
251 		return 0x58;
252 	else
253 		return -1;
254 }
255 
256 static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
257 {
258 	if (INTEL_GEN(engine->i915) >= 12)
259 		return 0x74;
260 	else if (INTEL_GEN(engine->i915) >= 9)
261 		return 0x68;
262 	else if (engine->class == RENDER_CLASS)
263 		return 0xd8;
264 	else
265 		return -1;
266 }
267 
268 static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
269 {
270 	if (INTEL_GEN(engine->i915) >= 12)
271 		return 0x12;
272 	else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
273 		return 0x18;
274 	else
275 		return -1;
276 }
277 
278 static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
279 {
280 	int x;
281 
282 	x = lrc_ring_wa_bb_per_ctx(engine);
283 	if (x < 0)
284 		return x;
285 
286 	return x + 2;
287 }
288 
289 static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
290 {
291 	int x;
292 
293 	x = lrc_ring_indirect_ptr(engine);
294 	if (x < 0)
295 		return x;
296 
297 	return x + 2;
298 }
299 
300 static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
301 {
302 	if (engine->class != RENDER_CLASS)
303 		return -1;
304 
305 	if (INTEL_GEN(engine->i915) >= 12)
306 		return 0xb6;
307 	else if (INTEL_GEN(engine->i915) >= 11)
308 		return 0xaa;
309 	else
310 		return -1;
311 }
312 
313 static u32
314 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
315 {
316 	switch (INTEL_GEN(engine->i915)) {
317 	default:
318 		MISSING_CASE(INTEL_GEN(engine->i915));
319 		fallthrough;
320 	case 12:
321 		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
322 	case 11:
323 		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
324 	case 10:
325 		return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
326 	case 9:
327 		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
328 	case 8:
329 		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
330 	}
331 }
332 
333 static void
334 lrc_ring_setup_indirect_ctx(u32 *regs,
335 			    const struct intel_engine_cs *engine,
336 			    u32 ctx_bb_ggtt_addr,
337 			    u32 size)
338 {
339 	GEM_BUG_ON(!size);
340 	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
341 	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
342 	regs[lrc_ring_indirect_ptr(engine) + 1] =
343 		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
344 
345 	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
346 	regs[lrc_ring_indirect_offset(engine) + 1] =
347 		lrc_ring_indirect_offset_default(engine) << 6;
348 }
349 
350 static u32 intel_context_get_runtime(const struct intel_context *ce)
351 {
352 	/*
353 	 * We can use either ppHWSP[16] which is recorded before the context
354 	 * switch (and so excludes the cost of context switches) or use the
355 	 * value from the context image itself, which is saved/restored earlier
356 	 * and so includes the cost of the save.
357 	 */
358 	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
359 }
360 
361 static void mark_eio(struct i915_request *rq)
362 {
363 	if (i915_request_completed(rq))
364 		return;
365 
366 	GEM_BUG_ON(i915_request_signaled(rq));
367 
368 	i915_request_set_error_once(rq, -EIO);
369 	i915_request_mark_complete(rq);
370 }
371 
372 static struct i915_request *
373 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
374 {
375 	struct i915_request *active = rq;
376 
377 	rcu_read_lock();
378 	list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
379 		if (i915_request_completed(rq))
380 			break;
381 
382 		active = rq;
383 	}
384 	rcu_read_unlock();
385 
386 	return active;
387 }
388 
389 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
390 {
391 	return (i915_ggtt_offset(engine->status_page.vma) +
392 		I915_GEM_HWS_PREEMPT_ADDR);
393 }
394 
395 static inline void
396 ring_set_paused(const struct intel_engine_cs *engine, int state)
397 {
398 	/*
399 	 * We inspect HWS_PREEMPT with a semaphore inside
400 	 * engine->emit_fini_breadcrumb. If the dword is true,
401 	 * the ring is paused as the semaphore will busywait
402 	 * until the dword is false.
403 	 */
404 	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
405 	if (state)
406 		wmb();
407 }
408 
409 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
410 {
411 	return rb_entry(rb, struct i915_priolist, node);
412 }
413 
414 static inline int rq_prio(const struct i915_request *rq)
415 {
416 	return READ_ONCE(rq->sched.attr.priority);
417 }
418 
419 static int effective_prio(const struct i915_request *rq)
420 {
421 	int prio = rq_prio(rq);
422 
423 	/*
424 	 * If this request is special and must not be interrupted at any
425 	 * cost, so be it. Note we are only checking the most recent request
426 	 * in the context and so may be masking an earlier vip request. It
427 	 * is hoped that under the conditions where nopreempt is used, this
428 	 * will not matter (i.e. all requests to that context will be
429 	 * nopreempt for as long as desired).
430 	 */
431 	if (i915_request_has_nopreempt(rq))
432 		prio = I915_PRIORITY_UNPREEMPTABLE;
433 
434 	return prio;
435 }
436 
437 static int queue_prio(const struct intel_engine_execlists *execlists)
438 {
439 	struct i915_priolist *p;
440 	struct rb_node *rb;
441 
442 	rb = rb_first_cached(&execlists->queue);
443 	if (!rb)
444 		return INT_MIN;
445 
446 	/*
447 	 * As the priolist[] is inverted, with the highest priority in [0],
448 	 * we have to flip the index value to recover the priority.
449 	 */
450 	p = to_priolist(rb);
451 	if (!I915_USER_PRIORITY_SHIFT)
452 		return p->priority;
453 
454 	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
455 }
456 
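/*
 * Editor's worked example (illustrative; assumes I915_USER_PRIORITY_SHIFT
 * is 2 purely for the arithmetic): a priolist node with p->priority == 1
 * and p->used == 0b0100 (only the third internal sub-level occupied)
 * unpacks as
 *
 *	((1 + 1) << 2) - ffs(0b0100) = 8 - 3 = 5
 *
 * i.e. the effective priority of the highest occupied sub-level hanging
 * off that node.
 */
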
457 static inline bool need_preempt(const struct intel_engine_cs *engine,
458 				const struct i915_request *rq,
459 				struct rb_node *rb)
460 {
461 	int last_prio;
462 
463 	if (!intel_engine_has_semaphores(engine))
464 		return false;
465 
466 	/*
467 	 * Check if the current priority hint merits a preemption attempt.
468 	 *
469 	 * We record the highest value priority we saw during rescheduling
470 	 * prior to this dequeue, therefore we know that if it is strictly
471 	 * less than the current tail of ELSP[0], we do not need to force
472 	 * a preempt-to-idle cycle.
473 	 *
474 	 * However, the priority hint is a mere hint that we may need to
475 	 * preempt. If that hint is stale or we may be trying to preempt
476 	 * ourselves, ignore the request.
477 	 *
478 	 * More naturally we would write
479 	 *      prio >= max(0, last);
480 	 * except that we wish to prevent triggering preemption at the same
481 	 * priority level: the task that is running should remain running
482 	 * to preserve FIFO ordering of dependencies.
483 	 */
484 	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
485 	if (engine->execlists.queue_priority_hint <= last_prio)
486 		return false;
487 
488 	/*
489 	 * Check against the first request in ELSP[1], it will, thanks to the
490 	 * power of PI, be the highest priority of that context.
491 	 */
492 	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
493 	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
494 		return true;
495 
496 	if (rb) {
497 		struct virtual_engine *ve =
498 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
499 		bool preempt = false;
500 
501 		if (engine == ve->siblings[0]) { /* only preempt one sibling */
502 			struct i915_request *next;
503 
504 			rcu_read_lock();
505 			next = READ_ONCE(ve->request);
506 			if (next)
507 				preempt = rq_prio(next) > last_prio;
508 			rcu_read_unlock();
509 		}
510 
511 		if (preempt)
512 			return preempt;
513 	}
514 
515 	/*
516 	 * If the inflight context did not trigger the preemption, then maybe
517 	 * it was the set of queued requests? Pick the highest priority in
518 	 * the queue (the first active priolist) and see if it deserves to be
519 	 * running instead of ELSP[0].
520 	 *
521 	 * The highest priority request in the queue cannot be either
522 	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
523 	 * context, its priority would not exceed ELSP[0] aka last_prio.
524 	 */
525 	return queue_prio(&engine->execlists) > last_prio;
526 }
527 
528 __maybe_unused static inline bool
529 assert_priority_queue(const struct i915_request *prev,
530 		      const struct i915_request *next)
531 {
532 	/*
533 	 * Without preemption, the prev may refer to the still active element
534 	 * which we refuse to let go.
535 	 *
536 	 * Even with preemption, there are times when we think it is better not
537 	 * to preempt and leave an ostensibly lower priority request in flight.
538 	 */
539 	if (i915_request_is_active(prev))
540 		return true;
541 
542 	return rq_prio(prev) >= rq_prio(next);
543 }
544 
545 /*
546  * The context descriptor encodes various attributes of a context,
547  * including its GTT address and some flags. Because it's fairly
548  * expensive to calculate, we'll just do it once and cache the result,
549  * which remains valid until the context is unpinned.
550  *
551  * This is what a descriptor looks like, from LSB to MSB::
552  *
553  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
554  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
555  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
556  *      bits 53-54:    mbz, reserved for use by hardware
557  *      bits 55-63:    group ID, currently unused and set to 0
558  *
559  * Starting from Gen11, the upper dword of the descriptor has a new format:
560  *
561  *      bits 32-36:    reserved
562  *      bits 37-47:    SW context ID
563  *      bits 48-53:    engine instance
564  *      bit 54:        mbz, reserved for use by hardware
565  *      bits 55-60:    SW counter
566  *      bits 61-63:    engine class
567  *
568  * engine info, SW context ID and SW counter need to form a unique number
569  * (Context ID) per lrc.
570  */
571 static u32
572 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
573 {
574 	u32 desc;
575 
576 	desc = INTEL_LEGACY_32B_CONTEXT;
577 	if (i915_vm_is_4lvl(ce->vm))
578 		desc = INTEL_LEGACY_64B_CONTEXT;
579 	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
580 
581 	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
582 	if (IS_GEN(engine->i915, 8))
583 		desc |= GEN8_CTX_L3LLC_COHERENT;
584 
585 	return i915_ggtt_offset(ce->state) | desc;
586 }
587 
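/*
 * Editor's illustrative helpers (assumptions, not used by the driver):
 * pulling the lower descriptor dword built by lrc_descriptor() apart
 * again, following the layout documented above (bits 0-11 flags,
 * bits 12-31 the LRCA, i.e. the GGTT address of the context state).
 */
static inline u32 __maybe_unused lrc_descriptor_example_lrca(u32 desc)
{
	return desc & GENMASK(31, 12);	/* bits 12-31: LRCA */
}

static inline u32 __maybe_unused lrc_descriptor_example_flags(u32 desc)
{
	return desc & GENMASK(11, 0);	/* bits 0-11: GEN8_CTX_* flags */
}
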
588 static inline unsigned int dword_in_page(void *addr)
589 {
590 	return offset_in_page(addr) / sizeof(u32);
591 }
592 
593 static void set_offsets(u32 *regs,
594 			const u8 *data,
595 			const struct intel_engine_cs *engine,
596 			bool clear)
597 #define NOP(x) (BIT(7) | (x))
598 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
599 #define POSTED BIT(0)
600 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
601 #define REG16(x) \
602 	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
603 	(((x) >> 2) & 0x7f)
604 #define END(total_state_size) 0, (total_state_size)
605 {
606 	const u32 base = engine->mmio_base;
607 
608 	while (*data) {
609 		u8 count, flags;
610 
611 		if (*data & BIT(7)) { /* skip */
612 			count = *data++ & ~BIT(7);
613 			if (clear)
614 				memset32(regs, MI_NOOP, count);
615 			regs += count;
616 			continue;
617 		}
618 
619 		count = *data & 0x3f;
620 		flags = *data >> 6;
621 		data++;
622 
623 		*regs = MI_LOAD_REGISTER_IMM(count);
624 		if (flags & POSTED)
625 			*regs |= MI_LRI_FORCE_POSTED;
626 		if (INTEL_GEN(engine->i915) >= 11)
627 			*regs |= MI_LRI_LRM_CS_MMIO;
628 		regs++;
629 
630 		GEM_BUG_ON(!count);
631 		do {
632 			u32 offset = 0;
633 			u8 v;
634 
635 			do {
636 				v = *data++;
637 				offset <<= 7;
638 				offset |= v & ~BIT(7);
639 			} while (v & BIT(7));
640 
641 			regs[0] = base + (offset << 2);
642 			if (clear)
643 				regs[1] = 0;
644 			regs += 2;
645 		} while (--count);
646 	}
647 
648 	if (clear) {
649 		u8 count = *++data;
650 
651 		/* Clear past the tail for HW access */
652 		GEM_BUG_ON(dword_in_page(regs) > count);
653 		memset32(regs, MI_NOOP, count - dword_in_page(regs));
654 
655 		/* Close the batch; used mainly by live_lrc_layout() */
656 		*regs = MI_BATCH_BUFFER_END;
657 		if (INTEL_GEN(engine->i915) >= 10)
658 			*regs |= BIT(0);
659 	}
660 }
661 
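/*
 * Editor's worked example of the table encoding consumed by set_offsets()
 * above (illustrative, derived from the NOP/LRI/REG/REG16 macros): the
 * start of gen8_xcs_offsets below,
 *
 *	NOP(1), LRI(11, 0), REG16(0x244), REG(0x034), ...
 *
 * decodes as: skip one dword of the context image, emit
 * MI_LOAD_REGISTER_IMM(11) (not force-posted), then for each entry write
 * engine->mmio_base + offset into regs[0] and leave regs[1] as the value
 * slot. REG() packs offsets below 0x200 into one byte, REG16() spreads
 * larger offsets over two. The END(80) terminator records the total
 * number of dwords, used to pad up to that many dwords with MI_NOOP when
 * clearing past the tail.
 */
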
662 static const u8 gen8_xcs_offsets[] = {
663 	NOP(1),
664 	LRI(11, 0),
665 	REG16(0x244),
666 	REG(0x034),
667 	REG(0x030),
668 	REG(0x038),
669 	REG(0x03c),
670 	REG(0x168),
671 	REG(0x140),
672 	REG(0x110),
673 	REG(0x11c),
674 	REG(0x114),
675 	REG(0x118),
676 
677 	NOP(9),
678 	LRI(9, 0),
679 	REG16(0x3a8),
680 	REG16(0x28c),
681 	REG16(0x288),
682 	REG16(0x284),
683 	REG16(0x280),
684 	REG16(0x27c),
685 	REG16(0x278),
686 	REG16(0x274),
687 	REG16(0x270),
688 
689 	NOP(13),
690 	LRI(2, 0),
691 	REG16(0x200),
692 	REG(0x028),
693 
694 	END(80)
695 };
696 
697 static const u8 gen9_xcs_offsets[] = {
698 	NOP(1),
699 	LRI(14, POSTED),
700 	REG16(0x244),
701 	REG(0x034),
702 	REG(0x030),
703 	REG(0x038),
704 	REG(0x03c),
705 	REG(0x168),
706 	REG(0x140),
707 	REG(0x110),
708 	REG(0x11c),
709 	REG(0x114),
710 	REG(0x118),
711 	REG(0x1c0),
712 	REG(0x1c4),
713 	REG(0x1c8),
714 
715 	NOP(3),
716 	LRI(9, POSTED),
717 	REG16(0x3a8),
718 	REG16(0x28c),
719 	REG16(0x288),
720 	REG16(0x284),
721 	REG16(0x280),
722 	REG16(0x27c),
723 	REG16(0x278),
724 	REG16(0x274),
725 	REG16(0x270),
726 
727 	NOP(13),
728 	LRI(1, POSTED),
729 	REG16(0x200),
730 
731 	NOP(13),
732 	LRI(44, POSTED),
733 	REG(0x028),
734 	REG(0x09c),
735 	REG(0x0c0),
736 	REG(0x178),
737 	REG(0x17c),
738 	REG16(0x358),
739 	REG(0x170),
740 	REG(0x150),
741 	REG(0x154),
742 	REG(0x158),
743 	REG16(0x41c),
744 	REG16(0x600),
745 	REG16(0x604),
746 	REG16(0x608),
747 	REG16(0x60c),
748 	REG16(0x610),
749 	REG16(0x614),
750 	REG16(0x618),
751 	REG16(0x61c),
752 	REG16(0x620),
753 	REG16(0x624),
754 	REG16(0x628),
755 	REG16(0x62c),
756 	REG16(0x630),
757 	REG16(0x634),
758 	REG16(0x638),
759 	REG16(0x63c),
760 	REG16(0x640),
761 	REG16(0x644),
762 	REG16(0x648),
763 	REG16(0x64c),
764 	REG16(0x650),
765 	REG16(0x654),
766 	REG16(0x658),
767 	REG16(0x65c),
768 	REG16(0x660),
769 	REG16(0x664),
770 	REG16(0x668),
771 	REG16(0x66c),
772 	REG16(0x670),
773 	REG16(0x674),
774 	REG16(0x678),
775 	REG16(0x67c),
776 	REG(0x068),
777 
778 	END(176)
779 };
780 
781 static const u8 gen12_xcs_offsets[] = {
782 	NOP(1),
783 	LRI(13, POSTED),
784 	REG16(0x244),
785 	REG(0x034),
786 	REG(0x030),
787 	REG(0x038),
788 	REG(0x03c),
789 	REG(0x168),
790 	REG(0x140),
791 	REG(0x110),
792 	REG(0x1c0),
793 	REG(0x1c4),
794 	REG(0x1c8),
795 	REG(0x180),
796 	REG16(0x2b4),
797 
798 	NOP(5),
799 	LRI(9, POSTED),
800 	REG16(0x3a8),
801 	REG16(0x28c),
802 	REG16(0x288),
803 	REG16(0x284),
804 	REG16(0x280),
805 	REG16(0x27c),
806 	REG16(0x278),
807 	REG16(0x274),
808 	REG16(0x270),
809 
810 	END(80)
811 };
812 
813 static const u8 gen8_rcs_offsets[] = {
814 	NOP(1),
815 	LRI(14, POSTED),
816 	REG16(0x244),
817 	REG(0x034),
818 	REG(0x030),
819 	REG(0x038),
820 	REG(0x03c),
821 	REG(0x168),
822 	REG(0x140),
823 	REG(0x110),
824 	REG(0x11c),
825 	REG(0x114),
826 	REG(0x118),
827 	REG(0x1c0),
828 	REG(0x1c4),
829 	REG(0x1c8),
830 
831 	NOP(3),
832 	LRI(9, POSTED),
833 	REG16(0x3a8),
834 	REG16(0x28c),
835 	REG16(0x288),
836 	REG16(0x284),
837 	REG16(0x280),
838 	REG16(0x27c),
839 	REG16(0x278),
840 	REG16(0x274),
841 	REG16(0x270),
842 
843 	NOP(13),
844 	LRI(1, 0),
845 	REG(0x0c8),
846 
847 	END(80)
848 };
849 
850 static const u8 gen9_rcs_offsets[] = {
851 	NOP(1),
852 	LRI(14, POSTED),
853 	REG16(0x244),
854 	REG(0x34),
855 	REG(0x30),
856 	REG(0x38),
857 	REG(0x3c),
858 	REG(0x168),
859 	REG(0x140),
860 	REG(0x110),
861 	REG(0x11c),
862 	REG(0x114),
863 	REG(0x118),
864 	REG(0x1c0),
865 	REG(0x1c4),
866 	REG(0x1c8),
867 
868 	NOP(3),
869 	LRI(9, POSTED),
870 	REG16(0x3a8),
871 	REG16(0x28c),
872 	REG16(0x288),
873 	REG16(0x284),
874 	REG16(0x280),
875 	REG16(0x27c),
876 	REG16(0x278),
877 	REG16(0x274),
878 	REG16(0x270),
879 
880 	NOP(13),
881 	LRI(1, 0),
882 	REG(0xc8),
883 
884 	NOP(13),
885 	LRI(44, POSTED),
886 	REG(0x28),
887 	REG(0x9c),
888 	REG(0xc0),
889 	REG(0x178),
890 	REG(0x17c),
891 	REG16(0x358),
892 	REG(0x170),
893 	REG(0x150),
894 	REG(0x154),
895 	REG(0x158),
896 	REG16(0x41c),
897 	REG16(0x600),
898 	REG16(0x604),
899 	REG16(0x608),
900 	REG16(0x60c),
901 	REG16(0x610),
902 	REG16(0x614),
903 	REG16(0x618),
904 	REG16(0x61c),
905 	REG16(0x620),
906 	REG16(0x624),
907 	REG16(0x628),
908 	REG16(0x62c),
909 	REG16(0x630),
910 	REG16(0x634),
911 	REG16(0x638),
912 	REG16(0x63c),
913 	REG16(0x640),
914 	REG16(0x644),
915 	REG16(0x648),
916 	REG16(0x64c),
917 	REG16(0x650),
918 	REG16(0x654),
919 	REG16(0x658),
920 	REG16(0x65c),
921 	REG16(0x660),
922 	REG16(0x664),
923 	REG16(0x668),
924 	REG16(0x66c),
925 	REG16(0x670),
926 	REG16(0x674),
927 	REG16(0x678),
928 	REG16(0x67c),
929 	REG(0x68),
930 
931 	END(176)
932 };
933 
934 static const u8 gen11_rcs_offsets[] = {
935 	NOP(1),
936 	LRI(15, POSTED),
937 	REG16(0x244),
938 	REG(0x034),
939 	REG(0x030),
940 	REG(0x038),
941 	REG(0x03c),
942 	REG(0x168),
943 	REG(0x140),
944 	REG(0x110),
945 	REG(0x11c),
946 	REG(0x114),
947 	REG(0x118),
948 	REG(0x1c0),
949 	REG(0x1c4),
950 	REG(0x1c8),
951 	REG(0x180),
952 
953 	NOP(1),
954 	LRI(9, POSTED),
955 	REG16(0x3a8),
956 	REG16(0x28c),
957 	REG16(0x288),
958 	REG16(0x284),
959 	REG16(0x280),
960 	REG16(0x27c),
961 	REG16(0x278),
962 	REG16(0x274),
963 	REG16(0x270),
964 
965 	LRI(1, POSTED),
966 	REG(0x1b0),
967 
968 	NOP(10),
969 	LRI(1, 0),
970 	REG(0x0c8),
971 
972 	END(80)
973 };
974 
975 static const u8 gen12_rcs_offsets[] = {
976 	NOP(1),
977 	LRI(13, POSTED),
978 	REG16(0x244),
979 	REG(0x034),
980 	REG(0x030),
981 	REG(0x038),
982 	REG(0x03c),
983 	REG(0x168),
984 	REG(0x140),
985 	REG(0x110),
986 	REG(0x1c0),
987 	REG(0x1c4),
988 	REG(0x1c8),
989 	REG(0x180),
990 	REG16(0x2b4),
991 
992 	NOP(5),
993 	LRI(9, POSTED),
994 	REG16(0x3a8),
995 	REG16(0x28c),
996 	REG16(0x288),
997 	REG16(0x284),
998 	REG16(0x280),
999 	REG16(0x27c),
1000 	REG16(0x278),
1001 	REG16(0x274),
1002 	REG16(0x270),
1003 
1004 	LRI(3, POSTED),
1005 	REG(0x1b0),
1006 	REG16(0x5a8),
1007 	REG16(0x5ac),
1008 
1009 	NOP(6),
1010 	LRI(1, 0),
1011 	REG(0x0c8),
1012 	NOP(3 + 9 + 1),
1013 
1014 	LRI(51, POSTED),
1015 	REG16(0x588),
1016 	REG16(0x588),
1017 	REG16(0x588),
1018 	REG16(0x588),
1019 	REG16(0x588),
1020 	REG16(0x588),
1021 	REG(0x028),
1022 	REG(0x09c),
1023 	REG(0x0c0),
1024 	REG(0x178),
1025 	REG(0x17c),
1026 	REG16(0x358),
1027 	REG(0x170),
1028 	REG(0x150),
1029 	REG(0x154),
1030 	REG(0x158),
1031 	REG16(0x41c),
1032 	REG16(0x600),
1033 	REG16(0x604),
1034 	REG16(0x608),
1035 	REG16(0x60c),
1036 	REG16(0x610),
1037 	REG16(0x614),
1038 	REG16(0x618),
1039 	REG16(0x61c),
1040 	REG16(0x620),
1041 	REG16(0x624),
1042 	REG16(0x628),
1043 	REG16(0x62c),
1044 	REG16(0x630),
1045 	REG16(0x634),
1046 	REG16(0x638),
1047 	REG16(0x63c),
1048 	REG16(0x640),
1049 	REG16(0x644),
1050 	REG16(0x648),
1051 	REG16(0x64c),
1052 	REG16(0x650),
1053 	REG16(0x654),
1054 	REG16(0x658),
1055 	REG16(0x65c),
1056 	REG16(0x660),
1057 	REG16(0x664),
1058 	REG16(0x668),
1059 	REG16(0x66c),
1060 	REG16(0x670),
1061 	REG16(0x674),
1062 	REG16(0x678),
1063 	REG16(0x67c),
1064 	REG(0x068),
1065 	REG(0x084),
1066 	NOP(1),
1067 
1068 	END(192)
1069 };
1070 
1071 #undef END
1072 #undef REG16
1073 #undef REG
1074 #undef LRI
1075 #undef NOP
1076 
1077 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
1078 {
1079 	/*
1080 	 * The gen12+ lists only have the registers we program in the basic
1081 	 * default state. We rely on the context image using relative
1082 	 * addressing to automatically fix up the register state between the
1083 	 * physical engines for the virtual engine.
1084 	 */
1085 	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
1086 		   !intel_engine_has_relative_mmio(engine));
1087 
1088 	if (engine->class == RENDER_CLASS) {
1089 		if (INTEL_GEN(engine->i915) >= 12)
1090 			return gen12_rcs_offsets;
1091 		else if (INTEL_GEN(engine->i915) >= 11)
1092 			return gen11_rcs_offsets;
1093 		else if (INTEL_GEN(engine->i915) >= 9)
1094 			return gen9_rcs_offsets;
1095 		else
1096 			return gen8_rcs_offsets;
1097 	} else {
1098 		if (INTEL_GEN(engine->i915) >= 12)
1099 			return gen12_xcs_offsets;
1100 		else if (INTEL_GEN(engine->i915) >= 9)
1101 			return gen9_xcs_offsets;
1102 		else
1103 			return gen8_xcs_offsets;
1104 	}
1105 }
1106 
1107 static struct i915_request *
1108 __unwind_incomplete_requests(struct intel_engine_cs *engine)
1109 {
1110 	struct i915_request *rq, *rn, *active = NULL;
1111 	struct list_head *pl;
1112 	int prio = I915_PRIORITY_INVALID;
1113 
1114 	lockdep_assert_held(&engine->active.lock);
1115 
1116 	list_for_each_entry_safe_reverse(rq, rn,
1117 					 &engine->active.requests,
1118 					 sched.link) {
1119 		if (i915_request_completed(rq))
1120 			continue; /* XXX */
1121 
1122 		__i915_request_unsubmit(rq);
1123 
1124 		/*
1125 		 * Push the request back into the queue for later resubmission.
1126 		 * If this request is not native to this physical engine (i.e.
1127 		 * it came from a virtual source), push it back onto the virtual
1128 		 * engine so that it can be moved across onto another physical
1129 		 * engine as load dictates.
1130 		 */
1131 		if (likely(rq->execution_mask == engine->mask)) {
1132 			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1133 			if (rq_prio(rq) != prio) {
1134 				prio = rq_prio(rq);
1135 				pl = i915_sched_lookup_priolist(engine, prio);
1136 			}
1137 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
1138 
1139 			list_move(&rq->sched.link, pl);
1140 			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1141 
1142 			/* Check in case we rollback so far we wrap [size/2] */
1143 			if (intel_ring_direction(rq->ring,
1144 						 rq->tail,
1145 						 rq->ring->tail + 8) > 0)
1146 				rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1147 
1148 			active = rq;
1149 		} else {
1150 			struct intel_engine_cs *owner = rq->context->engine;
1151 
1152 			WRITE_ONCE(rq->engine, owner);
1153 			owner->submit_request(rq);
1154 			active = NULL;
1155 		}
1156 	}
1157 
1158 	return active;
1159 }
1160 
1161 struct i915_request *
1162 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1163 {
1164 	struct intel_engine_cs *engine =
1165 		container_of(execlists, typeof(*engine), execlists);
1166 
1167 	return __unwind_incomplete_requests(engine);
1168 }
1169 
1170 static inline void
1171 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1172 {
1173 	/*
1174 	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
1175 	 * the compiler should eliminate this function as dead code.
1176 	 */
1177 	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1178 		return;
1179 
1180 	atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1181 				   status, rq);
1182 }
1183 
1184 static void intel_engine_context_in(struct intel_engine_cs *engine)
1185 {
1186 	unsigned long flags;
1187 
1188 	if (atomic_add_unless(&engine->stats.active, 1, 0))
1189 		return;
1190 
1191 	write_seqlock_irqsave(&engine->stats.lock, flags);
1192 	if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
1193 		engine->stats.start = ktime_get();
1194 		atomic_inc(&engine->stats.active);
1195 	}
1196 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1197 }
1198 
1199 static void intel_engine_context_out(struct intel_engine_cs *engine)
1200 {
1201 	unsigned long flags;
1202 
1203 	GEM_BUG_ON(!atomic_read(&engine->stats.active));
1204 
1205 	if (atomic_add_unless(&engine->stats.active, -1, 1))
1206 		return;
1207 
1208 	write_seqlock_irqsave(&engine->stats.lock, flags);
1209 	if (atomic_dec_and_test(&engine->stats.active)) {
1210 		engine->stats.total =
1211 			ktime_add(engine->stats.total,
1212 				  ktime_sub(ktime_get(), engine->stats.start));
1213 	}
1214 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1215 }
1216 
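/*
 * Editor's illustrative sketch (an assumption; the driver's real accessor
 * lives elsewhere and also handles seqlock retries): how the busy-time
 * accounting maintained by intel_engine_context_in/out() above could be
 * sampled. While a context is in flight, the running span since
 * stats.start is added on top of the accumulated total.
 */
static inline ktime_t __maybe_unused
example_engine_busy_time(struct intel_engine_cs *engine)
{
	ktime_t total = engine->stats.total;

	if (atomic_read(&engine->stats.active))
		total = ktime_add(total,
				  ktime_sub(ktime_get(), engine->stats.start));

	return total;
}
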
1217 static void
1218 execlists_check_context(const struct intel_context *ce,
1219 			const struct intel_engine_cs *engine)
1220 {
1221 	const struct intel_ring *ring = ce->ring;
1222 	u32 *regs = ce->lrc_reg_state;
1223 	bool valid = true;
1224 	int x;
1225 
1226 	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1227 		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1228 		       engine->name,
1229 		       regs[CTX_RING_START],
1230 		       i915_ggtt_offset(ring->vma));
1231 		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1232 		valid = false;
1233 	}
1234 
1235 	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1236 	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1237 		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1238 		       engine->name,
1239 		       regs[CTX_RING_CTL],
1240 		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1241 		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1242 		valid = false;
1243 	}
1244 
1245 	x = lrc_ring_mi_mode(engine);
1246 	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1247 		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1248 		       engine->name, regs[x + 1]);
1249 		regs[x + 1] &= ~STOP_RING;
1250 		regs[x + 1] |= STOP_RING << 16;
1251 		valid = false;
1252 	}
1253 
1254 	WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1255 }
1256 
1257 static void restore_default_state(struct intel_context *ce,
1258 				  struct intel_engine_cs *engine)
1259 {
1260 	u32 *regs;
1261 
1262 	regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
1263 	execlists_init_reg_state(regs, ce, engine, ce->ring, true);
1264 
1265 	ce->runtime.last = intel_context_get_runtime(ce);
1266 }
1267 
1268 static void reset_active(struct i915_request *rq,
1269 			 struct intel_engine_cs *engine)
1270 {
1271 	struct intel_context * const ce = rq->context;
1272 	u32 head;
1273 
1274 	/*
1275 	 * The executing context has been cancelled. We want to prevent
1276 	 * further execution along this context and propagate the error on
1277 	 * to anything depending on its results.
1278 	 *
1279 	 * In __i915_request_submit(), we apply the -EIO and remove the
1280 	 * requests' payloads for any banned requests. But first, we must
1281 	 * rewind the context back to the start of the incomplete request so
1282 	 * that we do not jump back into the middle of the batch.
1283 	 *
1284 	 * We preserve the breadcrumbs and semaphores of the incomplete
1285 	 * requests so that inter-timeline dependencies (i.e other timelines)
1286 	 * remain correctly ordered. And we defer to __i915_request_submit()
1287 	 * so that all asynchronous waits are correctly handled.
1288 	 */
1289 	ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1290 		     rq->fence.context, rq->fence.seqno);
1291 
1292 	/* On resubmission of the active request, payload will be scrubbed */
1293 	if (i915_request_completed(rq))
1294 		head = rq->tail;
1295 	else
1296 		head = active_request(ce->timeline, rq)->head;
1297 	head = intel_ring_wrap(ce->ring, head);
1298 
1299 	/* Scrub the context image to prevent replaying the previous batch */
1300 	restore_default_state(ce, engine);
1301 	__execlists_update_reg_state(ce, engine, head);
1302 
1303 	/* We've switched away, so this should be a no-op, but intent matters */
1304 	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1305 }
1306 
1307 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1308 {
1309 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1310 	ce->runtime.num_underflow += dt < 0;
1311 	ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1312 #endif
1313 }
1314 
1315 static void intel_context_update_runtime(struct intel_context *ce)
1316 {
1317 	u32 old;
1318 	s32 dt;
1319 
1320 	if (intel_context_is_barrier(ce))
1321 		return;
1322 
1323 	old = ce->runtime.last;
1324 	ce->runtime.last = intel_context_get_runtime(ce);
1325 	dt = ce->runtime.last - old;
1326 
1327 	if (unlikely(dt <= 0)) {
1328 		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1329 			 old, ce->runtime.last, dt);
1330 		st_update_runtime_underflow(ce, dt);
1331 		return;
1332 	}
1333 
1334 	ewma_runtime_add(&ce->runtime.avg, dt);
1335 	ce->runtime.total += dt;
1336 }
1337 
1338 static inline struct intel_engine_cs *
1339 __execlists_schedule_in(struct i915_request *rq)
1340 {
1341 	struct intel_engine_cs * const engine = rq->engine;
1342 	struct intel_context * const ce = rq->context;
1343 
1344 	intel_context_get(ce);
1345 
1346 	if (unlikely(intel_context_is_banned(ce)))
1347 		reset_active(rq, engine);
1348 
1349 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1350 		execlists_check_context(ce, engine);
1351 
1352 	if (ce->tag) {
1353 		/* Use a fixed tag for OA and friends */
1354 		GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
1355 		ce->lrc.ccid = ce->tag;
1356 	} else {
1357 		/* We don't need a strict matching tag, just different values */
1358 		unsigned int tag = ffs(READ_ONCE(engine->context_tag));
1359 
1360 		GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
1361 		clear_bit(tag - 1, &engine->context_tag);
1362 		ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
1363 
1364 		BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
1365 	}
1366 
1367 	ce->lrc.ccid |= engine->execlists.ccid;
1368 
1369 	__intel_gt_pm_get(engine->gt);
1370 	if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active))
1371 		intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
1372 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1373 	intel_engine_context_in(engine);
1374 
1375 	return engine;
1376 }
1377 
1378 static inline struct i915_request *
1379 execlists_schedule_in(struct i915_request *rq, int idx)
1380 {
1381 	struct intel_context * const ce = rq->context;
1382 	struct intel_engine_cs *old;
1383 
1384 	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1385 	trace_i915_request_in(rq, idx);
1386 
1387 	old = READ_ONCE(ce->inflight);
1388 	do {
1389 		if (!old) {
1390 			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1391 			break;
1392 		}
1393 	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1394 
1395 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1396 	return i915_request_get(rq);
1397 }
1398 
1399 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1400 {
1401 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1402 	struct i915_request *next = READ_ONCE(ve->request);
1403 
1404 	if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
1405 		tasklet_hi_schedule(&ve->base.execlists.tasklet);
1406 }
1407 
1408 static inline void
1409 __execlists_schedule_out(struct i915_request *rq,
1410 			 struct intel_engine_cs * const engine,
1411 			 unsigned int ccid)
1412 {
1413 	struct intel_context * const ce = rq->context;
1414 
1415 	/*
1416 	 * NB process_csb() is not under the engine->active.lock and hence
1417 	 * schedule_out can race with schedule_in, meaning that we should
1418 	 * refrain from doing non-trivial work here.
1419 	 */
1420 
1421 	/*
1422 	 * If we have just completed this context, the engine may now be
1423 	 * idle and we want to re-enter powersaving.
1424 	 */
1425 	if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1426 	    i915_request_completed(rq))
1427 		intel_engine_add_retire(engine, ce->timeline);
1428 
1429 	ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
1430 	ccid &= GEN12_MAX_CONTEXT_HW_ID;
1431 	if (ccid < BITS_PER_LONG) {
1432 		GEM_BUG_ON(ccid == 0);
1433 		GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
1434 		set_bit(ccid - 1, &engine->context_tag);
1435 	}
1436 
1437 	intel_context_update_runtime(ce);
1438 	intel_engine_context_out(engine);
1439 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1440 	if (engine->fw_domain && !atomic_dec_return(&engine->fw_active))
1441 		intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
1442 	intel_gt_pm_put_async(engine->gt);
1443 
1444 	/*
1445 	 * If this is part of a virtual engine, its next request may
1446 	 * have been blocked waiting for access to the active context.
1447 	 * We have to kick all the siblings again in case we need to
1448 	 * switch (e.g. the next request is not runnable on this
1449 	 * engine). Hopefully, we will already have submitted the next
1450 	 * request before the tasklet runs and do not need to rebuild
1451 	 * each virtual tree and kick everyone again.
1452 	 */
1453 	if (ce->engine != engine)
1454 		kick_siblings(rq, ce);
1455 
1456 	intel_context_put(ce);
1457 }
1458 
1459 static inline void
1460 execlists_schedule_out(struct i915_request *rq)
1461 {
1462 	struct intel_context * const ce = rq->context;
1463 	struct intel_engine_cs *cur, *old;
1464 	u32 ccid;
1465 
1466 	trace_i915_request_out(rq);
1467 
1468 	ccid = rq->context->lrc.ccid;
1469 	old = READ_ONCE(ce->inflight);
1470 	do
1471 		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1472 	while (!try_cmpxchg(&ce->inflight, &old, cur));
1473 	if (!cur)
1474 		__execlists_schedule_out(rq, old, ccid);
1475 
1476 	i915_request_put(rq);
1477 }
1478 
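/*
 * Editor's note with an illustrative sketch: ce->inflight is used above as
 * a tagged pointer -- the owning engine pointer with its low bits doubling
 * as a count of how many ELSP slots the context currently occupies
 * (assuming ptr_inc/ptr_dec/ptr_unmask_bits behave as their names suggest):
 *
 *	schedule_in:  NULL       -> engine      (first slot)
 *	              engine     -> engine + 1  (second slot, lite restore)
 *	schedule_out: engine + 1 -> engine      (one slot still active)
 *	              engine     -> NULL        (last slot gone; do the real
 *	                                         __execlists_schedule_out())
 */
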
1479 static u64 execlists_update_context(struct i915_request *rq)
1480 {
1481 	struct intel_context *ce = rq->context;
1482 	u64 desc = ce->lrc.desc;
1483 	u32 tail, prev;
1484 
1485 	/*
1486 	 * WaIdleLiteRestore:bdw,skl
1487 	 *
1488 	 * We should never submit the context with the same RING_TAIL twice
1489 	 * just in case we submit an empty ring, which confuses the HW.
1490 	 *
1491 	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1492 	 * the normal request to be able to always advance the RING_TAIL on
1493 	 * subsequent resubmissions (for lite restore). Should that fail us,
1494 	 * and we try and submit the same tail again, force the context
1495 	 * reload.
1496 	 *
1497 	 * If we need to return to a preempted context, we need to skip the
1498 	 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1499 	 * HW has a tendency to ignore us rewinding the TAIL to the end of
1500 	 * an earlier request.
1501 	 */
1502 	GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
1503 	prev = rq->ring->tail;
1504 	tail = intel_ring_set_tail(rq->ring, rq->tail);
1505 	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1506 		desc |= CTX_DESC_FORCE_RESTORE;
1507 	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1508 	rq->tail = rq->wa_tail;
1509 
1510 	/*
1511 	 * Make sure the context image is complete before we submit it to HW.
1512 	 *
1513 	 * Ostensibly, writes (including the WCB) should be flushed prior to
1514 	 * an uncached write such as our mmio register access, the empirical
1515 	 * evidence (esp. on Braswell) suggests that the WC write into memory
1516 	 * may not be visible to the HW prior to the completion of the UC
1517 	 * register write and that we may begin execution from the context
1518 	 * before its image is complete leading to invalid PD chasing.
1519 	 */
1520 	wmb();
1521 
1522 	ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
1523 	return desc;
1524 }
1525 
1526 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1527 {
1528 	if (execlists->ctrl_reg) {
1529 		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1530 		writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1531 	} else {
1532 		writel(upper_32_bits(desc), execlists->submit_reg);
1533 		writel(lower_32_bits(desc), execlists->submit_reg);
1534 	}
1535 }
1536 
1537 static __maybe_unused char *
1538 dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
1539 {
1540 	if (!rq)
1541 		return "";
1542 
1543 	snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
1544 		 prefix,
1545 		 rq->context->lrc.ccid,
1546 		 rq->fence.context, rq->fence.seqno,
1547 		 i915_request_completed(rq) ? "!" :
1548 		 i915_request_started(rq) ? "*" :
1549 		 "",
1550 		 rq_prio(rq));
1551 
1552 	return buf;
1553 }
1554 
1555 static __maybe_unused void
1556 trace_ports(const struct intel_engine_execlists *execlists,
1557 	    const char *msg,
1558 	    struct i915_request * const *ports)
1559 {
1560 	const struct intel_engine_cs *engine =
1561 		container_of(execlists, typeof(*engine), execlists);
1562 	char __maybe_unused p0[40], p1[40];
1563 
1564 	if (!ports[0])
1565 		return;
1566 
1567 	ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
1568 		     dump_port(p0, sizeof(p0), "", ports[0]),
1569 		     dump_port(p1, sizeof(p1), ", ", ports[1]));
1570 }
1571 
1572 static inline bool
1573 reset_in_progress(const struct intel_engine_execlists *execlists)
1574 {
1575 	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1576 }
1577 
1578 static __maybe_unused bool
1579 assert_pending_valid(const struct intel_engine_execlists *execlists,
1580 		     const char *msg)
1581 {
1582 	struct intel_engine_cs *engine =
1583 		container_of(execlists, typeof(*engine), execlists);
1584 	struct i915_request * const *port, *rq;
1585 	struct intel_context *ce = NULL;
1586 	bool sentinel = false;
1587 	u32 ccid = -1;
1588 
1589 	trace_ports(execlists, msg, execlists->pending);
1590 
1591 	/* We may be messing around with the lists during reset, lalala */
1592 	if (reset_in_progress(execlists))
1593 		return true;
1594 
1595 	if (!execlists->pending[0]) {
1596 		GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
1597 			      engine->name);
1598 		return false;
1599 	}
1600 
1601 	if (execlists->pending[execlists_num_ports(execlists)]) {
1602 		GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
1603 			      engine->name, execlists_num_ports(execlists));
1604 		return false;
1605 	}
1606 
1607 	for (port = execlists->pending; (rq = *port); port++) {
1608 		unsigned long flags;
1609 		bool ok = true;
1610 
1611 		GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1612 		GEM_BUG_ON(!i915_request_is_active(rq));
1613 
1614 		if (ce == rq->context) {
1615 			GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
1616 				      engine->name,
1617 				      ce->timeline->fence_context,
1618 				      port - execlists->pending);
1619 			return false;
1620 		}
1621 		ce = rq->context;
1622 
1623 		if (ccid == ce->lrc.ccid) {
1624 			GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
1625 				      engine->name,
1626 				      ccid, ce->timeline->fence_context,
1627 				      port - execlists->pending);
1628 			return false;
1629 		}
1630 		ccid = ce->lrc.ccid;
1631 
1632 		/*
1633 		 * Sentinels are supposed to be the last request so they flush
1634 		 * the current execution off the HW. Check that they are the only
1635 		 * request in the pending submission.
1636 		 */
1637 		if (sentinel) {
1638 			GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
1639 				      engine->name,
1640 				      ce->timeline->fence_context,
1641 				      port - execlists->pending);
1642 			return false;
1643 		}
1644 		sentinel = i915_request_has_sentinel(rq);
1645 
1646 		/* Hold tightly onto the lock to prevent concurrent retires! */
1647 		if (!spin_trylock_irqsave(&rq->lock, flags))
1648 			continue;
1649 
1650 		if (i915_request_completed(rq))
1651 			goto unlock;
1652 
1653 		if (i915_active_is_idle(&ce->active) &&
1654 		    !intel_context_is_barrier(ce)) {
1655 			GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
1656 				      engine->name,
1657 				      ce->timeline->fence_context,
1658 				      port - execlists->pending);
1659 			ok = false;
1660 			goto unlock;
1661 		}
1662 
1663 		if (!i915_vma_is_pinned(ce->state)) {
1664 			GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
1665 				      engine->name,
1666 				      ce->timeline->fence_context,
1667 				      port - execlists->pending);
1668 			ok = false;
1669 			goto unlock;
1670 		}
1671 
1672 		if (!i915_vma_is_pinned(ce->ring->vma)) {
1673 			GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
1674 				      engine->name,
1675 				      ce->timeline->fence_context,
1676 				      port - execlists->pending);
1677 			ok = false;
1678 			goto unlock;
1679 		}
1680 
1681 unlock:
1682 		spin_unlock_irqrestore(&rq->lock, flags);
1683 		if (!ok)
1684 			return false;
1685 	}
1686 
1687 	return ce;
1688 }
1689 
1690 static void execlists_submit_ports(struct intel_engine_cs *engine)
1691 {
1692 	struct intel_engine_execlists *execlists = &engine->execlists;
1693 	unsigned int n;
1694 
1695 	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1696 
1697 	/*
1698 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
1699 	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
1700 	 * not be relinquished until the device is idle (see
1701 	 * i915_gem_idle_work_handler()). As a precaution, we make sure
1702 	 * that all ELSP are drained i.e. we have processed the CSB,
1703 	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
1704 	 */
1705 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1706 
1707 	/*
1708 	 * ELSQ note: the submit queue is not cleared after being submitted
1709 	 * to the HW so we need to make sure we always clean it up. This is
1710 	 * currently ensured by the fact that we always write the same number
1711 	 * of elsq entries, keep this in mind before changing the loop below.
1712 	 */
1713 	for (n = execlists_num_ports(execlists); n--; ) {
1714 		struct i915_request *rq = execlists->pending[n];
1715 
1716 		write_desc(execlists,
1717 			   rq ? execlists_update_context(rq) : 0,
1718 			   n);
1719 	}
1720 
1721 	/* we need to manually load the submit queue */
1722 	if (execlists->ctrl_reg)
1723 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1724 }
1725 
1726 static bool ctx_single_port_submission(const struct intel_context *ce)
1727 {
1728 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1729 		intel_context_force_single_submission(ce));
1730 }
1731 
1732 static bool can_merge_ctx(const struct intel_context *prev,
1733 			  const struct intel_context *next)
1734 {
1735 	if (prev != next)
1736 		return false;
1737 
1738 	if (ctx_single_port_submission(prev))
1739 		return false;
1740 
1741 	return true;
1742 }
1743 
i915_request_flags(const struct i915_request * rq)1744 static unsigned long i915_request_flags(const struct i915_request *rq)
1745 {
1746 	return READ_ONCE(rq->fence.flags);
1747 }
1748 
can_merge_rq(const struct i915_request * prev,const struct i915_request * next)1749 static bool can_merge_rq(const struct i915_request *prev,
1750 			 const struct i915_request *next)
1751 {
1752 	GEM_BUG_ON(prev == next);
1753 	GEM_BUG_ON(!assert_priority_queue(prev, next));
1754 
1755 	/*
1756 	 * We do not submit known completed requests. Therefore if the next
1757 	 * request is already completed, we can pretend to merge it in
1758 	 * with the previous context (and we will skip updating the ELSP
1759 	 * and tracking). Thus hopefully keeping the ELSP full with active
1760 	 * contexts, despite the best efforts of preempt-to-busy to confuse
1761 	 * us.
1762 	 */
1763 	if (i915_request_completed(next))
1764 		return true;
1765 
1766 	if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
1767 		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1768 		      BIT(I915_FENCE_FLAG_SENTINEL))))
1769 		return false;
1770 
1771 	if (!can_merge_ctx(prev->context, next->context))
1772 		return false;
1773 
1774 	GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
1775 	return true;
1776 }
1777 
virtual_update_register_offsets(u32 * regs,struct intel_engine_cs * engine)1778 static void virtual_update_register_offsets(u32 *regs,
1779 					    struct intel_engine_cs *engine)
1780 {
1781 	set_offsets(regs, reg_offsets(engine), engine, false);
1782 }
1783 
virtual_matches(const struct virtual_engine * ve,const struct i915_request * rq,const struct intel_engine_cs * engine)1784 static bool virtual_matches(const struct virtual_engine *ve,
1785 			    const struct i915_request *rq,
1786 			    const struct intel_engine_cs *engine)
1787 {
1788 	const struct intel_engine_cs *inflight;
1789 
1790 	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1791 		return false;
1792 
1793 	/*
1794 	 * We track when the HW has completed saving the context image
1795 	 * (i.e. when we have seen the final CS event switching out of
1796 	 * the context) and must not overwrite the context image before
1797 	 * then. This restricts us to only using the active engine
1798 	 * while the previous virtualized request is inflight (so
1799 	 * we reuse the register offsets). This is a very small
1800 	 * hystersis on the greedy seelction algorithm.
1801 	 */
1802 	inflight = intel_context_inflight(&ve->context);
1803 	if (inflight && inflight != engine)
1804 		return false;
1805 
1806 	return true;
1807 }
1808 
virtual_xfer_context(struct virtual_engine * ve,struct intel_engine_cs * engine)1809 static void virtual_xfer_context(struct virtual_engine *ve,
1810 				 struct intel_engine_cs *engine)
1811 {
1812 	unsigned int n;
1813 
1814 	if (likely(engine == ve->siblings[0]))
1815 		return;
1816 
1817 	GEM_BUG_ON(READ_ONCE(ve->context.inflight));
1818 	if (!intel_engine_has_relative_mmio(engine))
1819 		virtual_update_register_offsets(ve->context.lrc_reg_state,
1820 						engine);
1821 
1822 	/*
1823 	 * Move the bound engine to the top of the list for
1824 	 * future execution. We then kick this tasklet first
1825 	 * before checking others, so that we preferentially
1826 	 * reuse this set of bound registers.
1827 	 */
1828 	for (n = 1; n < ve->num_siblings; n++) {
1829 		if (ve->siblings[n] == engine) {
1830 			swap(ve->siblings[n], ve->siblings[0]);
1831 			break;
1832 		}
1833 	}
1834 }
1835 
1836 #define for_each_waiter(p__, rq__) \
1837 	list_for_each_entry_lockless(p__, \
1838 				     &(rq__)->sched.waiters_list, \
1839 				     wait_link)
1840 
1841 #define for_each_signaler(p__, rq__) \
1842 	list_for_each_entry_rcu(p__, \
1843 				&(rq__)->sched.signalers_list, \
1844 				signal_link)
1845 
defer_request(struct i915_request * rq,struct list_head * const pl)1846 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1847 {
1848 	LIST_HEAD(list);
1849 
1850 	/*
1851 	 * We want to move the interrupted request to the back of
1852 	 * the round-robin list (i.e. its priority level), but
1853 	 * in doing so, we must then move all requests that were in
1854 	 * flight and were waiting for the interrupted request to
1855 	 * be run after it again.
1856 	 */
1857 	do {
1858 		struct i915_dependency *p;
1859 
1860 		GEM_BUG_ON(i915_request_is_active(rq));
1861 		list_move_tail(&rq->sched.link, pl);
1862 
1863 		for_each_waiter(p, rq) {
1864 			struct i915_request *w =
1865 				container_of(p->waiter, typeof(*w), sched);
1866 
1867 			if (p->flags & I915_DEPENDENCY_WEAK)
1868 				continue;
1869 
1870 			/* Leave semaphores spinning on the other engines */
1871 			if (w->engine != rq->engine)
1872 				continue;
1873 
1874 			/* No waiter should start before its signaler */
1875 			GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
1876 				   i915_request_started(w) &&
1877 				   !i915_request_completed(rq));
1878 
1879 			GEM_BUG_ON(i915_request_is_active(w));
1880 			if (!i915_request_is_ready(w))
1881 				continue;
1882 
1883 			if (rq_prio(w) < rq_prio(rq))
1884 				continue;
1885 
1886 			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1887 			list_move_tail(&w->sched.link, &list);
1888 		}
1889 
1890 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1891 	} while (rq);
1892 }
1893 
defer_active(struct intel_engine_cs * engine)1894 static void defer_active(struct intel_engine_cs *engine)
1895 {
1896 	struct i915_request *rq;
1897 
1898 	rq = __unwind_incomplete_requests(engine);
1899 	if (!rq)
1900 		return;
1901 
1902 	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1903 }
1904 
1905 static bool
need_timeslice(const struct intel_engine_cs * engine,const struct i915_request * rq,const struct rb_node * rb)1906 need_timeslice(const struct intel_engine_cs *engine,
1907 	       const struct i915_request *rq,
1908 	       const struct rb_node *rb)
1909 {
1910 	int hint;
1911 
1912 	if (!intel_engine_has_timeslices(engine))
1913 		return false;
1914 
1915 	hint = engine->execlists.queue_priority_hint;
1916 
1917 	if (rb) {
1918 		const struct virtual_engine *ve =
1919 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1920 		const struct intel_engine_cs *inflight =
1921 			intel_context_inflight(&ve->context);
1922 
1923 		if (!inflight || inflight == engine) {
1924 			struct i915_request *next;
1925 
1926 			rcu_read_lock();
1927 			next = READ_ONCE(ve->request);
1928 			if (next)
1929 				hint = max(hint, rq_prio(next));
1930 			rcu_read_unlock();
1931 		}
1932 	}
1933 
1934 	if (!list_is_last(&rq->sched.link, &engine->active.requests))
1935 		hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1936 
1937 	GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
1938 	return hint >= effective_prio(rq);
1939 }
1940 
1941 static bool
timeslice_yield(const struct intel_engine_execlists * el,const struct i915_request * rq)1942 timeslice_yield(const struct intel_engine_execlists *el,
1943 		const struct i915_request *rq)
1944 {
1945 	/*
1946 	 * Once bitten, forever smitten!
1947 	 *
1948 	 * If the active context ever busy-waited on a semaphore,
1949 	 * it will be treated as a hog until the end of its timeslice (i.e.
1950 	 * until it is scheduled out and replaced by a new submission,
1951 	 * possibly even its own lite-restore). The HW only sends an interrupt
1952 	 * on the first miss, and we do know if that semaphore has been
1953 	 * signaled, or even if it is now stuck on another semaphore. Play
1954 	 * safe, yield if it might be stuck -- it will be given a fresh
1955 	 * timeslice in the near future.
1956 	 */
1957 	return rq->context->lrc.ccid == READ_ONCE(el->yield);
1958 }
1959 
1960 static bool
timeslice_expired(const struct intel_engine_execlists * el,const struct i915_request * rq)1961 timeslice_expired(const struct intel_engine_execlists *el,
1962 		  const struct i915_request *rq)
1963 {
1964 	return timer_expired(&el->timer) || timeslice_yield(el, rq);
1965 }
1966 
1967 static int
switch_prio(struct intel_engine_cs * engine,const struct i915_request * rq)1968 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1969 {
1970 	if (list_is_last(&rq->sched.link, &engine->active.requests))
1971 		return engine->execlists.queue_priority_hint;
1972 
1973 	return rq_prio(list_next_entry(rq, sched.link));
1974 }
1975 
1976 static inline unsigned long
timeslice(const struct intel_engine_cs * engine)1977 timeslice(const struct intel_engine_cs *engine)
1978 {
1979 	return READ_ONCE(engine->props.timeslice_duration_ms);
1980 }
1981 
active_timeslice(const struct intel_engine_cs * engine)1982 static unsigned long active_timeslice(const struct intel_engine_cs *engine)
1983 {
1984 	const struct intel_engine_execlists *execlists = &engine->execlists;
1985 	const struct i915_request *rq = *execlists->active;
1986 
1987 	if (!rq || i915_request_completed(rq))
1988 		return 0;
1989 
1990 	if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
1991 		return 0;
1992 
1993 	return timeslice(engine);
1994 }
1995 
set_timeslice(struct intel_engine_cs * engine)1996 static void set_timeslice(struct intel_engine_cs *engine)
1997 {
1998 	unsigned long duration;
1999 
2000 	if (!intel_engine_has_timeslices(engine))
2001 		return;
2002 
2003 	duration = active_timeslice(engine);
2004 	ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
2005 
2006 	set_timer_ms(&engine->execlists.timer, duration);
2007 }
2008 
start_timeslice(struct intel_engine_cs * engine,int prio)2009 static void start_timeslice(struct intel_engine_cs *engine, int prio)
2010 {
2011 	struct intel_engine_execlists *execlists = &engine->execlists;
2012 	unsigned long duration;
2013 
2014 	if (!intel_engine_has_timeslices(engine))
2015 		return;
2016 
2017 	WRITE_ONCE(execlists->switch_priority_hint, prio);
2018 	if (prio == INT_MIN)
2019 		return;
2020 
2021 	if (timer_pending(&execlists->timer))
2022 		return;
2023 
2024 	duration = timeslice(engine);
2025 	ENGINE_TRACE(engine,
2026 		     "start timeslicing, prio:%d, interval:%lu",
2027 		     prio, duration);
2028 
2029 	set_timer_ms(&execlists->timer, duration);
2030 }
2031 
record_preemption(struct intel_engine_execlists * execlists)2032 static void record_preemption(struct intel_engine_execlists *execlists)
2033 {
2034 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
2035 }
2036 
active_preempt_timeout(struct intel_engine_cs * engine,const struct i915_request * rq)2037 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
2038 					    const struct i915_request *rq)
2039 {
2040 	if (!rq)
2041 		return 0;
2042 
2043 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
2044 	if (unlikely(intel_context_is_banned(rq->context)))
2045 		return 1;
2046 
2047 	return READ_ONCE(engine->props.preempt_timeout_ms);
2048 }
2049 
set_preempt_timeout(struct intel_engine_cs * engine,const struct i915_request * rq)2050 static void set_preempt_timeout(struct intel_engine_cs *engine,
2051 				const struct i915_request *rq)
2052 {
2053 	if (!intel_engine_has_preempt_reset(engine))
2054 		return;
2055 
2056 	set_timer_ms(&engine->execlists.preempt,
2057 		     active_preempt_timeout(engine, rq));
2058 }
2059 
clear_ports(struct i915_request ** ports,int count)2060 static inline void clear_ports(struct i915_request **ports, int count)
2061 {
2062 	memset_p((void **)ports, NULL, count);
2063 }
2064 
2065 static inline void
copy_ports(struct i915_request ** dst,struct i915_request ** src,int count)2066 copy_ports(struct i915_request **dst, struct i915_request **src, int count)
2067 {
2068 	/* A memcpy_p() would be very useful here! */
2069 	while (count--)
2070 		WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
2071 }
2072 
execlists_dequeue(struct intel_engine_cs * engine)2073 static void execlists_dequeue(struct intel_engine_cs *engine)
2074 {
2075 	struct intel_engine_execlists * const execlists = &engine->execlists;
2076 	struct i915_request **port = execlists->pending;
2077 	struct i915_request ** const last_port = port + execlists->port_mask;
2078 	struct i915_request * const *active;
2079 	struct i915_request *last;
2080 	struct rb_node *rb;
2081 	bool submit = false;
2082 
2083 	/*
2084 	 * Hardware submission is through 2 ports. Conceptually each port
2085 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
2086 	 * static for a context, and unique to each, so we only execute
2087 	 * requests belonging to a single context from each ring. RING_HEAD
2088 	 * is maintained by the CS in the context image, it marks the place
2089 	 * where it got up to last time, and through RING_TAIL we tell the CS
2090 	 * where we want to execute up to this time.
2091 	 *
2092 	 * In this list the requests are in order of execution. Consecutive
2093 	 * requests from the same context are adjacent in the ringbuffer. We
2094 	 * can combine these requests into a single RING_TAIL update:
2095 	 *
2096 	 *              RING_HEAD...req1...req2
2097 	 *                                    ^- RING_TAIL
2098 	 * since to execute req2 the CS must first execute req1.
2099 	 *
2100 	 * Our goal then is to point each port to the end of a consecutive
2101 	 * sequence of requests as being the most optimal (fewest wake ups
2102 	 * and context switches) submission.
2103 	 */
2104 
2105 	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
2106 		struct virtual_engine *ve =
2107 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2108 		struct i915_request *rq = READ_ONCE(ve->request);
2109 
2110 		if (!rq) { /* lazily cleanup after another engine handled rq */
2111 			rb_erase_cached(rb, &execlists->virtual);
2112 			RB_CLEAR_NODE(rb);
2113 			rb = rb_first_cached(&execlists->virtual);
2114 			continue;
2115 		}
2116 
2117 		if (!virtual_matches(ve, rq, engine)) {
2118 			rb = rb_next(rb);
2119 			continue;
2120 		}
2121 
2122 		break;
2123 	}
2124 
2125 	/*
2126 	 * If the queue is higher priority than the last
2127 	 * request in the currently active context, submit afresh.
2128 	 * We will resubmit again afterwards in case we need to split
2129 	 * the active context to interject the preemption request,
2130 	 * i.e. we will retrigger preemption following the ack in case
2131 	 * of trouble.
2132 	 */
2133 	active = READ_ONCE(execlists->active);
2134 
2135 	/*
2136 	 * In theory we can skip over completed contexts that have not
2137 	 * yet been processed by events (as those events are in flight):
2138 	 *
2139 	 * while ((last = *active) && i915_request_completed(last))
2140 	 *	active++;
2141 	 *
2142 	 * However, the GPU cannot handle this as it will ultimately
2143 	 * find itself trying to jump back into a context it has just
2144 	 * completed and barf.
2145 	 */
2146 
2147 	if ((last = *active)) {
2148 		if (need_preempt(engine, last, rb)) {
2149 			if (i915_request_completed(last)) {
2150 				tasklet_hi_schedule(&execlists->tasklet);
2151 				return;
2152 			}
2153 
2154 			ENGINE_TRACE(engine,
2155 				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
2156 				     last->fence.context,
2157 				     last->fence.seqno,
2158 				     last->sched.attr.priority,
2159 				     execlists->queue_priority_hint);
2160 			record_preemption(execlists);
2161 
2162 			/*
2163 			 * Don't let the RING_HEAD advance past the breadcrumb
2164 			 * as we unwind (and until we resubmit) so that we do
2165 			 * not accidentally tell it to go backwards.
2166 			 */
2167 			ring_set_paused(engine, 1);
2168 
2169 			/*
2170 			 * Note that we have not stopped the GPU at this point,
2171 			 * so we are unwinding the incomplete requests as they
2172 			 * remain inflight and so by the time we do complete
2173 			 * the preemption, some of the unwound requests may
2174 			 * complete!
2175 			 */
2176 			__unwind_incomplete_requests(engine);
2177 
2178 			last = NULL;
2179 		} else if (need_timeslice(engine, last, rb) &&
2180 			   timeslice_expired(execlists, last)) {
2181 			if (i915_request_completed(last)) {
2182 				tasklet_hi_schedule(&execlists->tasklet);
2183 				return;
2184 			}
2185 
2186 			ENGINE_TRACE(engine,
2187 				     "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
2188 				     last->fence.context,
2189 				     last->fence.seqno,
2190 				     last->sched.attr.priority,
2191 				     execlists->queue_priority_hint,
2192 				     yesno(timeslice_yield(execlists, last)));
2193 
2194 			ring_set_paused(engine, 1);
2195 			defer_active(engine);
2196 
2197 			/*
2198 			 * Unlike for preemption, if we rewind and continue
2199 			 * executing the same context as previously active,
2200 			 * the order of execution will remain the same and
2201 			 * the tail will only advance. We do not need to
2202 			 * force a full context restore, as a lite-restore
2203 			 * is sufficient to resample the monotonic TAIL.
2204 			 *
2205 			 * If we switch to any other context, similarly we
2206 			 * will not rewind TAIL of current context, and
2207 			 * normal save/restore will preserve state and allow
2208 			 * us to later continue executing the same request.
2209 			 */
2210 			last = NULL;
2211 		} else {
2212 			/*
2213 			 * Otherwise if we already have a request pending
2214 			 * for execution after the current one, we can
2215 			 * just wait until the next CS event before
2216 			 * queuing more. In either case we will force a
2217 			 * lite-restore preemption event, but if we wait
2218 			 * we hopefully coalesce several updates into a single
2219 			 * submission.
2220 			 */
2221 			if (!list_is_last(&last->sched.link,
2222 					  &engine->active.requests)) {
2223 				/*
2224 				 * Even if ELSP[1] is occupied and not worthy
2225 				 * of timeslices, our queue might be.
2226 				 */
2227 				start_timeslice(engine, queue_prio(execlists));
2228 				return;
2229 			}
2230 		}
2231 	}
2232 
2233 	while (rb) { /* XXX virtual is always taking precedence */
2234 		struct virtual_engine *ve =
2235 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2236 		struct i915_request *rq;
2237 
2238 		spin_lock(&ve->base.active.lock);
2239 
2240 		rq = ve->request;
2241 		if (unlikely(!rq)) { /* lost the race to a sibling */
2242 			spin_unlock(&ve->base.active.lock);
2243 			rb_erase_cached(rb, &execlists->virtual);
2244 			RB_CLEAR_NODE(rb);
2245 			rb = rb_first_cached(&execlists->virtual);
2246 			continue;
2247 		}
2248 
2249 		GEM_BUG_ON(rq != ve->request);
2250 		GEM_BUG_ON(rq->engine != &ve->base);
2251 		GEM_BUG_ON(rq->context != &ve->context);
2252 
2253 		if (rq_prio(rq) >= queue_prio(execlists)) {
2254 			if (!virtual_matches(ve, rq, engine)) {
2255 				spin_unlock(&ve->base.active.lock);
2256 				rb = rb_next(rb);
2257 				continue;
2258 			}
2259 
2260 			if (last && !can_merge_rq(last, rq)) {
2261 				spin_unlock(&ve->base.active.lock);
2262 				start_timeslice(engine, rq_prio(rq));
2263 				return; /* leave this for another sibling */
2264 			}
2265 
2266 			ENGINE_TRACE(engine,
2267 				     "virtual rq=%llx:%lld%s, new engine? %s\n",
2268 				     rq->fence.context,
2269 				     rq->fence.seqno,
2270 				     i915_request_completed(rq) ? "!" :
2271 				     i915_request_started(rq) ? "*" :
2272 				     "",
2273 				     yesno(engine != ve->siblings[0]));
2274 
2275 			WRITE_ONCE(ve->request, NULL);
2276 			WRITE_ONCE(ve->base.execlists.queue_priority_hint,
2277 				   INT_MIN);
2278 			rb_erase_cached(rb, &execlists->virtual);
2279 			RB_CLEAR_NODE(rb);
2280 
2281 			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2282 			WRITE_ONCE(rq->engine, engine);
2283 
2284 			if (__i915_request_submit(rq)) {
2285 				/*
2286 				 * Only after we confirm that we will submit
2287 				 * this request (i.e. it has not already
2288 				 * completed), do we want to update the context.
2289 				 *
2290 				 * This serves two purposes. It avoids
2291 				 * unnecessary work if we are resubmitting an
2292 				 * already completed request after timeslicing.
2293 				 * But more importantly, it prevents us altering
2294 				 * ve->siblings[] on an idle context, where
2295 				 * we may be using ve->siblings[] in
2296 				 * virtual_context_enter / virtual_context_exit.
2297 				 */
2298 				virtual_xfer_context(ve, engine);
2299 				GEM_BUG_ON(ve->siblings[0] != engine);
2300 
2301 				submit = true;
2302 				last = rq;
2303 			}
2304 			i915_request_put(rq);
2305 
2306 			/*
2307 			 * Hmm, we have a bunch of virtual engine requests,
2308 			 * but the first one was already completed (thanks
2309 			 * preempt-to-busy!). Keep looking at the veng queue
2310 			 * until we have no more relevant requests (i.e.
2311 			 * the normal submit queue has higher priority).
2312 			 */
2313 			if (!submit) {
2314 				spin_unlock(&ve->base.active.lock);
2315 				rb = rb_first_cached(&execlists->virtual);
2316 				continue;
2317 			}
2318 		}
2319 
2320 		spin_unlock(&ve->base.active.lock);
2321 		break;
2322 	}
2323 
2324 	while ((rb = rb_first_cached(&execlists->queue))) {
2325 		struct i915_priolist *p = to_priolist(rb);
2326 		struct i915_request *rq, *rn;
2327 		int i;
2328 
2329 		priolist_for_each_request_consume(rq, rn, p, i) {
2330 			bool merge = true;
2331 
2332 			/*
2333 			 * Can we combine this request with the current port?
2334 			 * It has to be the same context/ringbuffer and not
2335 			 * have any exceptions (e.g. GVT saying never to
2336 			 * combine contexts).
2337 			 *
2338 			 * If we can combine the requests, we can execute both
2339 			 * by updating the RING_TAIL to point to the end of the
2340 			 * second request, and so we never need to tell the
2341 			 * hardware about the first.
2342 			 */
2343 			if (last && !can_merge_rq(last, rq)) {
2344 				/*
2345 				 * If we are on the second port and cannot
2346 				 * combine this request with the last, then we
2347 				 * are done.
2348 				 */
2349 				if (port == last_port)
2350 					goto done;
2351 
2352 				/*
2353 				 * We must not populate both ELSP[] with the
2354 				 * same LRCA, i.e. we must submit 2 different
2355 				 * contexts if we submit 2 ELSP.
2356 				 */
2357 				if (last->context == rq->context)
2358 					goto done;
2359 
2360 				if (i915_request_has_sentinel(last))
2361 					goto done;
2362 
2363 				/*
2364 				 * If GVT overrides us we only ever submit
2365 				 * port[0], leaving port[1] empty. Note that we
2366 				 * also have to be careful that we don't queue
2367 				 * the same context (even though a different
2368 				 * request) to the second port.
2369 				 */
2370 				if (ctx_single_port_submission(last->context) ||
2371 				    ctx_single_port_submission(rq->context))
2372 					goto done;
2373 
2374 				merge = false;
2375 			}
2376 
2377 			if (__i915_request_submit(rq)) {
2378 				if (!merge) {
2379 					*port = execlists_schedule_in(last, port - execlists->pending);
2380 					port++;
2381 					last = NULL;
2382 				}
2383 
2384 				GEM_BUG_ON(last &&
2385 					   !can_merge_ctx(last->context,
2386 							  rq->context));
2387 				GEM_BUG_ON(last &&
2388 					   i915_seqno_passed(last->fence.seqno,
2389 							     rq->fence.seqno));
2390 
2391 				submit = true;
2392 				last = rq;
2393 			}
2394 		}
2395 
2396 		rb_erase_cached(&p->node, &execlists->queue);
2397 		i915_priolist_free(p);
2398 	}
2399 
2400 done:
2401 	/*
2402 	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2403 	 *
2404 	 * We choose the priority hint such that if we add a request of greater
2405 	 * priority than this, we kick the submission tasklet to decide on
2406 	 * the right order of submitting the requests to hardware. We must
2407 	 * also be prepared to reorder requests as they are in-flight on the
2408 	 * HW. We derive the priority hint then as the first "hole" in
2409 	 * the HW submission ports and if there are no available slots,
2410 	 * the priority of the lowest executing request, i.e. last.
2411 	 *
2412 	 * When we do receive a higher priority request ready to run from the
2413 	 * user, see queue_request(), the priority hint is bumped to that
2414 	 * request triggering preemption on the next dequeue (or subsequent
2415 	 * interrupt for secondary ports).
2416 	 */
2417 	execlists->queue_priority_hint = queue_prio(execlists);
2418 
2419 	if (submit) {
2420 		*port = execlists_schedule_in(last, port - execlists->pending);
2421 		execlists->switch_priority_hint =
2422 			switch_prio(engine, *execlists->pending);
2423 
2424 		/*
2425 		 * Skip if we ended up with exactly the same set of requests,
2426 		 * e.g. trying to timeslice a pair of ordered contexts
2427 		 */
2428 		if (!memcmp(active, execlists->pending,
2429 			    (port - execlists->pending + 1) * sizeof(*port))) {
2430 			do
2431 				execlists_schedule_out(fetch_and_zero(port));
2432 			while (port-- != execlists->pending);
2433 
2434 			goto skip_submit;
2435 		}
2436 		clear_ports(port + 1, last_port - port);
2437 
2438 		WRITE_ONCE(execlists->yield, -1);
2439 		set_preempt_timeout(engine, *active);
2440 		execlists_submit_ports(engine);
2441 	} else {
2442 		start_timeslice(engine, execlists->queue_priority_hint);
2443 skip_submit:
2444 		ring_set_paused(engine, 0);
2445 	}
2446 }
2447 
2448 static void
cancel_port_requests(struct intel_engine_execlists * const execlists)2449 cancel_port_requests(struct intel_engine_execlists * const execlists)
2450 {
2451 	struct i915_request * const *port;
2452 
2453 	for (port = execlists->pending; *port; port++)
2454 		execlists_schedule_out(*port);
2455 	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2456 
2457 	/* Mark the end of active before we overwrite *active */
2458 	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2459 		execlists_schedule_out(*port);
2460 	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2461 
2462 	smp_wmb(); /* complete the seqlock for execlists_active() */
2463 	WRITE_ONCE(execlists->active, execlists->inflight);
2464 }
2465 
2466 static inline void
invalidate_csb_entries(const u64 * first,const u64 * last)2467 invalidate_csb_entries(const u64 *first, const u64 *last)
2468 {
2469 	clflush((void *)first);
2470 	clflush((void *)last);
2471 }
2472 
2473 /*
2474  * Starting with Gen12, the status has a new format:
2475  *
2476  *     bit  0:     switched to new queue
2477  *     bit  1:     reserved
2478  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2479  *                 switch detail is set to "wait on semaphore"
2480  *     bits 3-5:   engine class
2481  *     bits 6-11:  engine instance
2482  *     bits 12-14: reserved
2483  *     bits 15-25: sw context id of the lrc the GT switched to
2484  *     bits 26-31: sw counter of the lrc the GT switched to
2485  *     bits 32-35: context switch detail
2486  *                  - 0: ctx complete
2487  *                  - 1: wait on sync flip
2488  *                  - 2: wait on vblank
2489  *                  - 3: wait on scanline
2490  *                  - 4: wait on semaphore
2491  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2492  *                       WAIT_FOR_EVENT)
2493  *     bit  36:    reserved
2494  *     bits 37-43: wait detail (for switch detail 1 to 4)
2495  *     bits 44-46: reserved
2496  *     bits 47-57: sw context id of the lrc the GT switched away from
2497  *     bits 58-63: sw counter of the lrc the GT switched away from
2498  */
gen12_csb_parse(const u64 * csb)2499 static inline bool gen12_csb_parse(const u64 *csb)
2500 {
2501 	bool ctx_away_valid;
2502 	bool new_queue;
2503 	u64 entry;
2504 
2505 	/* HSD#22011248461 */
2506 	entry = READ_ONCE(*csb);
2507 	if (unlikely(entry == -1)) {
2508 		preempt_disable();
2509 		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
2510 			GEM_WARN_ON("50us CSB timeout");
2511 		preempt_enable();
2512 	}
2513 	WRITE_ONCE(*(u64 *)csb, -1);
2514 
2515 	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
2516 	new_queue =
2517 		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2518 
2519 	/*
2520 	 * The context switch detail is not guaranteed to be 5 when a preemption
2521 	 * occurs, so we can't just check for that. The check below works for
2522 	 * all the cases we care about, including preemptions of WAIT
2523 	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
2524 	 * would require some extra handling, but we don't support that.
2525 	 */
2526 	if (!ctx_away_valid || new_queue) {
2527 		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
2528 		return true;
2529 	}
2530 
2531 	/*
2532 	 * switch detail = 5 is covered by the case above and we do not expect a
2533 	 * context switch on an unsuccessful wait instruction since we always
2534 	 * use polling mode.
2535 	 */
2536 	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
2537 	return false;
2538 }
2539 
gen8_csb_parse(const u64 * csb)2540 static inline bool gen8_csb_parse(const u64 *csb)
2541 {
2542 	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2543 }
2544 
process_csb(struct intel_engine_cs * engine)2545 static void process_csb(struct intel_engine_cs *engine)
2546 {
2547 	struct intel_engine_execlists * const execlists = &engine->execlists;
2548 	const u64 * const buf = execlists->csb_status;
2549 	const u8 num_entries = execlists->csb_size;
2550 	u8 head, tail;
2551 
2552 	/*
2553 	 * As we modify our execlists state tracking we require exclusive
2554 	 * access. Either we are inside the tasklet, or the tasklet is disabled
2555 	 * and we assume that is only inside the reset paths and so serialised.
2556 	 */
2557 	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2558 		   !reset_in_progress(execlists));
2559 	GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2560 
2561 	/*
2562 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
2563 	 * When reading from the csb_write mmio register, we have to be
2564 	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2565 	 * the low 4bits. As it happens we know the next 4bits are always
2566 	 * zero and so we can simply masked off the low u8 of the register
2567 	 * and treat it identically to reading from the HWSP (without having
2568 	 * to use explicit shifting and masking, and probably bifurcating
2569 	 * the code to handle the legacy mmio read).
2570 	 */
2571 	head = execlists->csb_head;
2572 	tail = READ_ONCE(*execlists->csb_write);
2573 	if (unlikely(head == tail))
2574 		return;
2575 
2576 	/*
2577 	 * We will consume all events from HW, or at least pretend to.
2578 	 *
2579 	 * The sequence of events from the HW is deterministic, and derived
2580 	 * from our writes to the ELSP, with a smidgen of variability for
2581 	 * the arrival of the asynchronous requests wrt to the inflight
2582 	 * execution. If the HW sends an event that does not correspond with
2583 	 * the one we are expecting, we have to abandon all hope as we lose
2584 	 * all tracking of what the engine is actually executing. We will
2585 	 * only detect we are out of sequence with the HW when we get an
2586 	 * 'impossible' event because we have already drained our own
2587 	 * preemption/promotion queue. If this occurs, we know that we likely
2588 	 * lost track of execution earlier and must unwind and restart, the
2589 	 * simplest way is by stop processing the event queue and force the
2590 	 * engine to reset.
2591 	 */
2592 	execlists->csb_head = tail;
2593 	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2594 
2595 	/*
2596 	 * Hopefully paired with a wmb() in HW!
2597 	 *
2598 	 * We must complete the read of the write pointer before any reads
2599 	 * from the CSB, so that we do not see stale values. Without an rmb
2600 	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
2601 	 * we perform the READ_ONCE(*csb_write).
2602 	 */
2603 	rmb();
2604 	do {
2605 		bool promote;
2606 
2607 		if (++head == num_entries)
2608 			head = 0;
2609 
2610 		/*
2611 		 * We are flying near dragons again.
2612 		 *
2613 		 * We hold a reference to the request in execlist_port[]
2614 		 * but no more than that. We are operating in softirq
2615 		 * context and so cannot hold any mutex or sleep. That
2616 		 * prevents us stopping the requests we are processing
2617 		 * in port[] from being retired simultaneously (the
2618 		 * breadcrumb will be complete before we see the
2619 		 * context-switch). As we only hold the reference to the
2620 		 * request, any pointer chasing underneath the request
2621 		 * is subject to a potential use-after-free. Thus we
2622 		 * store all of the bookkeeping within port[] as
2623 		 * required, and avoid using unguarded pointers beneath
2624 		 * request itself. The same applies to the atomic
2625 		 * status notifier.
2626 		 */
2627 
2628 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2629 			     head,
2630 			     upper_32_bits(buf[head]),
2631 			     lower_32_bits(buf[head]));
2632 
2633 		if (INTEL_GEN(engine->i915) >= 12)
2634 			promote = gen12_csb_parse(buf + head);
2635 		else
2636 			promote = gen8_csb_parse(buf + head);
2637 		if (promote) {
2638 			struct i915_request * const *old = execlists->active;
2639 
2640 			if (GEM_WARN_ON(!*execlists->pending)) {
2641 				execlists->error_interrupt |= ERROR_CSB;
2642 				break;
2643 			}
2644 
2645 			ring_set_paused(engine, 0);
2646 
2647 			/* Point active to the new ELSP; prevent overwriting */
2648 			WRITE_ONCE(execlists->active, execlists->pending);
2649 			smp_wmb(); /* notify execlists_active() */
2650 
2651 			/* cancel old inflight, prepare for switch */
2652 			trace_ports(execlists, "preempted", old);
2653 			while (*old)
2654 				execlists_schedule_out(*old++);
2655 
2656 			/* switch pending to inflight */
2657 			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2658 			copy_ports(execlists->inflight,
2659 				   execlists->pending,
2660 				   execlists_num_ports(execlists));
2661 			smp_wmb(); /* complete the seqlock */
2662 			WRITE_ONCE(execlists->active, execlists->inflight);
2663 
2664 			/* XXX Magic delay for tgl */
2665 			ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
2666 
2667 			WRITE_ONCE(execlists->pending[0], NULL);
2668 		} else {
2669 			if (GEM_WARN_ON(!*execlists->active)) {
2670 				execlists->error_interrupt |= ERROR_CSB;
2671 				break;
2672 			}
2673 
2674 			/* port0 completed, advanced to port1 */
2675 			trace_ports(execlists, "completed", execlists->active);
2676 
2677 			/*
2678 			 * We rely on the hardware being strongly
2679 			 * ordered, that the breadcrumb write is
2680 			 * coherent (visible from the CPU) before the
2681 			 * user interrupt is processed. One might assume
2682 			 * that the breadcrumb write being before the
2683 			 * user interrupt and the CS event for the context
2684 			 * switch would therefore be before the CS event
2685 			 * itself...
2686 			 */
2687 			if (GEM_SHOW_DEBUG() &&
2688 			    !i915_request_completed(*execlists->active)) {
2689 				struct i915_request *rq = *execlists->active;
2690 				const u32 *regs __maybe_unused =
2691 					rq->context->lrc_reg_state;
2692 
2693 				ENGINE_TRACE(engine,
2694 					     "context completed before request!\n");
2695 				ENGINE_TRACE(engine,
2696 					     "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
2697 					     ENGINE_READ(engine, RING_START),
2698 					     ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
2699 					     ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
2700 					     ENGINE_READ(engine, RING_CTL),
2701 					     ENGINE_READ(engine, RING_MI_MODE));
2702 				ENGINE_TRACE(engine,
2703 					     "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
2704 					     i915_ggtt_offset(rq->ring->vma),
2705 					     rq->head, rq->tail,
2706 					     rq->fence.context,
2707 					     lower_32_bits(rq->fence.seqno),
2708 					     hwsp_seqno(rq));
2709 				ENGINE_TRACE(engine,
2710 					     "ctx:{start:%08x, head:%04x, tail:%04x}, ",
2711 					     regs[CTX_RING_START],
2712 					     regs[CTX_RING_HEAD],
2713 					     regs[CTX_RING_TAIL]);
2714 			}
2715 
2716 			execlists_schedule_out(*execlists->active++);
2717 
2718 			GEM_BUG_ON(execlists->active - execlists->inflight >
2719 				   execlists_num_ports(execlists));
2720 		}
2721 	} while (head != tail);
2722 
2723 	set_timeslice(engine);
2724 
2725 	/*
2726 	 * Gen11 has proven to fail wrt global observation point between
2727 	 * entry and tail update, failing on the ordering and thus
2728 	 * we see an old entry in the context status buffer.
2729 	 *
2730 	 * Forcibly evict out entries for the next gpu csb update,
2731 	 * to increase the odds that we get a fresh entries with non
2732 	 * working hardware. The cost for doing so comes out mostly with
2733 	 * the wash as hardware, working or not, will need to do the
2734 	 * invalidation before.
2735 	 */
2736 	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2737 }
2738 
__execlists_submission_tasklet(struct intel_engine_cs * const engine)2739 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2740 {
2741 	lockdep_assert_held(&engine->active.lock);
2742 	if (!READ_ONCE(engine->execlists.pending[0])) {
2743 		rcu_read_lock(); /* protect peeking at execlists->active */
2744 		execlists_dequeue(engine);
2745 		rcu_read_unlock();
2746 	}
2747 }
2748 
__execlists_hold(struct i915_request * rq)2749 static void __execlists_hold(struct i915_request *rq)
2750 {
2751 	LIST_HEAD(list);
2752 
2753 	do {
2754 		struct i915_dependency *p;
2755 
2756 		if (i915_request_is_active(rq))
2757 			__i915_request_unsubmit(rq);
2758 
2759 		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2760 		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2761 		i915_request_set_hold(rq);
2762 		RQ_TRACE(rq, "on hold\n");
2763 
2764 		for_each_waiter(p, rq) {
2765 			struct i915_request *w =
2766 				container_of(p->waiter, typeof(*w), sched);
2767 
2768 			/* Leave semaphores spinning on the other engines */
2769 			if (w->engine != rq->engine)
2770 				continue;
2771 
2772 			if (!i915_request_is_ready(w))
2773 				continue;
2774 
2775 			if (i915_request_completed(w))
2776 				continue;
2777 
2778 			if (i915_request_on_hold(w))
2779 				continue;
2780 
2781 			list_move_tail(&w->sched.link, &list);
2782 		}
2783 
2784 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2785 	} while (rq);
2786 }
2787 
execlists_hold(struct intel_engine_cs * engine,struct i915_request * rq)2788 static bool execlists_hold(struct intel_engine_cs *engine,
2789 			   struct i915_request *rq)
2790 {
2791 	if (i915_request_on_hold(rq))
2792 		return false;
2793 
2794 	spin_lock_irq(&engine->active.lock);
2795 
2796 	if (i915_request_completed(rq)) { /* too late! */
2797 		rq = NULL;
2798 		goto unlock;
2799 	}
2800 
2801 	if (rq->engine != engine) { /* preempted virtual engine */
2802 		struct virtual_engine *ve = to_virtual_engine(rq->engine);
2803 
2804 		/*
2805 		 * intel_context_inflight() is only protected by virtue
2806 		 * of process_csb() being called only by the tasklet (or
2807 		 * directly from inside reset while the tasklet is suspended).
2808 		 * Assert that neither of those are allowed to run while we
2809 		 * poke at the request queues.
2810 		 */
2811 		GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2812 
2813 		/*
2814 		 * An unsubmitted request along a virtual engine will
2815 		 * remain on the active (this) engine until we are able
2816 		 * to process the context switch away (and so mark the
2817 		 * context as no longer in flight). That cannot have happened
2818 		 * yet, otherwise we would not be hanging!
2819 		 */
2820 		spin_lock(&ve->base.active.lock);
2821 		GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2822 		GEM_BUG_ON(ve->request != rq);
2823 		ve->request = NULL;
2824 		spin_unlock(&ve->base.active.lock);
2825 		i915_request_put(rq);
2826 
2827 		rq->engine = engine;
2828 	}
2829 
2830 	/*
2831 	 * Transfer this request onto the hold queue to prevent it
2832 	 * being resumbitted to HW (and potentially completed) before we have
2833 	 * released it. Since we may have already submitted following
2834 	 * requests, we need to remove those as well.
2835 	 */
2836 	GEM_BUG_ON(i915_request_on_hold(rq));
2837 	GEM_BUG_ON(rq->engine != engine);
2838 	__execlists_hold(rq);
2839 	GEM_BUG_ON(list_empty(&engine->active.hold));
2840 
2841 unlock:
2842 	spin_unlock_irq(&engine->active.lock);
2843 	return rq;
2844 }
2845 
hold_request(const struct i915_request * rq)2846 static bool hold_request(const struct i915_request *rq)
2847 {
2848 	struct i915_dependency *p;
2849 	bool result = false;
2850 
2851 	/*
2852 	 * If one of our ancestors is on hold, we must also be on hold,
2853 	 * otherwise we will bypass it and execute before it.
2854 	 */
2855 	rcu_read_lock();
2856 	for_each_signaler(p, rq) {
2857 		const struct i915_request *s =
2858 			container_of(p->signaler, typeof(*s), sched);
2859 
2860 		if (s->engine != rq->engine)
2861 			continue;
2862 
2863 		result = i915_request_on_hold(s);
2864 		if (result)
2865 			break;
2866 	}
2867 	rcu_read_unlock();
2868 
2869 	return result;
2870 }
2871 
__execlists_unhold(struct i915_request * rq)2872 static void __execlists_unhold(struct i915_request *rq)
2873 {
2874 	LIST_HEAD(list);
2875 
2876 	do {
2877 		struct i915_dependency *p;
2878 
2879 		RQ_TRACE(rq, "hold release\n");
2880 
2881 		GEM_BUG_ON(!i915_request_on_hold(rq));
2882 		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2883 
2884 		i915_request_clear_hold(rq);
2885 		list_move_tail(&rq->sched.link,
2886 			       i915_sched_lookup_priolist(rq->engine,
2887 							  rq_prio(rq)));
2888 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2889 
2890 		/* Also release any children on this engine that are ready */
2891 		for_each_waiter(p, rq) {
2892 			struct i915_request *w =
2893 				container_of(p->waiter, typeof(*w), sched);
2894 
2895 			/* Propagate any change in error status */
2896 			if (rq->fence.error)
2897 				i915_request_set_error_once(w, rq->fence.error);
2898 
2899 			if (w->engine != rq->engine)
2900 				continue;
2901 
2902 			if (!i915_request_on_hold(w))
2903 				continue;
2904 
2905 			/* Check that no other parents are also on hold */
2906 			if (hold_request(w))
2907 				continue;
2908 
2909 			list_move_tail(&w->sched.link, &list);
2910 		}
2911 
2912 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2913 	} while (rq);
2914 }
2915 
execlists_unhold(struct intel_engine_cs * engine,struct i915_request * rq)2916 static void execlists_unhold(struct intel_engine_cs *engine,
2917 			     struct i915_request *rq)
2918 {
2919 	spin_lock_irq(&engine->active.lock);
2920 
2921 	/*
2922 	 * Move this request back to the priority queue, and all of its
2923 	 * children and grandchildren that were suspended along with it.
2924 	 */
2925 	__execlists_unhold(rq);
2926 
2927 	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2928 		engine->execlists.queue_priority_hint = rq_prio(rq);
2929 		tasklet_hi_schedule(&engine->execlists.tasklet);
2930 	}
2931 
2932 	spin_unlock_irq(&engine->active.lock);
2933 }
2934 
2935 struct execlists_capture {
2936 	struct work_struct work;
2937 	struct i915_request *rq;
2938 	struct i915_gpu_coredump *error;
2939 };
2940 
execlists_capture_work(struct work_struct * work)2941 static void execlists_capture_work(struct work_struct *work)
2942 {
2943 	struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2944 	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2945 	struct intel_engine_cs *engine = cap->rq->engine;
2946 	struct intel_gt_coredump *gt = cap->error->gt;
2947 	struct intel_engine_capture_vma *vma;
2948 
2949 	/* Compress all the objects attached to the request, slow! */
2950 	vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2951 	if (vma) {
2952 		struct i915_vma_compress *compress =
2953 			i915_vma_capture_prepare(gt);
2954 
2955 		intel_engine_coredump_add_vma(gt->engine, vma, compress);
2956 		i915_vma_capture_finish(gt, compress);
2957 	}
2958 
2959 	gt->simulated = gt->engine->simulated;
2960 	cap->error->simulated = gt->simulated;
2961 
2962 	/* Publish the error state, and announce it to the world */
2963 	i915_error_state_store(cap->error);
2964 	i915_gpu_coredump_put(cap->error);
2965 
2966 	/* Return this request and all that depend upon it for signaling */
2967 	execlists_unhold(engine, cap->rq);
2968 	i915_request_put(cap->rq);
2969 
2970 	kfree(cap);
2971 }
2972 
capture_regs(struct intel_engine_cs * engine)2973 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2974 {
2975 	const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2976 	struct execlists_capture *cap;
2977 
2978 	cap = kmalloc(sizeof(*cap), gfp);
2979 	if (!cap)
2980 		return NULL;
2981 
2982 	cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2983 	if (!cap->error)
2984 		goto err_cap;
2985 
2986 	cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2987 	if (!cap->error->gt)
2988 		goto err_gpu;
2989 
2990 	cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2991 	if (!cap->error->gt->engine)
2992 		goto err_gt;
2993 
2994 	return cap;
2995 
2996 err_gt:
2997 	kfree(cap->error->gt);
2998 err_gpu:
2999 	kfree(cap->error);
3000 err_cap:
3001 	kfree(cap);
3002 	return NULL;
3003 }
3004 
3005 static struct i915_request *
active_context(struct intel_engine_cs * engine,u32 ccid)3006 active_context(struct intel_engine_cs *engine, u32 ccid)
3007 {
3008 	const struct intel_engine_execlists * const el = &engine->execlists;
3009 	struct i915_request * const *port, *rq;
3010 
3011 	/*
3012 	 * Use the most recent result from process_csb(), but just in case
3013 	 * we trigger an error (via interrupt) before the first CS event has
3014 	 * been written, peek at the next submission.
3015 	 */
3016 
3017 	for (port = el->active; (rq = *port); port++) {
3018 		if (rq->context->lrc.ccid == ccid) {
3019 			ENGINE_TRACE(engine,
3020 				     "ccid found at active:%zd\n",
3021 				     port - el->active);
3022 			return rq;
3023 		}
3024 	}
3025 
3026 	for (port = el->pending; (rq = *port); port++) {
3027 		if (rq->context->lrc.ccid == ccid) {
3028 			ENGINE_TRACE(engine,
3029 				     "ccid found at pending:%zd\n",
3030 				     port - el->pending);
3031 			return rq;
3032 		}
3033 	}
3034 
3035 	ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
3036 	return NULL;
3037 }
3038 
active_ccid(struct intel_engine_cs * engine)3039 static u32 active_ccid(struct intel_engine_cs *engine)
3040 {
3041 	return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
3042 }
3043 
execlists_capture(struct intel_engine_cs * engine)3044 static void execlists_capture(struct intel_engine_cs *engine)
3045 {
3046 	struct execlists_capture *cap;
3047 
3048 	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
3049 		return;
3050 
3051 	/*
3052 	 * We need to _quickly_ capture the engine state before we reset.
3053 	 * We are inside an atomic section (softirq) here and we are delaying
3054 	 * the forced preemption event.
3055 	 */
3056 	cap = capture_regs(engine);
3057 	if (!cap)
3058 		return;
3059 
3060 	spin_lock_irq(&engine->active.lock);
3061 	cap->rq = active_context(engine, active_ccid(engine));
3062 	if (cap->rq) {
3063 		cap->rq = active_request(cap->rq->context->timeline, cap->rq);
3064 		cap->rq = i915_request_get_rcu(cap->rq);
3065 	}
3066 	spin_unlock_irq(&engine->active.lock);
3067 	if (!cap->rq)
3068 		goto err_free;
3069 
3070 	/*
3071 	 * Remove the request from the execlists queue, and take ownership
3072 	 * of the request. We pass it to our worker who will _slowly_ compress
3073 	 * all the pages the _user_ requested for debugging their batch, after
3074 	 * which we return it to the queue for signaling.
3075 	 *
3076 	 * By removing them from the execlists queue, we also remove the
3077 	 * requests from being processed by __unwind_incomplete_requests()
3078 	 * during the intel_engine_reset(), and so they will *not* be replayed
3079 	 * afterwards.
3080 	 *
3081 	 * Note that because we have not yet reset the engine at this point,
3082 	 * it is possible for the request that we have identified as being
3083 	 * guilty, did in fact complete and we will then hit an arbitration
3084 	 * point allowing the outstanding preemption to succeed. The likelihood
3085 	 * of that is very low (as capturing of the engine registers should be
3086 	 * fast enough to run inside an irq-off atomic section!), so we will
3087 	 * simply hold that request accountable for being non-preemptible
3088 	 * long enough to force the reset.
3089 	 */
3090 	if (!execlists_hold(engine, cap->rq))
3091 		goto err_rq;
3092 
3093 	INIT_WORK(&cap->work, execlists_capture_work);
3094 	schedule_work(&cap->work);
3095 	return;
3096 
3097 err_rq:
3098 	i915_request_put(cap->rq);
3099 err_free:
3100 	i915_gpu_coredump_put(cap->error);
3101 	kfree(cap);
3102 }
3103 
execlists_reset(struct intel_engine_cs * engine,const char * msg)3104 static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
3105 {
3106 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
3107 	unsigned long *lock = &engine->gt->reset.flags;
3108 
3109 	if (!intel_has_reset_engine(engine->gt))
3110 		return;
3111 
3112 	if (test_and_set_bit(bit, lock))
3113 		return;
3114 
3115 	ENGINE_TRACE(engine, "reset for %s\n", msg);
3116 
3117 	/* Mark this tasklet as disabled to avoid waiting for it to complete */
3118 	tasklet_disable_nosync(&engine->execlists.tasklet);
3119 
3120 	ring_set_paused(engine, 1); /* Freeze the current request in place */
3121 	execlists_capture(engine);
3122 	intel_engine_reset(engine, msg);
3123 
3124 	tasklet_enable(&engine->execlists.tasklet);
3125 	clear_and_wake_up_bit(bit, lock);
3126 }
3127 
preempt_timeout(const struct intel_engine_cs * const engine)3128 static bool preempt_timeout(const struct intel_engine_cs *const engine)
3129 {
3130 	const struct timer_list *t = &engine->execlists.preempt;
3131 
3132 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3133 		return false;
3134 
3135 	if (!timer_expired(t))
3136 		return false;
3137 
3138 	return READ_ONCE(engine->execlists.pending[0]);
3139 }
3140 
3141 /*
3142  * Check the unread Context Status Buffers and manage the submission of new
3143  * contexts to the ELSP accordingly.
3144  */
execlists_submission_tasklet(unsigned long data)3145 static void execlists_submission_tasklet(unsigned long data)
3146 {
3147 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3148 	bool timeout = preempt_timeout(engine);
3149 
3150 	process_csb(engine);
3151 
3152 	if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
3153 		const char *msg;
3154 
3155 		/* Generate the error message in priority wrt to the user! */
3156 		if (engine->execlists.error_interrupt & GENMASK(15, 0))
3157 			msg = "CS error"; /* thrown by a user payload */
3158 		else if (engine->execlists.error_interrupt & ERROR_CSB)
3159 			msg = "invalid CSB event";
3160 		else
3161 			msg = "internal error";
3162 
3163 		engine->execlists.error_interrupt = 0;
3164 		execlists_reset(engine, msg);
3165 	}
3166 
3167 	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
3168 		unsigned long flags;
3169 
3170 		spin_lock_irqsave(&engine->active.lock, flags);
3171 		__execlists_submission_tasklet(engine);
3172 		spin_unlock_irqrestore(&engine->active.lock, flags);
3173 
3174 		/* Recheck after serialising with direct-submission */
3175 		if (unlikely(timeout && preempt_timeout(engine))) {
3176 			cancel_timer(&engine->execlists.preempt);
3177 			execlists_reset(engine, "preemption time out");
3178 		}
3179 	}
3180 }
3181 
__execlists_kick(struct intel_engine_execlists * execlists)3182 static void __execlists_kick(struct intel_engine_execlists *execlists)
3183 {
3184 	/* Kick the tasklet for some interrupt coalescing and reset handling */
3185 	tasklet_hi_schedule(&execlists->tasklet);
3186 }
3187 
3188 #define execlists_kick(t, member) \
3189 	__execlists_kick(container_of(t, struct intel_engine_execlists, member))
3190 
execlists_timeslice(struct timer_list * timer)3191 static void execlists_timeslice(struct timer_list *timer)
3192 {
3193 	execlists_kick(timer, timer);
3194 }
3195 
execlists_preempt(struct timer_list * timer)3196 static void execlists_preempt(struct timer_list *timer)
3197 {
3198 	execlists_kick(timer, preempt);
3199 }
3200 
queue_request(struct intel_engine_cs * engine,struct i915_request * rq)3201 static void queue_request(struct intel_engine_cs *engine,
3202 			  struct i915_request *rq)
3203 {
3204 	GEM_BUG_ON(!list_empty(&rq->sched.link));
3205 	list_add_tail(&rq->sched.link,
3206 		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
3207 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3208 }
3209 
__submit_queue_imm(struct intel_engine_cs * engine)3210 static void __submit_queue_imm(struct intel_engine_cs *engine)
3211 {
3212 	struct intel_engine_execlists * const execlists = &engine->execlists;
3213 
3214 	if (reset_in_progress(execlists))
3215 		return; /* defer until we restart the engine following reset */
3216 
3217 	__execlists_submission_tasklet(engine);
3218 }
3219 
submit_queue(struct intel_engine_cs * engine,const struct i915_request * rq)3220 static void submit_queue(struct intel_engine_cs *engine,
3221 			 const struct i915_request *rq)
3222 {
3223 	struct intel_engine_execlists *execlists = &engine->execlists;
3224 
3225 	if (rq_prio(rq) <= execlists->queue_priority_hint)
3226 		return;
3227 
3228 	execlists->queue_priority_hint = rq_prio(rq);
3229 	__submit_queue_imm(engine);
3230 }
3231 
ancestor_on_hold(const struct intel_engine_cs * engine,const struct i915_request * rq)3232 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
3233 			     const struct i915_request *rq)
3234 {
3235 	GEM_BUG_ON(i915_request_on_hold(rq));
3236 	return !list_empty(&engine->active.hold) && hold_request(rq);
3237 }
3238 
flush_csb(struct intel_engine_cs * engine)3239 static void flush_csb(struct intel_engine_cs *engine)
3240 {
3241 	struct intel_engine_execlists *el = &engine->execlists;
3242 
3243 	if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) {
3244 		if (!reset_in_progress(el))
3245 			process_csb(engine);
3246 		tasklet_unlock(&el->tasklet);
3247 	}
3248 }
3249 
execlists_submit_request(struct i915_request * request)3250 static void execlists_submit_request(struct i915_request *request)
3251 {
3252 	struct intel_engine_cs *engine = request->engine;
3253 	unsigned long flags;
3254 
3255 	/* Hopefully we clear execlists->pending[] to let us through */
3256 	flush_csb(engine);
3257 
3258 	/* Will be called from irq-context when using foreign fences. */
3259 	spin_lock_irqsave(&engine->active.lock, flags);
3260 
3261 	if (unlikely(ancestor_on_hold(engine, request))) {
3262 		RQ_TRACE(request, "ancestor on hold\n");
3263 		list_add_tail(&request->sched.link, &engine->active.hold);
3264 		i915_request_set_hold(request);
3265 	} else {
3266 		queue_request(engine, request);
3267 
3268 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
3269 		GEM_BUG_ON(list_empty(&request->sched.link));
3270 
3271 		submit_queue(engine, request);
3272 	}
3273 
3274 	spin_unlock_irqrestore(&engine->active.lock, flags);
3275 }
3276 
__execlists_context_fini(struct intel_context * ce)3277 static void __execlists_context_fini(struct intel_context *ce)
3278 {
3279 	intel_ring_put(ce->ring);
3280 	i915_vma_put(ce->state);
3281 }
3282 
execlists_context_destroy(struct kref * kref)3283 static void execlists_context_destroy(struct kref *kref)
3284 {
3285 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3286 
3287 	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
3288 	GEM_BUG_ON(intel_context_is_pinned(ce));
3289 
3290 	if (ce->state)
3291 		__execlists_context_fini(ce);
3292 
3293 	intel_context_fini(ce);
3294 	intel_context_free(ce);
3295 }
3296 
3297 static void
set_redzone(void * vaddr,const struct intel_engine_cs * engine)3298 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
3299 {
3300 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3301 		return;
3302 
3303 	vaddr += engine->context_size;
3304 
3305 	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
3306 }
3307 
3308 static void
check_redzone(const void * vaddr,const struct intel_engine_cs * engine)3309 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
3310 {
3311 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3312 		return;
3313 
3314 	vaddr += engine->context_size;
3315 
3316 	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
3317 		drm_err_once(&engine->i915->drm,
3318 			     "%s context redzone overwritten!\n",
3319 			     engine->name);
3320 }
3321 
3322 static void execlists_context_unpin(struct intel_context *ce)
3323 {
3324 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
3325 		      ce->engine);
3326 }
3327 
3328 static void execlists_context_post_unpin(struct intel_context *ce)
3329 {
3330 	i915_gem_object_unpin_map(ce->state->obj);
3331 }
3332 
3333 static u32 *
3334 gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
3335 {
3336 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3337 		MI_SRM_LRM_GLOBAL_GTT |
3338 		MI_LRI_LRM_CS_MMIO;
3339 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3340 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3341 		CTX_TIMESTAMP * sizeof(u32);
3342 	*cs++ = 0;
3343 
3344 	*cs++ = MI_LOAD_REGISTER_REG |
3345 		MI_LRR_SOURCE_CS_MMIO |
3346 		MI_LRI_LRM_CS_MMIO;
3347 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3348 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3349 
3350 	*cs++ = MI_LOAD_REGISTER_REG |
3351 		MI_LRR_SOURCE_CS_MMIO |
3352 		MI_LRI_LRM_CS_MMIO;
3353 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3354 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3355 
3356 	return cs;
3357 }
3358 
3359 static u32 *
3360 gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
3361 {
3362 	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
3363 
3364 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3365 		MI_SRM_LRM_GLOBAL_GTT |
3366 		MI_LRI_LRM_CS_MMIO;
3367 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3368 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3369 		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
3370 	*cs++ = 0;
3371 
3372 	return cs;
3373 }
3374 
3375 static u32 *
3376 gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
3377 {
3378 	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
3379 
3380 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3381 		MI_SRM_LRM_GLOBAL_GTT |
3382 		MI_LRI_LRM_CS_MMIO;
3383 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3384 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3385 		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
3386 	*cs++ = 0;
3387 
3388 	*cs++ = MI_LOAD_REGISTER_REG |
3389 		MI_LRR_SOURCE_CS_MMIO |
3390 		MI_LRI_LRM_CS_MMIO;
3391 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3392 	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
3393 
3394 	return cs;
3395 }
3396 
3397 static u32 *
3398 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
3399 {
3400 	cs = gen12_emit_timestamp_wa(ce, cs);
3401 	cs = gen12_emit_cmd_buf_wa(ce, cs);
3402 	cs = gen12_emit_restore_scratch(ce, cs);
3403 
3404 	return cs;
3405 }
3406 
3407 static u32 *
3408 gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
3409 {
3410 	cs = gen12_emit_timestamp_wa(ce, cs);
3411 	cs = gen12_emit_restore_scratch(ce, cs);
3412 
3413 	return cs;
3414 }
3415 
3416 static inline u32 context_wa_bb_offset(const struct intel_context *ce)
3417 {
3418 	return PAGE_SIZE * ce->wa_bb_page;
3419 }
3420 
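/*
 * Return a CPU pointer to the per-context workaround batch page: step
 * back from lrc_reg_state to the start of the context image, then
 * forward to the wa_bb page.
 */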
3421 static u32 *context_indirect_bb(const struct intel_context *ce)
3422 {
3423 	void *ptr;
3424 
3425 	GEM_BUG_ON(!ce->wa_bb_page);
3426 
3427 	ptr = ce->lrc_reg_state;
3428 	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
3429 	ptr += context_wa_bb_offset(ce);
3430 
3431 	return ptr;
3432 }
3433 
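/*
 * Emit the indirect context batch into the per-context wa_bb page, pad
 * it out to a cacheline with NOOPs, and then point the context's
 * INDIRECT_CTX registers at the result (offset + length).
 */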
3434 static void
3435 setup_indirect_ctx_bb(const struct intel_context *ce,
3436 		      const struct intel_engine_cs *engine,
3437 		      u32 *(*emit)(const struct intel_context *, u32 *))
3438 {
3439 	u32 * const start = context_indirect_bb(ce);
3440 	u32 *cs;
3441 
3442 	cs = emit(ce, start);
3443 	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
3444 	while ((unsigned long)cs % CACHELINE_BYTES)
3445 		*cs++ = MI_NOOP;
3446 
3447 	lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
3448 				    i915_ggtt_offset(ce->state) +
3449 				    context_wa_bb_offset(ce),
3450 				    (cs - start) * sizeof(*cs));
3451 }
3452 
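/*
 * Refresh the ring registers (START/HEAD/TAIL/CTL) in the context image,
 * plus the RPCS/OA state for render engines, and (re)build the per-context
 * indirect wa_bb where one is in use.
 */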
3453 static void
3454 __execlists_update_reg_state(const struct intel_context *ce,
3455 			     const struct intel_engine_cs *engine,
3456 			     u32 head)
3457 {
3458 	struct intel_ring *ring = ce->ring;
3459 	u32 *regs = ce->lrc_reg_state;
3460 
3461 	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
3462 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
3463 
3464 	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
3465 	regs[CTX_RING_HEAD] = head;
3466 	regs[CTX_RING_TAIL] = ring->tail;
3467 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3468 
3469 	/* RPCS */
3470 	if (engine->class == RENDER_CLASS) {
3471 		regs[CTX_R_PWR_CLK_STATE] =
3472 			intel_sseu_make_rpcs(engine->gt, &ce->sseu);
3473 
3474 		i915_oa_init_reg_state(ce, engine);
3475 	}
3476 
3477 	if (ce->wa_bb_page) {
3478 		u32 *(*fn)(const struct intel_context *ce, u32 *cs);
3479 
3480 		fn = gen12_emit_indirect_ctx_xcs;
3481 		if (ce->engine->class == RENDER_CLASS)
3482 			fn = gen12_emit_indirect_ctx_rcs;
3483 
3484 		/* Mutually exclusive wrt the global indirect bb */
3485 		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
3486 		setup_indirect_ctx_bb(ce, engine, fn);
3487 	}
3488 }
3489 
3490 static int
3491 execlists_context_pre_pin(struct intel_context *ce,
3492 			  struct i915_gem_ww_ctx *ww, void **vaddr)
3493 {
3494 	GEM_BUG_ON(!ce->state);
3495 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3496 
3497 	*vaddr = i915_gem_object_pin_map(ce->state->obj,
3498 					i915_coherent_map_type(ce->engine->i915) |
3499 					I915_MAP_OVERRIDE);
3500 
3501 	return PTR_ERR_OR_ZERO(*vaddr);
3502 }
3503 
3504 static int
3505 __execlists_context_pin(struct intel_context *ce,
3506 			struct intel_engine_cs *engine,
3507 			void *vaddr)
3508 {
3509 	ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
3510 	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
3511 	__execlists_update_reg_state(ce, engine, ce->ring->tail);
3512 
3513 	return 0;
3514 }
3515 
3516 static int execlists_context_pin(struct intel_context *ce, void *vaddr)
3517 {
3518 	return __execlists_context_pin(ce, ce->engine, vaddr);
3519 }
3520 
3521 static int execlists_context_alloc(struct intel_context *ce)
3522 {
3523 	return __execlists_context_alloc(ce, ce->engine);
3524 }
3525 
3526 static void execlists_context_reset(struct intel_context *ce)
3527 {
3528 	CE_TRACE(ce, "reset\n");
3529 	GEM_BUG_ON(!intel_context_is_pinned(ce));
3530 
3531 	intel_ring_reset(ce->ring, ce->ring->emit);
3532 
3533 	/* Scrub away the garbage */
3534 	execlists_init_reg_state(ce->lrc_reg_state,
3535 				 ce, ce->engine, ce->ring, true);
3536 	__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3537 
3538 	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
3539 }
3540 
3541 static const struct intel_context_ops execlists_context_ops = {
3542 	.alloc = execlists_context_alloc,
3543 
3544 	.pre_pin = execlists_context_pre_pin,
3545 	.pin = execlists_context_pin,
3546 	.unpin = execlists_context_unpin,
3547 	.post_unpin = execlists_context_post_unpin,
3548 
3549 	.enter = intel_context_enter_engine,
3550 	.exit = intel_context_exit_engine,
3551 
3552 	.reset = execlists_context_reset,
3553 	.destroy = execlists_context_destroy,
3554 };
3555 
3556 static u32 hwsp_offset(const struct i915_request *rq)
3557 {
3558 	const struct intel_timeline_cacheline *cl;
3559 
3560 	/* Before the request is executed, the timeline/cacheline is fixed */
3561 
3562 	cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
3563 	if (cl)
3564 		return cl->ggtt_offset;
3565 
3566 	return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
3567 }
3568 
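/*
 * Write the initial breadcrumb (seqno - 1) at the head of the request
 * along with an arbitration point, so that we can tell whether the
 * payload has actually started executing and can preempt before it does.
 */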
3569 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3570 {
3571 	u32 *cs;
3572 
3573 	GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
3574 	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3575 		return 0;
3576 
3577 	cs = intel_ring_begin(rq, 6);
3578 	if (IS_ERR(cs))
3579 		return PTR_ERR(cs);
3580 
3581 	/*
3582 	 * Check if we have been preempted before we even get started.
3583 	 *
3584 	 * After this point i915_request_started() reports true, even if
3585 	 * we get preempted and so are no longer running.
3586 	 */
3587 	*cs++ = MI_ARB_CHECK;
3588 	*cs++ = MI_NOOP;
3589 
3590 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3591 	*cs++ = hwsp_offset(rq);
3592 	*cs++ = 0;
3593 	*cs++ = rq->fence.seqno - 1;
3594 
3595 	intel_ring_advance(rq, cs);
3596 
3597 	/* Record the updated position of the request's payload */
3598 	rq->infix = intel_ring_offset(rq, cs);
3599 
3600 	__set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
3601 
3602 	return 0;
3603 }
3604 
3605 static int emit_pdps(struct i915_request *rq)
3606 {
3607 	const struct intel_engine_cs * const engine = rq->engine;
3608 	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
3609 	int err, i;
3610 	u32 *cs;
3611 
3612 	GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
3613 
3614 	/*
3615 	 * Beware ye of the dragons, this sequence is magic!
3616 	 *
3617 	 * Small changes to this sequence can cause anything from
3618 	 * GPU hangs to forcewake errors and machine lockups!
3619 	 */
3620 
3621 	/* Flush any residual operations from the context load */
3622 	err = engine->emit_flush(rq, EMIT_FLUSH);
3623 	if (err)
3624 		return err;
3625 
3626 	/* Magic required to prevent forcewake errors! */
3627 	err = engine->emit_flush(rq, EMIT_INVALIDATE);
3628 	if (err)
3629 		return err;
3630 
3631 	cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
3632 	if (IS_ERR(cs))
3633 		return PTR_ERR(cs);
3634 
3635 	/* Ensure the LRI have landed before we invalidate & continue */
3636 	*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
3637 	for (i = GEN8_3LVL_PDPES; i--; ) {
3638 		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
3639 		u32 base = engine->mmio_base;
3640 
3641 		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
3642 		*cs++ = upper_32_bits(pd_daddr);
3643 		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
3644 		*cs++ = lower_32_bits(pd_daddr);
3645 	}
3646 	*cs++ = MI_NOOP;
3647 
3648 	intel_ring_advance(rq, cs);
3649 
3650 	return 0;
3651 }
3652 
3653 static int execlists_request_alloc(struct i915_request *request)
3654 {
3655 	int ret;
3656 
3657 	GEM_BUG_ON(!intel_context_is_pinned(request->context));
3658 
3659 	/*
3660 	 * Flush enough space to reduce the likelihood of waiting after
3661 	 * we start building the request - in which case we will just
3662 	 * have to repeat work.
3663 	 */
3664 	request->reserved_space += EXECLISTS_REQUEST_SIZE;
3665 
3666 	/*
3667 	 * Note that after this point, we have committed to using
3668 	 * this request as it is being used to both track the
3669 	 * state of engine initialisation and liveness of the
3670 	 * golden renderstate above. Think twice before you try
3671 	 * to cancel/unwind this request now.
3672 	 */
3673 
3674 	if (!i915_vm_is_4lvl(request->context->vm)) {
3675 		ret = emit_pdps(request);
3676 		if (ret)
3677 			return ret;
3678 	}
3679 
3680 	/* Unconditionally invalidate GPU caches and TLBs. */
3681 	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3682 	if (ret)
3683 		return ret;
3684 
3685 	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3686 	return 0;
3687 }
3688 
3689 /*
3690  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3691  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3692  * but there is a slight complication as this is applied in WA batch where the
3693  * values are only initialized once so we cannot take the register value at the
3694  * beginning and reuse it further; hence we save its value to memory, upload a
3695  * constant value with bit21 set and then we restore it back with the saved value.
3696  * To simplify the WA, a constant value is formed by using the default value
3697  * of this register. This shouldn't be a problem because we are only modifying
3698  * it for a short period and this batch is non-preemptible. We can of course
3699  * use additional instructions that read the actual value of the register
3700  * at that time and set our bit of interest but it makes the WA complicated.
3701  *
3702  * This WA is also required for Gen9 so extracting as a function avoids
3703  * code duplication.
3704  */
3705 static u32 *
3706 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3707 {
3708 	/* NB no one else is allowed to scribble over scratch + 256! */
3709 	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3710 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3711 	*batch++ = intel_gt_scratch_offset(engine->gt,
3712 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3713 	*batch++ = 0;
3714 
3715 	*batch++ = MI_LOAD_REGISTER_IMM(1);
3716 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3717 	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3718 
3719 	batch = gen8_emit_pipe_control(batch,
3720 				       PIPE_CONTROL_CS_STALL |
3721 				       PIPE_CONTROL_DC_FLUSH_ENABLE,
3722 				       0);
3723 
3724 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3725 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3726 	*batch++ = intel_gt_scratch_offset(engine->gt,
3727 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3728 	*batch++ = 0;
3729 
3730 	return batch;
3731 }
3732 
3733 /*
3734  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3735  * initialized at the beginning and shared across all contexts but this field
3736  * helps us to have multiple batches at different offsets and select them based
3737  * on a criteria. At the moment this batch always starts at the beginning of the page
3738  * and at this point we don't have multiple wa_ctx batch buffers.
3739  *
3740  * The number of WAs applied is not known at the beginning; we use this field
3741  * to return the number of DWORDs written.
3742  *
3743  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3744  * so it adds NOOPs as padding to make it cacheline aligned.
3745  * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
3746  * makes a complete batch buffer.
3747  */
3748 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3749 {
3750 	/* WaDisableCtxRestoreArbitration:bdw,chv */
3751 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3752 
3753 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3754 	if (IS_BROADWELL(engine->i915))
3755 		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3756 
3757 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3758 	/* Actual scratch location is at 128 bytes offset */
3759 	batch = gen8_emit_pipe_control(batch,
3760 				       PIPE_CONTROL_FLUSH_L3 |
3761 				       PIPE_CONTROL_STORE_DATA_INDEX |
3762 				       PIPE_CONTROL_CS_STALL |
3763 				       PIPE_CONTROL_QW_WRITE,
3764 				       LRC_PPHWSP_SCRATCH_ADDR);
3765 
3766 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3767 
3768 	/* Pad to end of cacheline */
3769 	while ((unsigned long)batch % CACHELINE_BYTES)
3770 		*batch++ = MI_NOOP;
3771 
3772 	/*
3773 	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3774 	 * execution depends on the length specified in terms of cache lines
3775 	 * in the register CTX_RCS_INDIRECT_CTX
3776 	 */
3777 
3778 	return batch;
3779 }
3780 
3781 struct lri {
3782 	i915_reg_t reg;
3783 	u32 value;
3784 };
3785 
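/*
 * Emit a single MI_LOAD_REGISTER_IMM covering 'count' register/value
 * pairs; the encoding limits us to 63 pairs per command.
 */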
3786 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3787 {
3788 	GEM_BUG_ON(!count || count > 63);
3789 
3790 	*batch++ = MI_LOAD_REGISTER_IMM(count);
3791 	do {
3792 		*batch++ = i915_mmio_reg_offset(lri->reg);
3793 		*batch++ = lri->value;
3794 	} while (lri++, --count);
3795 	*batch++ = MI_NOOP;
3796 
3797 	return batch;
3798 }
3799 
3800 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3801 {
3802 	static const struct lri lri[] = {
3803 		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3804 		{
3805 			COMMON_SLICE_CHICKEN2,
3806 			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3807 				       0),
3808 		},
3809 
3810 		/* BSpec: 11391 */
3811 		{
3812 			FF_SLICE_CHICKEN,
3813 			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3814 				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3815 		},
3816 
3817 		/* BSpec: 11299 */
3818 		{
3819 			_3D_CHICKEN3,
3820 			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3821 				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3822 		}
3823 	};
3824 
3825 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3826 
3827 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3828 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3829 
3830 	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3831 	batch = gen8_emit_pipe_control(batch,
3832 				       PIPE_CONTROL_FLUSH_L3 |
3833 				       PIPE_CONTROL_STORE_DATA_INDEX |
3834 				       PIPE_CONTROL_CS_STALL |
3835 				       PIPE_CONTROL_QW_WRITE,
3836 				       LRC_PPHWSP_SCRATCH_ADDR);
3837 
3838 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3839 
3840 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
3841 	if (HAS_POOLED_EU(engine->i915)) {
3842 		/*
3843 		 * EU pool configuration is set up along with the golden context
3844 		 * during context initialization. This value depends on
3845 		 * device type (2x6 or 3x6) and needs to be updated based
3846 		 * on which subslice is disabled, especially for 2x6
3847 		 * devices; however, it is safe to load the default
3848 		 * configuration of a 3x6 device instead of masking off
3849 		 * the corresponding bits because the HW ignores bits of a disabled
3850 		 * subslice and drops down to the appropriate config. Please
3851 		 * see render_state_setup() in i915_gem_render_state.c for
3852 		 * possible configurations, to avoid duplication they are
3853 		 * not shown here again.
3854 		 */
3855 		*batch++ = GEN9_MEDIA_POOL_STATE;
3856 		*batch++ = GEN9_MEDIA_POOL_ENABLE;
3857 		*batch++ = 0x00777000;
3858 		*batch++ = 0;
3859 		*batch++ = 0;
3860 		*batch++ = 0;
3861 	}
3862 
3863 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3864 
3865 	/* Pad to end of cacheline */
3866 	while ((unsigned long)batch % CACHELINE_BYTES)
3867 		*batch++ = MI_NOOP;
3868 
3869 	return batch;
3870 }
3871 
3872 static u32 *
3873 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3874 {
3875 	int i;
3876 
3877 	/*
3878 	 * WaPipeControlBefore3DStateSamplePattern: cnl
3879 	 *
3880 	 * Ensure the engine is idle prior to programming a
3881 	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
3882 	 */
3883 	batch = gen8_emit_pipe_control(batch,
3884 				       PIPE_CONTROL_CS_STALL,
3885 				       0);
3886 	/*
3887 	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3888 	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3889 	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3890 	 * confusing. Since gen8_emit_pipe_control() already advances the
3891 	 * batch by 6 dwords, we advance the other 10 here, completing a
3892 	 * cacheline. It's not clear if the workaround requires this padding
3893 	 * before other commands, or if it's just the regular padding we would
3894 	 * already have for the workaround bb, so leave it here for now.
3895 	 */
3896 	for (i = 0; i < 10; i++)
3897 		*batch++ = MI_NOOP;
3898 
3899 	/* Pad to end of cacheline */
3900 	while ((unsigned long)batch % CACHELINE_BYTES)
3901 		*batch++ = MI_NOOP;
3902 
3903 	return batch;
3904 }
3905 
3906 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3907 
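/*
 * Allocate a single shmem-backed page for the engine's workaround batch
 * buffers and pin it high in the global GTT so the batches can be
 * referenced from the context.
 */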
3908 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3909 {
3910 	struct drm_i915_gem_object *obj;
3911 	struct i915_vma *vma;
3912 	int err;
3913 
3914 	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3915 	if (IS_ERR(obj))
3916 		return PTR_ERR(obj);
3917 
3918 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3919 	if (IS_ERR(vma)) {
3920 		err = PTR_ERR(vma);
3921 		goto err;
3922 	}
3923 
3924 	err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
3925 	if (err)
3926 		goto err;
3927 
3928 	engine->wa_ctx.vma = vma;
3929 	return 0;
3930 
3931 err:
3932 	i915_gem_object_put(obj);
3933 	return err;
3934 }
3935 
3936 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3937 {
3938 	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3939 
3940 	/* Called on error unwind, clear all flags to prevent further use */
3941 	memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx));
3942 }
3943 
3944 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3945 
3946 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3947 {
3948 	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3949 	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3950 					    &wa_ctx->per_ctx };
3951 	wa_bb_func_t wa_bb_fn[2];
3952 	void *batch, *batch_ptr;
3953 	unsigned int i;
3954 	int ret;
3955 
3956 	if (engine->class != RENDER_CLASS)
3957 		return 0;
3958 
3959 	switch (INTEL_GEN(engine->i915)) {
3960 	case 12:
3961 	case 11:
3962 		return 0;
3963 	case 10:
3964 		wa_bb_fn[0] = gen10_init_indirectctx_bb;
3965 		wa_bb_fn[1] = NULL;
3966 		break;
3967 	case 9:
3968 		wa_bb_fn[0] = gen9_init_indirectctx_bb;
3969 		wa_bb_fn[1] = NULL;
3970 		break;
3971 	case 8:
3972 		wa_bb_fn[0] = gen8_init_indirectctx_bb;
3973 		wa_bb_fn[1] = NULL;
3974 		break;
3975 	default:
3976 		MISSING_CASE(INTEL_GEN(engine->i915));
3977 		return 0;
3978 	}
3979 
3980 	ret = lrc_setup_wa_ctx(engine);
3981 	if (ret) {
3982 		drm_dbg(&engine->i915->drm,
3983 			"Failed to setup context WA page: %d\n", ret);
3984 		return ret;
3985 	}
3986 
3987 	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
3988 
3989 	/*
3990 	 * Emit the two workaround batch buffers, recording the offset from the
3991 	 * start of the workaround batch buffer object for each and their
3992 	 * respective sizes.
3993 	 */
3994 	batch_ptr = batch;
3995 	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3996 		wa_bb[i]->offset = batch_ptr - batch;
3997 		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3998 						  CACHELINE_BYTES))) {
3999 			ret = -EINVAL;
4000 			break;
4001 		}
4002 		if (wa_bb_fn[i])
4003 			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
4004 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
4005 	}
4006 	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
4007 
4008 	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
4009 	__i915_gem_object_release_map(wa_ctx->vma->obj);
4010 	if (ret)
4011 		lrc_destroy_wa_ctx(engine);
4012 
4013 	return ret;
4014 }
4015 
4016 static void reset_csb_pointers(struct intel_engine_cs *engine)
4017 {
4018 	struct intel_engine_execlists * const execlists = &engine->execlists;
4019 	const unsigned int reset_value = execlists->csb_size - 1;
4020 
4021 	ring_set_paused(engine, 0);
4022 
4023 	/*
4024 	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
4025 	 * Bludgeon them with a mmio update to be sure.
4026 	 */
4027 	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4028 		     0xffff << 16 | reset_value << 8 | reset_value);
4029 	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4030 
4031 	/*
4032 	 * After a reset, the HW starts writing into CSB entry [0]. We
4033 	 * therefore have to set our HEAD pointer back one entry so that
4034 	 * the *first* entry we check is entry 0. To complicate this further,
4035 	 * as we don't wait for the first interrupt after reset, we have to
4036 	 * fake the HW write to point back to the last entry so that our
4037 	 * inline comparison of our cached head position against the last HW
4038 	 * write works even before the first interrupt.
4039 	 */
4040 	execlists->csb_head = reset_value;
4041 	WRITE_ONCE(*execlists->csb_write, reset_value);
4042 	wmb(); /* Make sure this is visible to HW (paranoia?) */
4043 
4044 	/* Check that the GPU does indeed update the CSB entries! */
4045 	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
4046 	invalidate_csb_entries(&execlists->csb_status[0],
4047 			       &execlists->csb_status[reset_value]);
4048 
4049 	/* Once more for luck and our trusty paranoia */
4050 	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4051 		     0xffff << 16 | reset_value << 8 | reset_value);
4052 	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4053 
4054 	GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
4055 }
4056 
4057 static void execlists_sanitize(struct intel_engine_cs *engine)
4058 {
4059 	/*
4060 	 * Poison residual state on resume, in case the suspend didn't!
4061 	 *
4062 	 * We have to assume that across suspend/resume (or other loss
4063 	 * of control) that the contents of our pinned buffers has been
4064 	 * lost, replaced by garbage. Since this doesn't always happen,
4065 	 * let's poison such state so that we more quickly spot when
4066 	 * we falsely assume it has been preserved.
4067 	 */
4068 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4069 		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4070 
4071 	reset_csb_pointers(engine);
4072 
4073 	/*
4074 	 * The kernel_context HWSP is stored in the status_page. As above,
4075 	 * that may be lost on resume/initialisation, and so we need to
4076 	 * reset the value in the HWSP.
4077 	 */
4078 	intel_timeline_reset_seqno(engine->kernel_context->timeline);
4079 
4080 	/* And scrub the dirty cachelines for the HWSP */
4081 	clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
4082 }
4083 
4084 static void enable_error_interrupt(struct intel_engine_cs *engine)
4085 {
4086 	u32 status;
4087 
4088 	engine->execlists.error_interrupt = 0;
4089 	ENGINE_WRITE(engine, RING_EMR, ~0u);
4090 	ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
4091 
4092 	status = ENGINE_READ(engine, RING_ESR);
4093 	if (unlikely(status)) {
4094 		drm_err(&engine->i915->drm,
4095 			"engine '%s' resumed still in error: %08x\n",
4096 			engine->name, status);
4097 		__intel_gt_reset(engine->gt, engine->mask);
4098 	}
4099 
4100 	/*
4101 	 * On current gen8+, we have 2 signals to play with
4102 	 *
4103 	 * - I915_ERROR_INSTRUCTION (bit 0)
4104 	 *
4105 	 *    Generate an error if the command parser encounters an invalid
4106 	 *    instruction
4107 	 *
4108 	 *    This is a fatal error.
4109 	 *
4110 	 * - CP_PRIV (bit 2)
4111 	 *
4112 	 *    Generate an error on privilege violation (where the CP replaces
4113 	 *    the instruction with a no-op). This also fires for writes into
4114 	 *    read-only scratch pages.
4115 	 *
4116 	 *    This is a non-fatal error, parsing continues.
4117 	 *
4118 	 * * there are a few others defined for odd HW that we do not use
4119 	 *
4120 	 * Since CP_PRIV fires for cases where we have chosen to ignore the
4121 	 * error (as the HW is validating and suppressing the mistakes), we
4122 	 * only unmask the instruction error bit.
4123 	 */
4124 	ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
4125 }
4126 
4127 static void enable_execlists(struct intel_engine_cs *engine)
4128 {
4129 	u32 mode;
4130 
4131 	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4132 
4133 	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4134 
4135 	if (INTEL_GEN(engine->i915) >= 11)
4136 		mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
4137 	else
4138 		mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
4139 	ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
4140 
4141 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4142 
4143 	ENGINE_WRITE_FW(engine,
4144 			RING_HWS_PGA,
4145 			i915_ggtt_offset(engine->status_page.vma));
4146 	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
4147 
4148 	enable_error_interrupt(engine);
4149 
4150 	engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
4151 }
4152 
4153 static bool unexpected_starting_state(struct intel_engine_cs *engine)
4154 {
4155 	bool unexpected = false;
4156 
4157 	if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
4158 		drm_dbg(&engine->i915->drm,
4159 			"STOP_RING still set in RING_MI_MODE\n");
4160 		unexpected = true;
4161 	}
4162 
4163 	return unexpected;
4164 }
4165 
4166 static int execlists_resume(struct intel_engine_cs *engine)
4167 {
4168 	intel_mocs_init_engine(engine);
4169 
4170 	intel_breadcrumbs_reset(engine->breadcrumbs);
4171 
4172 	if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
4173 		struct drm_printer p = drm_debug_printer(__func__);
4174 
4175 		intel_engine_dump(engine, &p, NULL);
4176 	}
4177 
4178 	enable_execlists(engine);
4179 
4180 	return 0;
4181 }
4182 
4183 static void execlists_reset_prepare(struct intel_engine_cs *engine)
4184 {
4185 	struct intel_engine_execlists * const execlists = &engine->execlists;
4186 	unsigned long flags;
4187 
4188 	ENGINE_TRACE(engine, "depth<-%d\n",
4189 		     atomic_read(&execlists->tasklet.count));
4190 
4191 	/*
4192 	 * Prevent request submission to the hardware until we have
4193 	 * completed the reset in i915_gem_reset_finish(). If a request
4194 	 * is completed by one engine, it may then queue a request
4195 	 * to a second via its execlists->tasklet *just* as we are
4196 	 * calling engine->resume() and also writing the ELSP.
4197 	 * Turning off the execlists->tasklet until the reset is over
4198 	 * prevents the race.
4199 	 */
4200 	__tasklet_disable_sync_once(&execlists->tasklet);
4201 	GEM_BUG_ON(!reset_in_progress(execlists));
4202 
4203 	/* And flush any current direct submission. */
4204 	spin_lock_irqsave(&engine->active.lock, flags);
4205 	spin_unlock_irqrestore(&engine->active.lock, flags);
4206 
4207 	/*
4208 	 * We stop the engines, otherwise we might get a failed reset and a
4209 	 * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
4210 	 * from a system hang if a batchbuffer is progressing when
4211 	 * the reset is issued, regardless of READY_TO_RESET ack.
4212 	 * Thus assume it is best to stop engines on all gens
4213 	 * where we have a gpu reset.
4214 	 *
4215 	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
4216 	 *
4217 	 * FIXME: Wa for more modern gens needs to be validated
4218 	 */
4219 	ring_set_paused(engine, 1);
4220 	intel_engine_stop_cs(engine);
4221 
4222 	engine->execlists.reset_ccid = active_ccid(engine);
4223 }
4224 
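/*
 * Clear STOP_RING in the context image copy of RING_MI_MODE. The register
 * is masked: the upper 16 bits select which bits the write affects, so
 * setting STOP_RING << 16 with the value bit cleared unstops the ring on
 * the next context restore.
 */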
4225 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
4226 {
4227 	int x;
4228 
4229 	x = lrc_ring_mi_mode(engine);
4230 	if (x != -1) {
4231 		regs[x + 1] &= ~STOP_RING;
4232 		regs[x + 1] |= STOP_RING << 16;
4233 	}
4234 }
4235 
4236 static void __execlists_reset_reg_state(const struct intel_context *ce,
4237 					const struct intel_engine_cs *engine)
4238 {
4239 	u32 *regs = ce->lrc_reg_state;
4240 
4241 	__reset_stop_ring(regs, engine);
4242 }
4243 
4244 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
4245 {
4246 	struct intel_engine_execlists * const execlists = &engine->execlists;
4247 	struct intel_context *ce;
4248 	struct i915_request *rq;
4249 	u32 head;
4250 
4251 	mb(); /* paranoia: read the CSB pointers from after the reset */
4252 	clflush(execlists->csb_write);
4253 	mb();
4254 
4255 	process_csb(engine); /* drain preemption events */
4256 
4257 	/* Following the reset, we need to reload the CSB read/write pointers */
4258 	reset_csb_pointers(engine);
4259 
4260 	/*
4261 	 * Save the currently executing context, even if we completed
4262 	 * its request, it was still running at the time of the
4263 	 * reset and will have been clobbered.
4264 	 */
4265 	rq = active_context(engine, engine->execlists.reset_ccid);
4266 	if (!rq)
4267 		goto unwind;
4268 
4269 	ce = rq->context;
4270 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
4271 
4272 	if (i915_request_completed(rq)) {
4273 		/* Idle context; tidy up the ring so we can restart afresh */
4274 		head = intel_ring_wrap(ce->ring, rq->tail);
4275 		goto out_replay;
4276 	}
4277 
4278 	/* We still have requests in-flight; the engine should be active */
4279 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
4280 
4281 	/* Context has requests still in-flight; it should not be idle! */
4282 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
4283 
4284 	rq = active_request(ce->timeline, rq);
4285 	head = intel_ring_wrap(ce->ring, rq->head);
4286 	GEM_BUG_ON(head == ce->ring->tail);
4287 
4288 	/*
4289 	 * If this request hasn't started yet, e.g. it is waiting on a
4290 	 * semaphore, we need to avoid skipping the request or else we
4291 	 * break the signaling chain. However, if the context is corrupt
4292 	 * the request will not restart and we will be stuck with a wedged
4293 	 * device. It is quite often the case that if we issue a reset
4294 	 * while the GPU is loading the context image, the context
4295 	 * image becomes corrupt.
4296 	 *
4297 	 * Otherwise, if we have not started yet, the request should replay
4298 	 * perfectly and we do not need to flag the result as being erroneous.
4299 	 */
4300 	if (!i915_request_started(rq))
4301 		goto out_replay;
4302 
4303 	/*
4304 	 * If the request was innocent, we leave the request in the ELSP
4305 	 * and will try to replay it on restarting. The context image may
4306 	 * have been corrupted by the reset, in which case we may have
4307 	 * to service a new GPU hang, but more likely we can continue on
4308 	 * without impact.
4309 	 *
4310 	 * If the request was guilty, we presume the context is corrupt
4311 	 * and have to at least restore the RING register in the context
4312 	 * image back to the expected values to skip over the guilty request.
4313 	 */
4314 	__i915_request_reset(rq, stalled);
4315 
4316 	/*
4317 	 * We want a simple context + ring to execute the breadcrumb update.
4318 	 * We cannot rely on the context being intact across the GPU hang,
4319 	 * so clear it and rebuild just what we need for the breadcrumb.
4320 	 * All pending requests for this context will be zapped, and any
4321 	 * future request will be after userspace has had the opportunity
4322 	 * to recreate its own state.
4323 	 */
4324 out_replay:
4325 	ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
4326 		     head, ce->ring->tail);
4327 	__execlists_reset_reg_state(ce, engine);
4328 	__execlists_update_reg_state(ce, engine, head);
4329 	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
4330 
4331 unwind:
4332 	/* Push back any incomplete requests for replay after the reset. */
4333 	cancel_port_requests(execlists);
4334 	__unwind_incomplete_requests(engine);
4335 }
4336 
4337 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
4338 {
4339 	unsigned long flags;
4340 
4341 	ENGINE_TRACE(engine, "\n");
4342 
4343 	spin_lock_irqsave(&engine->active.lock, flags);
4344 
4345 	__execlists_reset(engine, stalled);
4346 
4347 	spin_unlock_irqrestore(&engine->active.lock, flags);
4348 }
4349 
4350 static void nop_submission_tasklet(unsigned long data)
4351 {
4352 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
4353 
4354 	/* The driver is wedged; don't process any more events. */
4355 	WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
4356 }
4357 
4358 static void execlists_reset_cancel(struct intel_engine_cs *engine)
4359 {
4360 	struct intel_engine_execlists * const execlists = &engine->execlists;
4361 	struct i915_request *rq, *rn;
4362 	struct rb_node *rb;
4363 	unsigned long flags;
4364 
4365 	ENGINE_TRACE(engine, "\n");
4366 
4367 	/*
4368 	 * Before we call engine->cancel_requests(), we should have exclusive
4369 	 * access to the submission state. This is arranged for us by the
4370 	 * caller disabling the interrupt generation, the tasklet and other
4371 	 * threads that may then access the same state, giving us a free hand
4372 	 * to reset state. However, we still need to let lockdep be aware that
4373 	 * we know this state may be accessed in hardirq context, so we
4374 	 * disable the irq around this manipulation and we want to keep
4375 	 * the spinlock focused on its duties and not accidentally conflate
4376 	 * coverage to the submission's irq state. (Similarly, although we
4377 	 * shouldn't need to disable irq around the manipulation of the
4378 	 * submission's irq state, we also wish to remind ourselves that
4379 	 * it is irq state.)
4380 	 */
4381 	spin_lock_irqsave(&engine->active.lock, flags);
4382 
4383 	__execlists_reset(engine, true);
4384 
4385 	/* Mark all executing requests as skipped. */
4386 	list_for_each_entry(rq, &engine->active.requests, sched.link)
4387 		mark_eio(rq);
4388 
4389 	/* Flush the queued requests to the timeline list (for retiring). */
4390 	while ((rb = rb_first_cached(&execlists->queue))) {
4391 		struct i915_priolist *p = to_priolist(rb);
4392 		int i;
4393 
4394 		priolist_for_each_request_consume(rq, rn, p, i) {
4395 			mark_eio(rq);
4396 			__i915_request_submit(rq);
4397 		}
4398 
4399 		rb_erase_cached(&p->node, &execlists->queue);
4400 		i915_priolist_free(p);
4401 	}
4402 
4403 	/* On-hold requests will be flushed to timeline upon their release */
4404 	list_for_each_entry(rq, &engine->active.hold, sched.link)
4405 		mark_eio(rq);
4406 
4407 	/* Cancel all attached virtual engines */
4408 	while ((rb = rb_first_cached(&execlists->virtual))) {
4409 		struct virtual_engine *ve =
4410 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4411 
4412 		rb_erase_cached(rb, &execlists->virtual);
4413 		RB_CLEAR_NODE(rb);
4414 
4415 		spin_lock(&ve->base.active.lock);
4416 		rq = fetch_and_zero(&ve->request);
4417 		if (rq) {
4418 			mark_eio(rq);
4419 
4420 			rq->engine = engine;
4421 			__i915_request_submit(rq);
4422 			i915_request_put(rq);
4423 
4424 			ve->base.execlists.queue_priority_hint = INT_MIN;
4425 		}
4426 		spin_unlock(&ve->base.active.lock);
4427 	}
4428 
4429 	/* Remaining _unready_ requests will be nop'ed when submitted */
4430 
4431 	execlists->queue_priority_hint = INT_MIN;
4432 	execlists->queue = RB_ROOT_CACHED;
4433 
4434 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
4435 	execlists->tasklet.func = nop_submission_tasklet;
4436 
4437 	spin_unlock_irqrestore(&engine->active.lock, flags);
4438 }
4439 
4440 static void execlists_reset_finish(struct intel_engine_cs *engine)
4441 {
4442 	struct intel_engine_execlists * const execlists = &engine->execlists;
4443 
4444 	/*
4445 	 * After a GPU reset, we may have requests to replay. Do so now while
4446 	 * we still have the forcewake to be sure that the GPU is not allowed
4447 	 * to sleep before we restart and reload a context.
4448 	 */
4449 	GEM_BUG_ON(!reset_in_progress(execlists));
4450 	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
4451 		execlists->tasklet.func(execlists->tasklet.data);
4452 
4453 	if (__tasklet_enable(&execlists->tasklet))
4454 		/* And kick in case we missed a new request submission. */
4455 		tasklet_hi_schedule(&execlists->tasklet);
4456 	ENGINE_TRACE(engine, "depth->%d\n",
4457 		     atomic_read(&execlists->tasklet.count));
4458 }
4459 
4460 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
4461 				    u64 offset, u32 len,
4462 				    const unsigned int flags)
4463 {
4464 	u32 *cs;
4465 
4466 	cs = intel_ring_begin(rq, 4);
4467 	if (IS_ERR(cs))
4468 		return PTR_ERR(cs);
4469 
4470 	/*
4471 	 * WaDisableCtxRestoreArbitration:bdw,chv
4472 	 *
4473 	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
4474 	 * particular all the gen that do not need the w/a at all!), if we
4475 	 * took care to make sure that on every switch into this context
4476 	 * (both ordinary and for preemption) that arbitration was enabled
4477 	 * we would be fine.  However, for gen8 there is another w/a that
4478 	 * requires us to not preempt inside GPGPU execution, so we keep
4479 	 * arbitration disabled for gen8 batches. Arbitration will be
4480 	 * re-enabled before we close the request
4481 	 * (engine->emit_fini_breadcrumb).
4482 	 */
4483 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4484 
4485 	/* FIXME(BDW+): Address space and security selectors. */
4486 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
4487 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4488 	*cs++ = lower_32_bits(offset);
4489 	*cs++ = upper_32_bits(offset);
4490 
4491 	intel_ring_advance(rq, cs);
4492 
4493 	return 0;
4494 }
4495 
4496 static int gen8_emit_bb_start(struct i915_request *rq,
4497 			      u64 offset, u32 len,
4498 			      const unsigned int flags)
4499 {
4500 	u32 *cs;
4501 
4502 	cs = intel_ring_begin(rq, 6);
4503 	if (IS_ERR(cs))
4504 		return PTR_ERR(cs);
4505 
4506 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4507 
4508 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
4509 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4510 	*cs++ = lower_32_bits(offset);
4511 	*cs++ = upper_32_bits(offset);
4512 
4513 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4514 	*cs++ = MI_NOOP;
4515 
4516 	intel_ring_advance(rq, cs);
4517 
4518 	return 0;
4519 }
4520 
4521 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
4522 {
4523 	ENGINE_WRITE(engine, RING_IMR,
4524 		     ~(engine->irq_enable_mask | engine->irq_keep_mask));
4525 	ENGINE_POSTING_READ(engine, RING_IMR);
4526 }
4527 
4528 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
4529 {
4530 	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
4531 }
4532 
4533 static int gen8_emit_flush(struct i915_request *request, u32 mode)
4534 {
4535 	u32 cmd, *cs;
4536 
4537 	cs = intel_ring_begin(request, 4);
4538 	if (IS_ERR(cs))
4539 		return PTR_ERR(cs);
4540 
4541 	cmd = MI_FLUSH_DW + 1;
4542 
4543 	/* We always require a command barrier so that subsequent
4544 	 * commands, such as breadcrumb interrupts, are strictly ordered
4545 	 * wrt the contents of the write cache being flushed to memory
4546 	 * (and thus being coherent from the CPU).
4547 	 */
4548 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4549 
4550 	if (mode & EMIT_INVALIDATE) {
4551 		cmd |= MI_INVALIDATE_TLB;
4552 		if (request->engine->class == VIDEO_DECODE_CLASS)
4553 			cmd |= MI_INVALIDATE_BSD;
4554 	}
4555 
4556 	*cs++ = cmd;
4557 	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4558 	*cs++ = 0; /* upper addr */
4559 	*cs++ = 0; /* value */
4560 	intel_ring_advance(request, cs);
4561 
4562 	return 0;
4563 }
4564 
4565 static int gen8_emit_flush_render(struct i915_request *request,
4566 				  u32 mode)
4567 {
4568 	bool vf_flush_wa = false, dc_flush_wa = false;
4569 	u32 *cs, flags = 0;
4570 	int len;
4571 
4572 	flags |= PIPE_CONTROL_CS_STALL;
4573 
4574 	if (mode & EMIT_FLUSH) {
4575 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4576 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4577 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4578 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4579 	}
4580 
4581 	if (mode & EMIT_INVALIDATE) {
4582 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
4583 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4584 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4585 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4586 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4587 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4588 		flags |= PIPE_CONTROL_QW_WRITE;
4589 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4590 
4591 		/*
4592 		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4593 		 * pipe control.
4594 		 */
4595 		if (IS_GEN(request->engine->i915, 9))
4596 			vf_flush_wa = true;
4597 
4598 		/* WaForGAMHang:kbl */
4599 		if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0))
4600 			dc_flush_wa = true;
4601 	}
4602 
4603 	len = 6;
4604 
4605 	if (vf_flush_wa)
4606 		len += 6;
4607 
4608 	if (dc_flush_wa)
4609 		len += 12;
4610 
4611 	cs = intel_ring_begin(request, len);
4612 	if (IS_ERR(cs))
4613 		return PTR_ERR(cs);
4614 
4615 	if (vf_flush_wa)
4616 		cs = gen8_emit_pipe_control(cs, 0, 0);
4617 
4618 	if (dc_flush_wa)
4619 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
4620 					    0);
4621 
4622 	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4623 
4624 	if (dc_flush_wa)
4625 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
4626 
4627 	intel_ring_advance(request, cs);
4628 
4629 	return 0;
4630 }
4631 
4632 static int gen11_emit_flush_render(struct i915_request *request,
4633 				   u32 mode)
4634 {
4635 	if (mode & EMIT_FLUSH) {
4636 		u32 *cs;
4637 		u32 flags = 0;
4638 
4639 		flags |= PIPE_CONTROL_CS_STALL;
4640 
4641 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4642 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4643 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4644 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4645 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4646 		flags |= PIPE_CONTROL_QW_WRITE;
4647 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4648 
4649 		cs = intel_ring_begin(request, 6);
4650 		if (IS_ERR(cs))
4651 			return PTR_ERR(cs);
4652 
4653 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4654 		intel_ring_advance(request, cs);
4655 	}
4656 
4657 	if (mode & EMIT_INVALIDATE) {
4658 		u32 *cs;
4659 		u32 flags = 0;
4660 
4661 		flags |= PIPE_CONTROL_CS_STALL;
4662 
4663 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4664 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
4665 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4666 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4667 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4668 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4669 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4670 		flags |= PIPE_CONTROL_QW_WRITE;
4671 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4672 
4673 		cs = intel_ring_begin(request, 6);
4674 		if (IS_ERR(cs))
4675 			return PTR_ERR(cs);
4676 
4677 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4678 		intel_ring_advance(request, cs);
4679 	}
4680 
4681 	return 0;
4682 }
4683 
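/*
 * Build an MI_ARB_CHECK that toggles the command streamer pre-parser on
 * gen12: bit 0 carries the disable value and bit 8 presumably acts as its
 * write mask. Callers bracket TLB invalidations with
 * preparser_disable(true)/preparser_disable(false).
 */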
4684 static u32 preparser_disable(bool state)
4685 {
4686 	return MI_ARB_CHECK | 1 << 8 | state;
4687 }
4688 
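/*
 * Look up the AUX table invalidation register for a video decode or video
 * enhancement engine instance; other engine classes have no such register
 * and hit the GEM_BUG_ON below.
 */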
4689 static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
4690 {
4691 	static const i915_reg_t vd[] = {
4692 		GEN12_VD0_AUX_NV,
4693 		GEN12_VD1_AUX_NV,
4694 		GEN12_VD2_AUX_NV,
4695 		GEN12_VD3_AUX_NV,
4696 	};
4697 
4698 	static const i915_reg_t ve[] = {
4699 		GEN12_VE0_AUX_NV,
4700 		GEN12_VE1_AUX_NV,
4701 	};
4702 
4703 	if (engine->class == VIDEO_DECODE_CLASS)
4704 		return vd[engine->instance];
4705 
4706 	if (engine->class == VIDEO_ENHANCEMENT_CLASS)
4707 		return ve[engine->instance];
4708 
4709 	GEM_BUG_ON("unknown aux_inv_reg\n");
4710 
4711 	return INVALID_MMIO_REG;
4712 }
4713 
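/*
 * Emit an LRI writing AUX_INV into the given *_AUX_NV register to
 * invalidate that engine's aux table (see the hsdes reference at the
 * call sites).
 */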
4714 static u32 *
4715 gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
4716 {
4717 	*cs++ = MI_LOAD_REGISTER_IMM(1);
4718 	*cs++ = i915_mmio_reg_offset(inv_reg);
4719 	*cs++ = AUX_INV;
4720 	*cs++ = MI_NOOP;
4721 
4722 	return cs;
4723 }
4724 
4725 static int gen12_emit_flush_render(struct i915_request *request,
4726 				   u32 mode)
4727 {
4728 	if (mode & EMIT_FLUSH) {
4729 		u32 flags = 0;
4730 		u32 *cs;
4731 
4732 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4733 		flags |= PIPE_CONTROL_FLUSH_L3;
4734 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4735 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4736 		/* Wa_1409600907:tgl */
4737 		flags |= PIPE_CONTROL_DEPTH_STALL;
4738 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4739 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4740 
4741 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4742 		flags |= PIPE_CONTROL_QW_WRITE;
4743 
4744 		flags |= PIPE_CONTROL_CS_STALL;
4745 
4746 		cs = intel_ring_begin(request, 6);
4747 		if (IS_ERR(cs))
4748 			return PTR_ERR(cs);
4749 
4750 		cs = gen12_emit_pipe_control(cs,
4751 					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4752 					     flags, LRC_PPHWSP_SCRATCH_ADDR);
4753 		intel_ring_advance(request, cs);
4754 	}
4755 
4756 	if (mode & EMIT_INVALIDATE) {
4757 		u32 flags = 0;
4758 		u32 *cs;
4759 
4760 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4761 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
4762 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4763 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4764 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4765 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4766 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4767 
4768 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4769 		flags |= PIPE_CONTROL_QW_WRITE;
4770 
4771 		flags |= PIPE_CONTROL_CS_STALL;
4772 
4773 		cs = intel_ring_begin(request, 8 + 4);
4774 		if (IS_ERR(cs))
4775 			return PTR_ERR(cs);
4776 
4777 		/*
4778 		 * Prevent the pre-parser from skipping past the TLB
4779 		 * invalidate and loading a stale page for the batch
4780 		 * buffer / request payload.
4781 		 */
4782 		*cs++ = preparser_disable(true);
4783 
4784 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4785 
4786 		/* hsdes: 1809175790 */
4787 		cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
4788 
4789 		*cs++ = preparser_disable(false);
4790 		intel_ring_advance(request, cs);
4791 	}
4792 
4793 	return 0;
4794 }
4795 
4796 static int gen12_emit_flush(struct i915_request *request, u32 mode)
4797 {
4798 	intel_engine_mask_t aux_inv = 0;
4799 	u32 cmd, *cs;
4800 
4801 	cmd = 4;
4802 	if (mode & EMIT_INVALIDATE)
4803 		cmd += 2;
4804 	if (mode & EMIT_INVALIDATE)
4805 		aux_inv = request->engine->mask & ~BIT(BCS0);
4806 	if (aux_inv)
4807 		cmd += 2 * hweight8(aux_inv) + 2;
4808 
4809 	cs = intel_ring_begin(request, cmd);
4810 	if (IS_ERR(cs))
4811 		return PTR_ERR(cs);
4812 
4813 	if (mode & EMIT_INVALIDATE)
4814 		*cs++ = preparser_disable(true);
4815 
4816 	cmd = MI_FLUSH_DW + 1;
4817 
4818 	/* We always require a command barrier so that subsequent
4819 	 * commands, such as breadcrumb interrupts, are strictly ordered
4820 	 * wrt the contents of the write cache being flushed to memory
4821 	 * (and thus being coherent from the CPU).
4822 	 */
4823 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4824 
4825 	if (mode & EMIT_INVALIDATE) {
4826 		cmd |= MI_INVALIDATE_TLB;
4827 		if (request->engine->class == VIDEO_DECODE_CLASS)
4828 			cmd |= MI_INVALIDATE_BSD;
4829 	}
4830 
4831 	*cs++ = cmd;
4832 	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4833 	*cs++ = 0; /* upper addr */
4834 	*cs++ = 0; /* value */
4835 
4836 	if (aux_inv) { /* hsdes: 1809175790 */
4837 		struct intel_engine_cs *engine;
4838 		unsigned int tmp;
4839 
4840 		*cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
4841 		for_each_engine_masked(engine, request->engine->gt,
4842 				       aux_inv, tmp) {
4843 			*cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
4844 			*cs++ = AUX_INV;
4845 		}
4846 		*cs++ = MI_NOOP;
4847 	}
4848 
4849 	if (mode & EMIT_INVALIDATE)
4850 		*cs++ = preparser_disable(false);
4851 
4852 	intel_ring_advance(request, cs);
4853 
4854 	return 0;
4855 }
4856 
4857 static void assert_request_valid(struct i915_request *rq)
4858 {
4859 	struct intel_ring *ring __maybe_unused = rq->ring;
4860 
4861 	/* Can we unwind this request without appearing to go forwards? */
4862 	GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
4863 }
4864 
4865 /*
4866  * Reserve space for 2 NOOPs at the end of each request to be
4867  * used as a workaround for not being allowed to do lite
4868  * restore with HEAD==TAIL (WaIdleLiteRestore).
4869  */
4870 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4871 {
4872 	/* Ensure there's always at least one preemption point per-request. */
4873 	*cs++ = MI_ARB_CHECK;
4874 	*cs++ = MI_NOOP;
4875 	request->wa_tail = intel_ring_offset(request, cs);
4876 
4877 	/* Check that entire request is less than half the ring */
4878 	assert_request_valid(request);
4879 
4880 	return cs;
4881 }
4882 
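/*
 * Poll the preemption semaphore in the HWSP until it reads back as zero;
 * ring_set_paused() writes a non-zero value here to hold completed
 * contexts in this busywait while a preemption is being processed.
 */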
4883 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4884 {
4885 	*cs++ = MI_SEMAPHORE_WAIT |
4886 		MI_SEMAPHORE_GLOBAL_GTT |
4887 		MI_SEMAPHORE_POLL |
4888 		MI_SEMAPHORE_SAD_EQ_SDD;
4889 	*cs++ = 0;
4890 	*cs++ = intel_hws_preempt_address(request->engine);
4891 	*cs++ = 0;
4892 
4893 	return cs;
4894 }
4895 
4896 static __always_inline u32*
4897 gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4898 {
4899 	*cs++ = MI_USER_INTERRUPT;
4900 
4901 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4902 	if (intel_engine_has_semaphores(request->engine))
4903 		cs = emit_preempt_busywait(request, cs);
4904 
4905 	request->tail = intel_ring_offset(request, cs);
4906 	assert_ring_tail_valid(request->ring, request->tail);
4907 
4908 	return gen8_emit_wa_tail(request, cs);
4909 }
4910 
4911 static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
4912 {
4913 	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
4914 }
4915 
4916 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4917 {
4918 	return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4919 }
4920 
4921 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4922 {
4923 	cs = gen8_emit_pipe_control(cs,
4924 				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4925 				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4926 				    PIPE_CONTROL_DC_FLUSH_ENABLE,
4927 				    0);
4928 
4929 	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4930 	cs = gen8_emit_ggtt_write_rcs(cs,
4931 				      request->fence.seqno,
4932 				      hwsp_offset(request),
4933 				      PIPE_CONTROL_FLUSH_ENABLE |
4934 				      PIPE_CONTROL_CS_STALL);
4935 
4936 	return gen8_emit_fini_breadcrumb_tail(request, cs);
4937 }
4938 
4939 static u32 *
4940 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4941 {
4942 	cs = gen8_emit_ggtt_write_rcs(cs,
4943 				      request->fence.seqno,
4944 				      hwsp_offset(request),
4945 				      PIPE_CONTROL_CS_STALL |
4946 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
4947 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4948 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4949 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
4950 				      PIPE_CONTROL_FLUSH_ENABLE);
4951 
4952 	return gen8_emit_fini_breadcrumb_tail(request, cs);
4953 }
4954 
4955 /*
4956  * Note that the CS instruction pre-parser will not stall on the breadcrumb
4957  * flush and will continue pre-fetching the instructions after it before the
4958  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4959  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
4960  * of the next request before the memory has been flushed, we're guaranteed that
4961  * we won't access the batch itself too early.
4962  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4963  * so, if the current request is modifying an instruction in the next request on
4964  * the same intel_context, we might pre-fetch and then execute the pre-update
4965  * instruction. To avoid this, the users of self-modifying code should either
4966  * disable the parser around the code emitting the memory writes, via a new flag
4967  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4968  * the in-kernel use-cases we've opted to use a separate context, see
4969  * reloc_gpu() as an example.
4970  * All the above applies only to the instructions themselves. Non-inline data
4971  * used by the instructions is not pre-fetched.
4972  */
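/*
 * Illustrative sketch only (not part of the driver flow): on gen12+ a
 * writer of self-modifying instructions would bracket the memory writes
 * with the pre-parser toggle used elsewhere in this file, e.g.
 *
 *	cs = intel_ring_begin(rq, 4);
 *	*cs++ = preparser_disable(true);
 *	... emit the writes that patch the following instructions ...
 *	*cs++ = preparser_disable(false);
 *	intel_ring_advance(rq, cs);
 *
 * The in-kernel users instead emit such writes from a separate
 * intel_context, as noted above.
 */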
4973 
4974 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4975 {
4976 	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4977 		MI_SEMAPHORE_GLOBAL_GTT |
4978 		MI_SEMAPHORE_POLL |
4979 		MI_SEMAPHORE_SAD_EQ_SDD;
4980 	*cs++ = 0;
4981 	*cs++ = intel_hws_preempt_address(request->engine);
4982 	*cs++ = 0;
4983 	*cs++ = 0;
4984 	*cs++ = MI_NOOP;
4985 
4986 	return cs;
4987 }
4988 
4989 static __always_inline u32*
4990 gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4991 {
4992 	*cs++ = MI_USER_INTERRUPT;
4993 
4994 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4995 	if (intel_engine_has_semaphores(request->engine))
4996 		cs = gen12_emit_preempt_busywait(request, cs);
4997 
4998 	request->tail = intel_ring_offset(request, cs);
4999 	assert_ring_tail_valid(request->ring, request->tail);
5000 
5001 	return gen8_emit_wa_tail(request, cs);
5002 }
5003 
5004 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
5005 {
5006 	/* XXX Stalling flush before seqno write; post-sync not */
5007 	cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
5008 	return gen12_emit_fini_breadcrumb_tail(rq, cs);
5009 }
5010 
5011 static u32 *
5012 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
5013 {
5014 	cs = gen12_emit_ggtt_write_rcs(cs,
5015 				       request->fence.seqno,
5016 				       hwsp_offset(request),
5017 				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
5018 				       PIPE_CONTROL_CS_STALL |
5019 				       PIPE_CONTROL_TILE_CACHE_FLUSH |
5020 				       PIPE_CONTROL_FLUSH_L3 |
5021 				       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
5022 				       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
5023 				       /* Wa_1409600907:tgl */
5024 				       PIPE_CONTROL_DEPTH_STALL |
5025 				       PIPE_CONTROL_DC_FLUSH_ENABLE |
5026 				       PIPE_CONTROL_FLUSH_ENABLE);
5027 
5028 	return gen12_emit_fini_breadcrumb_tail(request, cs);
5029 }
5030 
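/*
 * Called when the engine parks (idles): stop the timeslice and
 * preemption-timeout timers so they cannot fire while the engine is
 * asleep.
 */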
5031 static void execlists_park(struct intel_engine_cs *engine)
5032 {
5033 	cancel_timer(&engine->execlists.timer);
5034 	cancel_timer(&engine->execlists.preempt);
5035 }
5036 
5037 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
5038 {
5039 	engine->submit_request = execlists_submit_request;
5040 	engine->schedule = i915_schedule;
5041 	engine->execlists.tasklet.func = execlists_submission_tasklet;
5042 
5043 	engine->reset.prepare = execlists_reset_prepare;
5044 	engine->reset.rewind = execlists_reset_rewind;
5045 	engine->reset.cancel = execlists_reset_cancel;
5046 	engine->reset.finish = execlists_reset_finish;
5047 
5048 	engine->park = execlists_park;
5049 	engine->unpark = NULL;
5050 
5051 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
5052 	if (!intel_vgpu_active(engine->i915)) {
5053 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
5054 		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
5055 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
5056 			if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
5057 				engine->flags |= I915_ENGINE_HAS_TIMESLICES;
5058 		}
5059 	}
5060 
5061 	if (INTEL_GEN(engine->i915) >= 12)
5062 		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
5063 
5064 	if (intel_engine_has_preemption(engine))
5065 		engine->emit_bb_start = gen8_emit_bb_start;
5066 	else
5067 		engine->emit_bb_start = gen8_emit_bb_start_noarb;
5068 }
5069 
5070 static void execlists_shutdown(struct intel_engine_cs *engine)
5071 {
5072 	/* Synchronise with residual timers and any softirq they raise */
5073 	del_timer_sync(&engine->execlists.timer);
5074 	del_timer_sync(&engine->execlists.preempt);
5075 	tasklet_kill(&engine->execlists.tasklet);
5076 }
5077 
5078 static void execlists_release(struct intel_engine_cs *engine)
5079 {
5080 	engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
5081 
5082 	execlists_shutdown(engine);
5083 
5084 	intel_engine_cleanup_common(engine);
5085 	lrc_destroy_wa_ctx(engine);
5086 }
5087 
5088 static void
5089 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
5090 {
5091 	/* Default vfuncs which can be overridden by each engine. */
5092 
5093 	engine->resume = execlists_resume;
5094 
5095 	engine->cops = &execlists_context_ops;
5096 	engine->request_alloc = execlists_request_alloc;
5097 
5098 	engine->emit_flush = gen8_emit_flush;
5099 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
5100 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
5101 	if (INTEL_GEN(engine->i915) >= 12) {
5102 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
5103 		engine->emit_flush = gen12_emit_flush;
5104 	}
5105 	engine->set_default_submission = intel_execlists_set_default_submission;
5106 
5107 	if (INTEL_GEN(engine->i915) < 11) {
5108 		engine->irq_enable = gen8_logical_ring_enable_irq;
5109 		engine->irq_disable = gen8_logical_ring_disable_irq;
5110 	} else {
5111 		/*
5112 		 * TODO: On Gen11, interrupt masks need to be clear
5113 		 * to allow C6 entry. Keep interrupts enabled
5114 		 * and take the hit of generating extra interrupts
5115 		 * until a more refined solution exists.
5116 		 */
5117 	}
5118 }
5119 
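/*
 * Before Gen11 the engines share the GT interrupt registers, with each
 * engine's bits living at a fixed per-engine shift; from Gen11 onwards
 * the unshifted masks are used directly.
 */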
5120 static inline void
5121 logical_ring_default_irqs(struct intel_engine_cs *engine)
5122 {
5123 	unsigned int shift = 0;
5124 
5125 	if (INTEL_GEN(engine->i915) < 11) {
5126 		const u8 irq_shifts[] = {
5127 			[RCS0]  = GEN8_RCS_IRQ_SHIFT,
5128 			[BCS0]  = GEN8_BCS_IRQ_SHIFT,
5129 			[VCS0]  = GEN8_VCS0_IRQ_SHIFT,
5130 			[VCS1]  = GEN8_VCS1_IRQ_SHIFT,
5131 			[VECS0] = GEN8_VECS_IRQ_SHIFT,
5132 		};
5133 
5134 		shift = irq_shifts[engine->id];
5135 	}
5136 
5137 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
5138 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
5139 	engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
5140 	engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
5141 }
5142 
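/*
 * The render engine needs heavier cache flushing than the other
 * engines, so override the common flush and final-breadcrumb emitters
 * with the per-gen render variants.
 */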
5143 static void rcs_submission_override(struct intel_engine_cs *engine)
5144 {
5145 	switch (INTEL_GEN(engine->i915)) {
5146 	case 12:
5147 		engine->emit_flush = gen12_emit_flush_render;
5148 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
5149 		break;
5150 	case 11:
5151 		engine->emit_flush = gen11_emit_flush_render;
5152 		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
5153 		break;
5154 	default:
5155 		engine->emit_flush = gen8_emit_flush_render;
5156 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
5157 		break;
5158 	}
5159 }
5160 
5161 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
5162 {
5163 	struct intel_engine_execlists * const execlists = &engine->execlists;
5164 	struct drm_i915_private *i915 = engine->i915;
5165 	struct intel_uncore *uncore = engine->uncore;
5166 	u32 base = engine->mmio_base;
5167 
5168 	tasklet_init(&engine->execlists.tasklet,
5169 		     execlists_submission_tasklet, (unsigned long)engine);
5170 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
5171 	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
5172 
5173 	logical_ring_default_vfuncs(engine);
5174 	logical_ring_default_irqs(engine);
5175 
5176 	if (engine->class == RENDER_CLASS)
5177 		rcs_submission_override(engine);
5178 
5179 	if (intel_init_workaround_bb(engine))
5180 		/*
5181 		 * We continue even if we fail to initialize the WA batch,
5182 		 * because we only expect rare glitches and nothing critical
5183 		 * enough to prevent us from using the GPU.
5184 		 */
5185 		drm_err(&i915->drm, "WA batch buffer initialization failed\n");
5186 
5187 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
5188 		execlists->submit_reg = uncore->regs +
5189 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
5190 		execlists->ctrl_reg = uncore->regs +
5191 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
5192 	} else {
5193 		execlists->submit_reg = uncore->regs +
5194 			i915_mmio_reg_offset(RING_ELSP(base));
5195 	}
5196 
5197 	execlists->csb_status =
5198 		(u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
5199 
5200 	execlists->csb_write =
5201 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
5202 
5203 	if (INTEL_GEN(i915) < 11)
5204 		execlists->csb_size = GEN8_CSB_ENTRIES;
5205 	else
5206 		execlists->csb_size = GEN11_CSB_ENTRIES;
5207 
5208 	if (INTEL_GEN(engine->i915) >= 11) {
5209 		execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
5210 		execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
5211 	}
5212 
5213 	/* Finally, take ownership and responsibility for cleanup! */
5214 	engine->sanitize = execlists_sanitize;
5215 	engine->release = execlists_release;
5216 
5217 	return 0;
5218 }
5219 
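/*
 * CTX_CONTEXT_CONTROL is a masked register: the upper 16 bits select
 * which of the lower 16 bits are updated, hence the _MASKED_BIT_ENABLE/
 * _MASKED_BIT_DISABLE pairs below.
 */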
5220 static void init_common_reg_state(u32 * const regs,
5221 				  const struct intel_engine_cs *engine,
5222 				  const struct intel_ring *ring,
5223 				  bool inhibit)
5224 {
5225 	u32 ctl;
5226 
5227 	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
5228 	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
5229 	if (inhibit)
5230 		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
5231 	if (INTEL_GEN(engine->i915) < 11)
5232 		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
5233 					   CTX_CTRL_RS_CTX_ENABLE);
5234 	regs[CTX_CONTEXT_CONTROL] = ctl;
5235 
5236 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
5237 	regs[CTX_TIMESTAMP] = 0;
5238 }
5239 
5240 static void init_wa_bb_reg_state(u32 * const regs,
5241 				 const struct intel_engine_cs *engine)
5242 {
5243 	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
5244 
5245 	if (wa_ctx->per_ctx.size) {
5246 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
5247 
5248 		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
5249 		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
5250 			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
5251 	}
5252 
5253 	if (wa_ctx->indirect_ctx.size) {
5254 		lrc_ring_setup_indirect_ctx(regs, engine,
5255 					    i915_ggtt_offset(wa_ctx->vma) +
5256 					    wa_ctx->indirect_ctx.offset,
5257 					    wa_ctx->indirect_ctx.size);
5258 	}
5259 }
5260 
5261 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
5262 {
5263 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
5264 		/* 64b PPGTT (48bit canonical)
5265 		 * PDP0_DESCRIPTOR contains the base address to PML4 and
5266 		 * other PDP Descriptors are ignored.
5267 		 */
5268 		ASSIGN_CTX_PML4(ppgtt, regs);
5269 	} else {
5270 		ASSIGN_CTX_PDP(ppgtt, regs, 3);
5271 		ASSIGN_CTX_PDP(ppgtt, regs, 2);
5272 		ASSIGN_CTX_PDP(ppgtt, regs, 1);
5273 		ASSIGN_CTX_PDP(ppgtt, regs, 0);
5274 	}
5275 }
5276 
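/*
 * For a context running on the global GTT, point the register state at
 * the aliasing ppgtt hiding behind the GGTT; otherwise use the
 * context's own full ppgtt.
 */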
5277 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
5278 {
5279 	if (i915_is_ggtt(vm))
5280 		return i915_vm_to_ggtt(vm)->alias;
5281 	else
5282 		return i915_vm_to_ppgtt(vm);
5283 }
5284 
5285 static void execlists_init_reg_state(u32 *regs,
5286 				     const struct intel_context *ce,
5287 				     const struct intel_engine_cs *engine,
5288 				     const struct intel_ring *ring,
5289 				     bool inhibit)
5290 {
5291 	/*
5292 	 * A context is actually a big batch buffer with several
5293 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
5294 	 * values we are setting here are only for the first context restore:
5295 	 * on a subsequent save, the GPU will recreate this batchbuffer with new
5296 	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
5297 	 * we are not initializing here).
5298 	 *
5299 	 * Must keep consistent with virtual_update_register_offsets().
5300 	 */
5301 	set_offsets(regs, reg_offsets(engine), engine, inhibit);
5302 
5303 	init_common_reg_state(regs, engine, ring, inhibit);
5304 	init_ppgtt_reg_state(regs, vm_alias(ce->vm));
5305 
5306 	init_wa_bb_reg_state(regs, engine);
5307 
5308 	__reset_stop_ring(regs, engine);
5309 }
5310 
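/*
 * Fill in the freshly allocated context image: copy the engine's
 * default state when available, clear the per-process HWSP page, then
 * write the register state expected at LRC_STATE_OFFSET.
 */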
5311 static int
5312 populate_lr_context(struct intel_context *ce,
5313 		    struct drm_i915_gem_object *ctx_obj,
5314 		    struct intel_engine_cs *engine,
5315 		    struct intel_ring *ring)
5316 {
5317 	bool inhibit = true;
5318 	void *vaddr;
5319 
5320 	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
5321 	if (IS_ERR(vaddr)) {
5322 		drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
5323 		return PTR_ERR(vaddr);
5324 	}
5325 
5326 	set_redzone(vaddr, engine);
5327 
5328 	if (engine->default_state) {
5329 		shmem_read(engine->default_state, 0,
5330 			   vaddr, engine->context_size);
5331 		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
5332 		inhibit = false;
5333 	}
5334 
5335 	/* Clear the ppHWSP (inc. per-context counters) */
5336 	memset(vaddr, 0, PAGE_SIZE);
5337 
5338 	/*
5339 	 * The second page of the context object contains some registers which
5340 	 * must be set up prior to the first execution.
5341 	 */
5342 	execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
5343 				 ce, engine, ring, inhibit);
5344 
5345 	__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
5346 	i915_gem_object_unpin_map(ctx_obj);
5347 	return 0;
5348 }
5349 
5350 static struct intel_timeline *pinned_timeline(struct intel_context *ce)
5351 {
5352 	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
5353 
5354 	return intel_timeline_create_from_engine(ce->engine,
5355 						 page_unmask_bits(tl));
5356 }
5357 
5358 static int __execlists_context_alloc(struct intel_context *ce,
5359 				     struct intel_engine_cs *engine)
5360 {
5361 	struct drm_i915_gem_object *ctx_obj;
5362 	struct intel_ring *ring;
5363 	struct i915_vma *vma;
5364 	u32 context_size;
5365 	int ret;
5366 
5367 	GEM_BUG_ON(ce->state);
5368 	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
5369 
5370 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
5371 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
5372 
5373 	if (INTEL_GEN(engine->i915) == 12) {
5374 		ce->wa_bb_page = context_size / PAGE_SIZE;
5375 		context_size += PAGE_SIZE;
5376 	}
5377 
5378 	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
5379 	if (IS_ERR(ctx_obj))
5380 		return PTR_ERR(ctx_obj);
5381 
5382 	vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
5383 	if (IS_ERR(vma)) {
5384 		ret = PTR_ERR(vma);
5385 		goto error_deref_obj;
5386 	}
5387 
5388 	if (!page_mask_bits(ce->timeline)) {
5389 		struct intel_timeline *tl;
5390 
5391 		/*
5392 		 * Use the static global HWSP for the kernel context, and
5393 		 * a dynamically allocated cacheline for everyone else.
5394 		 */
5395 		if (unlikely(ce->timeline))
5396 			tl = pinned_timeline(ce);
5397 		else
5398 			tl = intel_timeline_create(engine->gt);
5399 		if (IS_ERR(tl)) {
5400 			ret = PTR_ERR(tl);
5401 			goto error_deref_obj;
5402 		}
5403 
5404 		ce->timeline = tl;
5405 	}
5406 
5407 	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
5408 	if (IS_ERR(ring)) {
5409 		ret = PTR_ERR(ring);
5410 		goto error_deref_obj;
5411 	}
5412 
5413 	ret = populate_lr_context(ce, ctx_obj, engine, ring);
5414 	if (ret) {
5415 		drm_dbg(&engine->i915->drm,
5416 			"Failed to populate LRC: %d\n", ret);
5417 		goto error_ring_free;
5418 	}
5419 
5420 	ce->ring = ring;
5421 	ce->state = vma;
5422 
5423 	return 0;
5424 
5425 error_ring_free:
5426 	intel_ring_put(ring);
5427 error_deref_obj:
5428 	i915_gem_object_put(ctx_obj);
5429 	return ret;
5430 }
5431 
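/*
 * A virtual engine carries at most one ready request at a time, so the
 * otherwise unused request list of its default_priolist doubles as the
 * queue for that single request.
 */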
5432 static struct list_head *virtual_queue(struct virtual_engine *ve)
5433 {
5434 	return &ve->base.execlists.default_priolist.requests[0];
5435 }
5436 
5437 static void rcu_virtual_context_destroy(struct work_struct *wrk)
5438 {
5439 	struct virtual_engine *ve =
5440 		container_of(wrk, typeof(*ve), rcu.work);
5441 	unsigned int n;
5442 
5443 	GEM_BUG_ON(ve->context.inflight);
5444 
5445 	/* Preempt-to-busy may leave a stale request behind. */
5446 	if (unlikely(ve->request)) {
5447 		struct i915_request *old;
5448 
5449 		spin_lock_irq(&ve->base.active.lock);
5450 
5451 		old = fetch_and_zero(&ve->request);
5452 		if (old) {
5453 			GEM_BUG_ON(!i915_request_completed(old));
5454 			__i915_request_submit(old);
5455 			i915_request_put(old);
5456 		}
5457 
5458 		spin_unlock_irq(&ve->base.active.lock);
5459 	}
5460 
5461 	/*
5462 	 * Flush the tasklet in case it is still running on another core.
5463 	 *
5464 	 * This needs to be done before we remove ourselves from the siblings'
5465 	 * rbtrees as in the case it is running in parallel, it may reinsert
5466 	 * the rb_node into a sibling.
5467 	 */
5468 	tasklet_kill(&ve->base.execlists.tasklet);
5469 
5470 	/* Decouple ourselves from the siblings, no more access allowed. */
5471 	for (n = 0; n < ve->num_siblings; n++) {
5472 		struct intel_engine_cs *sibling = ve->siblings[n];
5473 		struct rb_node *node = &ve->nodes[sibling->id].rb;
5474 
5475 		if (RB_EMPTY_NODE(node))
5476 			continue;
5477 
5478 		spin_lock_irq(&sibling->active.lock);
5479 
5480 		/* Detachment is lazily performed in the execlists tasklet */
5481 		if (!RB_EMPTY_NODE(node))
5482 			rb_erase_cached(node, &sibling->execlists.virtual);
5483 
5484 		spin_unlock_irq(&sibling->active.lock);
5485 	}
5486 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
5487 	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5488 
5489 	if (ve->context.state)
5490 		__execlists_context_fini(&ve->context);
5491 	intel_context_fini(&ve->context);
5492 
5493 	intel_breadcrumbs_free(ve->base.breadcrumbs);
5494 	intel_engine_free_request_pool(&ve->base);
5495 
5496 	kfree(ve->bonds);
5497 	kfree(ve);
5498 }
5499 
5500 static void virtual_context_destroy(struct kref *kref)
5501 {
5502 	struct virtual_engine *ve =
5503 		container_of(kref, typeof(*ve), context.ref);
5504 
5505 	GEM_BUG_ON(!list_empty(&ve->context.signals));
5506 
5507 	/*
5508 	 * When destroying the virtual engine, we have to be aware that
5509 	 * it may still be in use from an hardirq/softirq context causing
5510 	 * the resubmission of a completed request (background completion
5511 	 * due to preempt-to-busy). Before we can free the engine, we need
5512 	 * to flush the submission code and tasklets that are still potentially
5513 	 * accessing the engine. Flushing the tasklets requires process context,
5514 	 * and since we can guard the resubmit onto the engine with an RCU read
5515 	 * lock, we can delegate the free of the engine to an RCU worker.
5516 	 */
5517 	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
5518 	queue_rcu_work(system_wq, &ve->rcu);
5519 }
5520 
5521 static void virtual_engine_initial_hint(struct virtual_engine *ve)
5522 {
5523 	int swp;
5524 
5525 	/*
5526 	 * Pick a random sibling on starting to help spread the load around.
5527 	 *
5528 	 * New contexts are typically created with exactly the same order
5529 	 * of siblings, and often started in batches. Due to the way we iterate
5530 	 * the array of sibling when submitting requests, sibling[0] is
5531 	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
5532 	 * randomised across the system, we also help spread the load by the
5533 	 * first engine we inspect being different each time.
5534 	 *
5535 	 * NB This does not force us to execute on this engine, it will just
5536 	 * typically be the first we inspect for submission.
5537 	 */
5538 	swp = prandom_u32_max(ve->num_siblings);
5539 	if (swp)
5540 		swap(ve->siblings[swp], ve->siblings[0]);
5541 }
5542 
5543 static int virtual_context_alloc(struct intel_context *ce)
5544 {
5545 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5546 
5547 	return __execlists_context_alloc(ce, ve->siblings[0]);
5548 }
5549 
5550 static int virtual_context_pin(struct intel_context *ce, void *vaddr)
5551 {
5552 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5553 
5554 	/* Note: we must use a real engine class for setting up reg state */
5555 	return __execlists_context_pin(ce, ve->siblings[0], vaddr);
5556 }
5557 
5558 static void virtual_context_enter(struct intel_context *ce)
5559 {
5560 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5561 	unsigned int n;
5562 
5563 	for (n = 0; n < ve->num_siblings; n++)
5564 		intel_engine_pm_get(ve->siblings[n]);
5565 
5566 	intel_timeline_enter(ce->timeline);
5567 }
5568 
5569 static void virtual_context_exit(struct intel_context *ce)
5570 {
5571 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5572 	unsigned int n;
5573 
5574 	intel_timeline_exit(ce->timeline);
5575 
5576 	for (n = 0; n < ve->num_siblings; n++)
5577 		intel_engine_pm_put(ve->siblings[n]);
5578 }
5579 
5580 static const struct intel_context_ops virtual_context_ops = {
5581 	.alloc = virtual_context_alloc,
5582 
5583 	.pre_pin = execlists_context_pre_pin,
5584 	.pin = virtual_context_pin,
5585 	.unpin = execlists_context_unpin,
5586 	.post_unpin = execlists_context_post_unpin,
5587 
5588 	.enter = virtual_context_enter,
5589 	.exit = virtual_context_exit,
5590 
5591 	.destroy = virtual_context_destroy,
5592 };
5593 
5594 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
5595 {
5596 	struct i915_request *rq;
5597 	intel_engine_mask_t mask;
5598 
5599 	rq = READ_ONCE(ve->request);
5600 	if (!rq)
5601 		return 0;
5602 
5603 	/* The rq is ready for submission; rq->execution_mask is now stable. */
5604 	mask = rq->execution_mask;
5605 	if (unlikely(!mask)) {
5606 		/* Invalid selection, submit to a random engine in error */
5607 		i915_request_set_error_once(rq, -ENODEV);
5608 		mask = ve->siblings[0]->mask;
5609 	}
5610 
5611 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
5612 		     rq->fence.context, rq->fence.seqno,
5613 		     mask, ve->base.execlists.queue_priority_hint);
5614 
5615 	return mask;
5616 }
5617 
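/*
 * Distribute the single pending virtual request: for every sibling that
 * may execute it, (re)insert this virtual engine's node into the
 * sibling's priority-ordered tree of virtual engines, and kick the
 * sibling's submission tasklet if we would now be its first choice.
 */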
5618 static void virtual_submission_tasklet(unsigned long data)
5619 {
5620 	struct virtual_engine * const ve = (struct virtual_engine *)data;
5621 	const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
5622 	intel_engine_mask_t mask;
5623 	unsigned int n;
5624 
5625 	rcu_read_lock();
5626 	mask = virtual_submission_mask(ve);
5627 	rcu_read_unlock();
5628 	if (unlikely(!mask))
5629 		return;
5630 
5631 	local_irq_disable();
5632 	for (n = 0; n < ve->num_siblings; n++) {
5633 		struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
5634 		struct ve_node * const node = &ve->nodes[sibling->id];
5635 		struct rb_node **parent, *rb;
5636 		bool first;
5637 
5638 		if (!READ_ONCE(ve->request))
5639 			break; /* already handled by a sibling's tasklet */
5640 
5641 		if (unlikely(!(mask & sibling->mask))) {
5642 			if (!RB_EMPTY_NODE(&node->rb)) {
5643 				spin_lock(&sibling->active.lock);
5644 				rb_erase_cached(&node->rb,
5645 						&sibling->execlists.virtual);
5646 				RB_CLEAR_NODE(&node->rb);
5647 				spin_unlock(&sibling->active.lock);
5648 			}
5649 			continue;
5650 		}
5651 
5652 		spin_lock(&sibling->active.lock);
5653 
5654 		if (!RB_EMPTY_NODE(&node->rb)) {
5655 			/*
5656 			 * Cheat and avoid rebalancing the tree if we can
5657 			 * reuse this node in situ.
5658 			 */
5659 			first = rb_first_cached(&sibling->execlists.virtual) ==
5660 				&node->rb;
5661 			if (prio == node->prio || (prio > node->prio && first))
5662 				goto submit_engine;
5663 
5664 			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
5665 		}
5666 
5667 		rb = NULL;
5668 		first = true;
5669 		parent = &sibling->execlists.virtual.rb_root.rb_node;
5670 		while (*parent) {
5671 			struct ve_node *other;
5672 
5673 			rb = *parent;
5674 			other = rb_entry(rb, typeof(*other), rb);
5675 			if (prio > other->prio) {
5676 				parent = &rb->rb_left;
5677 			} else {
5678 				parent = &rb->rb_right;
5679 				first = false;
5680 			}
5681 		}
5682 
5683 		rb_link_node(&node->rb, rb, parent);
5684 		rb_insert_color_cached(&node->rb,
5685 				       &sibling->execlists.virtual,
5686 				       first);
5687 
5688 submit_engine:
5689 		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
5690 		node->prio = prio;
5691 		if (first && prio > sibling->execlists.queue_priority_hint)
5692 			tasklet_hi_schedule(&sibling->execlists.tasklet);
5693 
5694 		spin_unlock(&sibling->active.lock);
5695 	}
5696 	local_irq_enable();
5697 }
5698 
5699 static void virtual_submit_request(struct i915_request *rq)
5700 {
5701 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
5702 	struct i915_request *old;
5703 	unsigned long flags;
5704 
5705 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
5706 		     rq->fence.context,
5707 		     rq->fence.seqno);
5708 
5709 	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
5710 
5711 	spin_lock_irqsave(&ve->base.active.lock, flags);
5712 
5713 	old = ve->request;
5714 	if (old) { /* background completion event from preempt-to-busy */
5715 		GEM_BUG_ON(!i915_request_completed(old));
5716 		__i915_request_submit(old);
5717 		i915_request_put(old);
5718 	}
5719 
5720 	if (i915_request_completed(rq)) {
5721 		__i915_request_submit(rq);
5722 
5723 		ve->base.execlists.queue_priority_hint = INT_MIN;
5724 		ve->request = NULL;
5725 	} else {
5726 		ve->base.execlists.queue_priority_hint = rq_prio(rq);
5727 		ve->request = i915_request_get(rq);
5728 
5729 		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5730 		list_move_tail(&rq->sched.link, virtual_queue(ve));
5731 
5732 		tasklet_hi_schedule(&ve->base.execlists.tasklet);
5733 	}
5734 
5735 	spin_unlock_irqrestore(&ve->base.active.lock, flags);
5736 }
5737 
5738 static struct ve_bond *
5739 virtual_find_bond(struct virtual_engine *ve,
5740 		  const struct intel_engine_cs *master)
5741 {
5742 	int i;
5743 
5744 	for (i = 0; i < ve->num_bonds; i++) {
5745 		if (ve->bonds[i].master == master)
5746 			return &ve->bonds[i];
5747 	}
5748 
5749 	return NULL;
5750 }
5751 
5752 static void
5753 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5754 {
5755 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
5756 	intel_engine_mask_t allowed, exec;
5757 	struct ve_bond *bond;
5758 
5759 	allowed = ~to_request(signal)->engine->mask;
5760 
5761 	bond = virtual_find_bond(ve, to_request(signal)->engine);
5762 	if (bond)
5763 		allowed &= bond->sibling_mask;
5764 
5765 	/* Restrict the bonded request to run on only the available engines */
5766 	exec = READ_ONCE(rq->execution_mask);
5767 	while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5768 		;
5769 
5770 	/* Prevent the master from being re-run on the bonded engines */
5771 	to_request(signal)->execution_mask &= ~allowed;
5772 }
5773 
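/*
 * Build a virtual engine over @count physical siblings: allocate the
 * container, require all siblings to share one engine class, copy their
 * emission vfuncs, and return the virtual intel_context for the caller
 * to pin and use.
 */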
5774 struct intel_context *
5775 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5776 			       unsigned int count)
5777 {
5778 	struct virtual_engine *ve;
5779 	unsigned int n;
5780 	int err;
5781 
5782 	if (count == 0)
5783 		return ERR_PTR(-EINVAL);
5784 
5785 	if (count == 1)
5786 		return intel_context_create(siblings[0]);
5787 
5788 	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5789 	if (!ve)
5790 		return ERR_PTR(-ENOMEM);
5791 
5792 	ve->base.i915 = siblings[0]->i915;
5793 	ve->base.gt = siblings[0]->gt;
5794 	ve->base.uncore = siblings[0]->uncore;
5795 	ve->base.id = -1;
5796 
5797 	ve->base.class = OTHER_CLASS;
5798 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5799 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5800 	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5801 
5802 	/*
5803 	 * The decision on whether to submit a request using semaphores
5804 	 * depends on the saturated state of the engine. We only compute
5805 	 * this during HW submission of the request, and we need this
5806 	 * state to be globally applied to all requests being submitted
5807 	 * to this engine. Virtual engines encompass more than one physical
5808 	 * engine and so we cannot accurately tell in advance if one of those
5809 	 * engines is already saturated and so cannot afford to use a semaphore
5810 	 * and be pessimized in priority for doing so -- if we are the only
5811 	 * context using semaphores after all other clients have stopped, we
5812 	 * will be starved on the saturated system. Such a global switch for
5813 	 * semaphores is less than ideal, but alas is the current compromise.
5814 	 */
5815 	ve->base.saturated = ALL_ENGINES;
5816 
5817 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5818 
5819 	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5820 	intel_engine_init_execlists(&ve->base);
5821 
5822 	ve->base.cops = &virtual_context_ops;
5823 	ve->base.request_alloc = execlists_request_alloc;
5824 
5825 	ve->base.schedule = i915_schedule;
5826 	ve->base.submit_request = virtual_submit_request;
5827 	ve->base.bond_execute = virtual_bond_execute;
5828 
5829 	INIT_LIST_HEAD(virtual_queue(ve));
5830 	ve->base.execlists.queue_priority_hint = INT_MIN;
5831 	tasklet_init(&ve->base.execlists.tasklet,
5832 		     virtual_submission_tasklet,
5833 		     (unsigned long)ve);
5834 
5835 	intel_context_init(&ve->context, &ve->base);
5836 
5837 	ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
5838 	if (!ve->base.breadcrumbs) {
5839 		err = -ENOMEM;
5840 		goto err_put;
5841 	}
5842 
5843 	for (n = 0; n < count; n++) {
5844 		struct intel_engine_cs *sibling = siblings[n];
5845 
5846 		GEM_BUG_ON(!is_power_of_2(sibling->mask));
5847 		if (sibling->mask & ve->base.mask) {
5848 			DRM_DEBUG("duplicate %s entry in load balancer\n",
5849 				  sibling->name);
5850 			err = -EINVAL;
5851 			goto err_put;
5852 		}
5853 
5854 		/*
5855 		 * The virtual engine implementation is tightly coupled to
5856 		 * the execlists backend -- we push requests directly
5857 		 * into a tree inside each physical engine. We could support
5858 		 * layering if we handle cloning of the requests and
5859 		 * submitting a copy into each backend.
5860 		 */
5861 		if (sibling->execlists.tasklet.func !=
5862 		    execlists_submission_tasklet) {
5863 			err = -ENODEV;
5864 			goto err_put;
5865 		}
5866 
5867 		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5868 		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5869 
5870 		ve->siblings[ve->num_siblings++] = sibling;
5871 		ve->base.mask |= sibling->mask;
5872 
5873 		/*
5874 		 * All physical engines must be compatible for their emission
5875 		 * functions (as we build the instructions during request
5876 		 * construction and do not alter them before submission
5877 		 * on the physical engine). We use the engine class as a guide
5878 		 * here, although that could be refined.
5879 		 */
5880 		if (ve->base.class != OTHER_CLASS) {
5881 			if (ve->base.class != sibling->class) {
5882 				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5883 					  sibling->class, ve->base.class);
5884 				err = -EINVAL;
5885 				goto err_put;
5886 			}
5887 			continue;
5888 		}
5889 
5890 		ve->base.class = sibling->class;
5891 		ve->base.uabi_class = sibling->uabi_class;
5892 		snprintf(ve->base.name, sizeof(ve->base.name),
5893 			 "v%dx%d", ve->base.class, count);
5894 		ve->base.context_size = sibling->context_size;
5895 
5896 		ve->base.emit_bb_start = sibling->emit_bb_start;
5897 		ve->base.emit_flush = sibling->emit_flush;
5898 		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5899 		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5900 		ve->base.emit_fini_breadcrumb_dw =
5901 			sibling->emit_fini_breadcrumb_dw;
5902 
5903 		ve->base.flags = sibling->flags;
5904 	}
5905 
5906 	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5907 
5908 	virtual_engine_initial_hint(ve);
5909 	return &ve->context;
5910 
5911 err_put:
5912 	intel_context_put(&ve->context);
5913 	return ERR_PTR(err);
5914 }
5915 
5916 struct intel_context *
5917 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5918 {
5919 	struct virtual_engine *se = to_virtual_engine(src);
5920 	struct intel_context *dst;
5921 
5922 	dst = intel_execlists_create_virtual(se->siblings,
5923 					     se->num_siblings);
5924 	if (IS_ERR(dst))
5925 		return dst;
5926 
5927 	if (se->num_bonds) {
5928 		struct virtual_engine *de = to_virtual_engine(dst->engine);
5929 
5930 		de->bonds = kmemdup(se->bonds,
5931 				    sizeof(*se->bonds) * se->num_bonds,
5932 				    GFP_KERNEL);
5933 		if (!de->bonds) {
5934 			intel_context_put(dst);
5935 			return ERR_PTR(-ENOMEM);
5936 		}
5937 
5938 		de->num_bonds = se->num_bonds;
5939 	}
5940 
5941 	return dst;
5942 }
5943 
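/*
 * Record that a request bonded to @master may run on @sibling: extend
 * the existing bond for @master if there is one, otherwise grow the
 * bond array with a new entry.
 */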
5944 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5945 				     const struct intel_engine_cs *master,
5946 				     const struct intel_engine_cs *sibling)
5947 {
5948 	struct virtual_engine *ve = to_virtual_engine(engine);
5949 	struct ve_bond *bond;
5950 	int n;
5951 
5952 	/* Sanity check the sibling is part of the virtual engine */
5953 	for (n = 0; n < ve->num_siblings; n++)
5954 		if (sibling == ve->siblings[n])
5955 			break;
5956 	if (n == ve->num_siblings)
5957 		return -EINVAL;
5958 
5959 	bond = virtual_find_bond(ve, master);
5960 	if (bond) {
5961 		bond->sibling_mask |= sibling->mask;
5962 		return 0;
5963 	}
5964 
5965 	bond = krealloc(ve->bonds,
5966 			sizeof(*bond) * (ve->num_bonds + 1),
5967 			GFP_KERNEL);
5968 	if (!bond)
5969 		return -ENOMEM;
5970 
5971 	bond[ve->num_bonds].master = master;
5972 	bond[ve->num_bonds].sibling_mask = sibling->mask;
5973 
5974 	ve->bonds = bond;
5975 	ve->num_bonds++;
5976 
5977 	return 0;
5978 }
5979 
5980 struct intel_engine_cs *
5981 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5982 				 unsigned int sibling)
5983 {
5984 	struct virtual_engine *ve = to_virtual_engine(engine);
5985 
5986 	if (sibling >= ve->num_siblings)
5987 		return NULL;
5988 
5989 	return ve->siblings[sibling];
5990 }
5991 
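/*
 * Debug dump of the engine's execlists bookkeeping: print up to @max
 * requests from the executing list, the priority-sorted queue and any
 * virtual engines queued on this engine, summarising how many entries
 * were skipped once the limit is exceeded.
 */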
5992 void intel_execlists_show_requests(struct intel_engine_cs *engine,
5993 				   struct drm_printer *m,
5994 				   void (*show_request)(struct drm_printer *m,
5995 							struct i915_request *rq,
5996 							const char *prefix),
5997 				   unsigned int max)
5998 {
5999 	const struct intel_engine_execlists *execlists = &engine->execlists;
6000 	struct i915_request *rq, *last;
6001 	unsigned long flags;
6002 	unsigned int count;
6003 	struct rb_node *rb;
6004 
6005 	spin_lock_irqsave(&engine->active.lock, flags);
6006 
6007 	last = NULL;
6008 	count = 0;
6009 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
6010 		if (count++ < max - 1)
6011 			show_request(m, rq, "\t\tE ");
6012 		else
6013 			last = rq;
6014 	}
6015 	if (last) {
6016 		if (count > max) {
6017 			drm_printf(m,
6018 				   "\t\t...skipping %d executing requests...\n",
6019 				   count - max);
6020 		}
6021 		show_request(m, last, "\t\tE ");
6022 	}
6023 
6024 	if (execlists->switch_priority_hint != INT_MIN)
6025 		drm_printf(m, "\t\tSwitch priority hint: %d\n",
6026 			   READ_ONCE(execlists->switch_priority_hint));
6027 	if (execlists->queue_priority_hint != INT_MIN)
6028 		drm_printf(m, "\t\tQueue priority hint: %d\n",
6029 			   READ_ONCE(execlists->queue_priority_hint));
6030 
6031 	last = NULL;
6032 	count = 0;
6033 	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
6034 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
6035 		int i;
6036 
6037 		priolist_for_each_request(rq, p, i) {
6038 			if (count++ < max - 1)
6039 				show_request(m, rq, "\t\tQ ");
6040 			else
6041 				last = rq;
6042 		}
6043 	}
6044 	if (last) {
6045 		if (count > max) {
6046 			drm_printf(m,
6047 				   "\t\t...skipping %d queued requests...\n",
6048 				   count - max);
6049 		}
6050 		show_request(m, last, "\t\tQ ");
6051 	}
6052 
6053 	last = NULL;
6054 	count = 0;
6055 	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
6056 		struct virtual_engine *ve =
6057 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
6058 		struct i915_request *rq = READ_ONCE(ve->request);
6059 
6060 		if (rq) {
6061 			if (count++ < max - 1)
6062 				show_request(m, rq, "\t\tV ");
6063 			else
6064 				last = rq;
6065 		}
6066 	}
6067 	if (last) {
6068 		if (count > max) {
6069 			drm_printf(m,
6070 				   "\t\t...skipping %d virtual requests...\n",
6071 				   count - max);
6072 		}
6073 		show_request(m, last, "\t\tV ");
6074 	}
6075 
6076 	spin_unlock_irqrestore(&engine->active.lock, flags);
6077 }
6078 
6079 void intel_lr_context_reset(struct intel_engine_cs *engine,
6080 			    struct intel_context *ce,
6081 			    u32 head,
6082 			    bool scrub)
6083 {
6084 	GEM_BUG_ON(!intel_context_is_pinned(ce));
6085 
6086 	/*
6087 	 * We want a simple context + ring to execute the breadcrumb update.
6088 	 * We cannot rely on the context being intact across the GPU hang,
6089 	 * so clear it and rebuild just what we need for the breadcrumb.
6090 	 * All pending requests for this context will be zapped, and any
6091 	 * future request will be after userspace has had the opportunity
6092 	 * to recreate its own state.
6093 	 */
6094 	if (scrub)
6095 		restore_default_state(ce, engine);
6096 
6097 	/* Rerun the request; its payload has been neutered (if guilty). */
6098 	__execlists_update_reg_state(ce, engine, head);
6099 }
6100 
6101 bool
6102 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
6103 {
6104 	return engine->set_default_submission ==
6105 	       intel_execlists_set_default_submission;
6106 }
6107 
6108 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
6109 #include "selftest_lrc.c"
6110 #endif
6111