1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5 
6 #include <linux/prime_numbers.h>
7 
8 #include "gem/i915_gem_pm.h"
9 #include "gt/intel_engine_heartbeat.h"
10 #include "gt/intel_reset.h"
11 #include "gt/selftest_engine_heartbeat.h"
12 
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19 
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22 
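/*
 * CS_GPR() maps the engine's command streamer general purpose registers,
 * indexed as dwords from the engine's mmio base; each 64b GPR spans two
 * dwords, hence NUM_GPR_DW.
 */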
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR 16
25 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
26 
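/* Consider a request "active" once it is in ELSP, on hold, or has started. */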
27 static bool is_active(struct i915_request *rq)
28 {
29 	if (i915_request_is_active(rq))
30 		return true;
31 
32 	if (i915_request_on_hold(rq))
33 		return true;
34 
35 	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
36 		return true;
37 
38 	return false;
39 }
40 
41 static int wait_for_submit(struct intel_engine_cs *engine,
42 			   struct i915_request *rq,
43 			   unsigned long timeout)
44 {
45 	/* Ignore our own attempts to suppress excess tasklets */
46 	tasklet_hi_schedule(&engine->sched_engine->tasklet);
47 
48 	timeout += jiffies;
49 	do {
50 		bool done = time_after(jiffies, timeout);
51 
52 		if (i915_request_completed(rq)) /* that was quick! */
53 			return 0;
54 
55 		/* Wait until the HW has acknowledged the submission (or err) */
56 		intel_engine_flush_submission(engine);
57 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
58 			return 0;
59 
60 		if (done)
61 			return -ETIME;
62 
63 		cond_resched();
64 	} while (1);
65 }
66 
67 static int wait_for_reset(struct intel_engine_cs *engine,
68 			  struct i915_request *rq,
69 			  unsigned long timeout)
70 {
71 	timeout += jiffies;
72 
73 	do {
74 		cond_resched();
75 		intel_engine_flush_submission(engine);
76 
77 		if (READ_ONCE(engine->execlists.pending[0]))
78 			continue;
79 
80 		if (i915_request_completed(rq))
81 			break;
82 
83 		if (READ_ONCE(rq->fence.error))
84 			break;
85 	} while (time_before(jiffies, timeout));
86 
87 	flush_scheduled_work();
88 
89 	if (rq->fence.error != -EIO) {
90 		pr_err("%s: hanging request %llx:%lld not reset\n",
91 		       engine->name,
92 		       rq->fence.context,
93 		       rq->fence.seqno);
94 		return -EINVAL;
95 	}
96 
97 	/* Give the request a jiffie to complete after flushing the worker */
98 	if (i915_request_wait(rq, 0,
99 			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
100 		pr_err("%s: hanging request %llx:%lld did not complete\n",
101 		       engine->name,
102 		       rq->fence.context,
103 		       rq->fence.seqno);
104 		return -ETIME;
105 	}
106 
107 	return 0;
108 }
109 
110 static int live_sanitycheck(void *arg)
111 {
112 	struct intel_gt *gt = arg;
113 	struct intel_engine_cs *engine;
114 	enum intel_engine_id id;
115 	struct igt_spinner spin;
116 	int err = 0;
117 
118 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
119 		return 0;
120 
121 	if (igt_spinner_init(&spin, gt))
122 		return -ENOMEM;
123 
124 	for_each_engine(engine, gt, id) {
125 		struct intel_context *ce;
126 		struct i915_request *rq;
127 
128 		ce = intel_context_create(engine);
129 		if (IS_ERR(ce)) {
130 			err = PTR_ERR(ce);
131 			break;
132 		}
133 
134 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
135 		if (IS_ERR(rq)) {
136 			err = PTR_ERR(rq);
137 			goto out_ctx;
138 		}
139 
140 		i915_request_add(rq);
141 		if (!igt_wait_for_spinner(&spin, rq)) {
142 			GEM_TRACE("spinner failed to start\n");
143 			GEM_TRACE_DUMP();
144 			intel_gt_set_wedged(gt);
145 			err = -EIO;
146 			goto out_ctx;
147 		}
148 
149 		igt_spinner_end(&spin);
150 		if (igt_flush_test(gt->i915)) {
151 			err = -EIO;
152 			goto out_ctx;
153 		}
154 
155 out_ctx:
156 		intel_context_put(ce);
157 		if (err)
158 			break;
159 	}
160 
161 	igt_spinner_fini(&spin);
162 	return err;
163 }
164 
165 static int live_unlite_restore(struct intel_gt *gt, int prio)
166 {
167 	struct intel_engine_cs *engine;
168 	enum intel_engine_id id;
169 	struct igt_spinner spin;
170 	int err = -ENOMEM;
171 
172 	/*
173 	 * Check that we can correctly context switch between 2 instances
174 	 * on the same engine from the same parent context.
175 	 */
176 
177 	if (igt_spinner_init(&spin, gt))
178 		return err;
179 
180 	err = 0;
181 	for_each_engine(engine, gt, id) {
182 		struct intel_context *ce[2] = {};
183 		struct i915_request *rq[2];
184 		struct igt_live_test t;
185 		int n;
186 
187 		if (prio && !intel_engine_has_preemption(engine))
188 			continue;
189 
190 		if (!intel_engine_can_store_dword(engine))
191 			continue;
192 
193 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
194 			err = -EIO;
195 			break;
196 		}
197 		st_engine_heartbeat_disable(engine);
198 
199 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
200 			struct intel_context *tmp;
201 
202 			tmp = intel_context_create(engine);
203 			if (IS_ERR(tmp)) {
204 				err = PTR_ERR(tmp);
205 				goto err_ce;
206 			}
207 
208 			err = intel_context_pin(tmp);
209 			if (err) {
210 				intel_context_put(tmp);
211 				goto err_ce;
212 			}
213 
214 			/*
215 			 * Set up the pair of contexts such that if we
216 			 * lite-restore using the RING_TAIL from ce[1] it
217 			 * will execute garbage from ce[0]->ring.
218 			 */
219 			memset(tmp->ring->vaddr,
220 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
221 			       tmp->ring->vma->size);
222 
223 			ce[n] = tmp;
224 		}
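		/*
		 * Restart ce[1] halfway into its poisoned ring so that its
		 * requests land beyond ce[0]'s postfix (see the GEM_BUG_ONs
		 * below), the layout needed to tempt a bogus lite-restore.
		 */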
225 		GEM_BUG_ON(!ce[1]->ring->size);
226 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
227 		lrc_update_regs(ce[1], engine, ce[1]->ring->head);
228 
229 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
230 		if (IS_ERR(rq[0])) {
231 			err = PTR_ERR(rq[0]);
232 			goto err_ce;
233 		}
234 
235 		i915_request_get(rq[0]);
236 		i915_request_add(rq[0]);
237 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
238 
239 		if (!igt_wait_for_spinner(&spin, rq[0])) {
240 			i915_request_put(rq[0]);
241 			goto err_ce;
242 		}
243 
244 		rq[1] = i915_request_create(ce[1]);
245 		if (IS_ERR(rq[1])) {
246 			err = PTR_ERR(rq[1]);
247 			i915_request_put(rq[0]);
248 			goto err_ce;
249 		}
250 
251 		if (!prio) {
252 			/*
253 			 * Ensure we do the switch to ce[1] on completion.
254 			 *
255 			 * rq[0] is already submitted, so this should reduce
256 			 * to a no-op (a wait on a request on the same engine
257 			 * uses the submit fence, not the completion fence),
258 			 * but it will install a dependency on rq[1] for rq[0]
259 			 * that will prevent the pair being reordered by
260 			 * timeslicing.
261 			 */
262 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
263 		}
264 
265 		i915_request_get(rq[1]);
266 		i915_request_add(rq[1]);
267 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
268 		i915_request_put(rq[0]);
269 
270 		if (prio) {
271 			struct i915_sched_attr attr = {
272 				.priority = prio,
273 			};
274 
275 			/* Alternatively preempt the spinner with ce[1] */
276 			engine->sched_engine->schedule(rq[1], &attr);
277 		}
278 
279 		/* And switch back to ce[0] for good measure */
280 		rq[0] = i915_request_create(ce[0]);
281 		if (IS_ERR(rq[0])) {
282 			err = PTR_ERR(rq[0]);
283 			i915_request_put(rq[1]);
284 			goto err_ce;
285 		}
286 
287 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
288 		i915_request_get(rq[0]);
289 		i915_request_add(rq[0]);
290 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
291 		i915_request_put(rq[1]);
292 		i915_request_put(rq[0]);
293 
294 err_ce:
295 		intel_engine_flush_submission(engine);
296 		igt_spinner_end(&spin);
297 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
298 			if (IS_ERR_OR_NULL(ce[n]))
299 				break;
300 
301 			intel_context_unpin(ce[n]);
302 			intel_context_put(ce[n]);
303 		}
304 
305 		st_engine_heartbeat_enable(engine);
306 		if (igt_live_test_end(&t))
307 			err = -EIO;
308 		if (err)
309 			break;
310 	}
311 
312 	igt_spinner_fini(&spin);
313 	return err;
314 }
315 
316 static int live_unlite_switch(void *arg)
317 {
318 	return live_unlite_restore(arg, 0);
319 }
320 
321 static int live_unlite_preempt(void *arg)
322 {
323 	return live_unlite_restore(arg, I915_PRIORITY_MAX);
324 }
325 
326 static int live_unlite_ring(void *arg)
327 {
328 	struct intel_gt *gt = arg;
329 	struct intel_engine_cs *engine;
330 	struct igt_spinner spin;
331 	enum intel_engine_id id;
332 	int err = 0;
333 
334 	/*
335 	 * Set up a preemption event that will cause almost the entire ring
336 	 * to be unwound, potentially fooling our intel_ring_direction()
337 	 * into emitting a forward lite-restore instead of the rollback.
338 	 */
339 
340 	if (igt_spinner_init(&spin, gt))
341 		return -ENOMEM;
342 
343 	for_each_engine(engine, gt, id) {
344 		struct intel_context *ce[2] = {};
345 		struct i915_request *rq;
346 		struct igt_live_test t;
347 		int n;
348 
349 		if (!intel_engine_has_preemption(engine))
350 			continue;
351 
352 		if (!intel_engine_can_store_dword(engine))
353 			continue;
354 
355 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
356 			err = -EIO;
357 			break;
358 		}
359 		st_engine_heartbeat_disable(engine);
360 
361 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
362 			struct intel_context *tmp;
363 
364 			tmp = intel_context_create(engine);
365 			if (IS_ERR(tmp)) {
366 				err = PTR_ERR(tmp);
367 				goto err_ce;
368 			}
369 
370 			err = intel_context_pin(tmp);
371 			if (err) {
372 				intel_context_put(tmp);
373 				goto err_ce;
374 			}
375 
376 			memset32(tmp->ring->vaddr,
377 				 0xdeadbeef, /* trigger a hang if executed */
378 				 tmp->ring->vma->size / sizeof(u32));
379 
380 			ce[n] = tmp;
381 		}
382 
383 		/* Create max prio spinner, followed by N low prio nops */
384 		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
385 		if (IS_ERR(rq)) {
386 			err = PTR_ERR(rq);
387 			goto err_ce;
388 		}
389 
390 		i915_request_get(rq);
391 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
392 		i915_request_add(rq);
393 
394 		if (!igt_wait_for_spinner(&spin, rq)) {
395 			intel_gt_set_wedged(gt);
396 			i915_request_put(rq);
397 			err = -ETIME;
398 			goto err_ce;
399 		}
400 
401 		/* Fill the ring until we cause a wrap */
402 		n = 0;
403 		while (intel_ring_direction(ce[0]->ring,
404 					    rq->wa_tail,
405 					    ce[0]->ring->tail) <= 0) {
406 			struct i915_request *tmp;
407 
408 			tmp = intel_context_create_request(ce[0]);
409 			if (IS_ERR(tmp)) {
410 				err = PTR_ERR(tmp);
411 				i915_request_put(rq);
412 				goto err_ce;
413 			}
414 
415 			i915_request_add(tmp);
416 			intel_engine_flush_submission(engine);
417 			n++;
418 		}
419 		intel_engine_flush_submission(engine);
420 		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
421 			 engine->name, n,
422 			 ce[0]->ring->size,
423 			 ce[0]->ring->tail,
424 			 ce[0]->ring->emit,
425 			 rq->tail);
426 		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
427 						rq->tail,
428 						ce[0]->ring->tail) <= 0);
429 		i915_request_put(rq);
430 
431 		/* Create a request on a second context to preempt the first after rq[0] */
432 		rq = intel_context_create_request(ce[1]);
433 		if (IS_ERR(rq)) {
434 			err = PTR_ERR(rq);
435 			goto err_ce;
436 		}
437 
438 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
439 		i915_request_get(rq);
440 		i915_request_add(rq);
441 
442 		err = wait_for_submit(engine, rq, HZ / 2);
443 		i915_request_put(rq);
444 		if (err) {
445 			pr_err("%s: preemption request was not submitted\n",
446 			       engine->name);
447 			err = -ETIME;
448 		}
449 
450 		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
451 			 engine->name,
452 			 ce[0]->ring->tail, ce[0]->ring->emit,
453 			 ce[1]->ring->tail, ce[1]->ring->emit);
454 
455 err_ce:
456 		intel_engine_flush_submission(engine);
457 		igt_spinner_end(&spin);
458 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
459 			if (IS_ERR_OR_NULL(ce[n]))
460 				break;
461 
462 			intel_context_unpin(ce[n]);
463 			intel_context_put(ce[n]);
464 		}
465 		st_engine_heartbeat_enable(engine);
466 		if (igt_live_test_end(&t))
467 			err = -EIO;
468 		if (err)
469 			break;
470 	}
471 
472 	igt_spinner_fini(&spin);
473 	return err;
474 }
475 
476 static int live_pin_rewind(void *arg)
477 {
478 	struct intel_gt *gt = arg;
479 	struct intel_engine_cs *engine;
480 	enum intel_engine_id id;
481 	int err = 0;
482 
483 	/*
484 	 * We have to be careful not to trust intel_ring too much; for example,
485 	 * ring->head is updated upon retire, which is out of sync with pinning
486 	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
487 	 * or else we risk writing an older, stale value.
488 	 *
489 	 * To simulate this, let's apply a bit of deliberate sabotage.
490 	 */
491 
492 	for_each_engine(engine, gt, id) {
493 		struct intel_context *ce;
494 		struct i915_request *rq;
495 		struct intel_ring *ring;
496 		struct igt_live_test t;
497 
498 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
499 			err = -EIO;
500 			break;
501 		}
502 
503 		ce = intel_context_create(engine);
504 		if (IS_ERR(ce)) {
505 			err = PTR_ERR(ce);
506 			break;
507 		}
508 
509 		err = intel_context_pin(ce);
510 		if (err) {
511 			intel_context_put(ce);
512 			break;
513 		}
514 
515 		/* Keep the context awake while we play games */
516 		err = i915_active_acquire(&ce->active);
517 		if (err) {
518 			intel_context_unpin(ce);
519 			intel_context_put(ce);
520 			break;
521 		}
522 		ring = ce->ring;
523 
524 		/* Poison the ring, and offset the next request from HEAD */
525 		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
526 		ring->emit = ring->size / 2;
527 		ring->tail = ring->emit;
528 		GEM_BUG_ON(ring->head);
529 
530 		intel_context_unpin(ce);
531 
532 		/* Submit a simple nop request */
533 		GEM_BUG_ON(intel_context_is_pinned(ce));
534 		rq = intel_context_create_request(ce);
535 		i915_active_release(&ce->active); /* e.g. async retire */
536 		intel_context_put(ce);
537 		if (IS_ERR(rq)) {
538 			err = PTR_ERR(rq);
539 			break;
540 		}
541 		GEM_BUG_ON(!rq->head);
542 		i915_request_add(rq);
543 
544 		/* Expect not to hang! */
545 		if (igt_live_test_end(&t)) {
546 			err = -EIO;
547 			break;
548 		}
549 	}
550 
551 	return err;
552 }
553 
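/*
 * Emulate the critical section around an engine reset: disable the
 * submission tasklet and claim this engine's reset bit so nothing else
 * processes submissions (or resets) underneath us; if the bit is already
 * held, give up and wedge.
 */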
554 static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
555 {
556 	tasklet_disable(&engine->sched_engine->tasklet);
557 	local_bh_disable();
558 
559 	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
560 			     &engine->gt->reset.flags)) {
561 		local_bh_enable();
562 		tasklet_enable(&engine->sched_engine->tasklet);
563 
564 		intel_gt_set_wedged(engine->gt);
565 		return -EBUSY;
566 	}
567 
568 	return 0;
569 }
570 
571 static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
572 {
573 	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
574 			      &engine->gt->reset.flags);
575 
576 	local_bh_enable();
577 	tasklet_enable(&engine->sched_engine->tasklet);
578 }
579 
580 static int live_hold_reset(void *arg)
581 {
582 	struct intel_gt *gt = arg;
583 	struct intel_engine_cs *engine;
584 	enum intel_engine_id id;
585 	struct igt_spinner spin;
586 	int err = 0;
587 
588 	/*
589 	 * In order to support offline error capture for fast preempt reset,
590 	 * we need to decouple the guilty request and ensure that it and its
591 	 * descendants are not executed while the capture is in progress.
592 	 */
593 
594 	if (!intel_has_reset_engine(gt))
595 		return 0;
596 
597 	if (igt_spinner_init(&spin, gt))
598 		return -ENOMEM;
599 
600 	for_each_engine(engine, gt, id) {
601 		struct intel_context *ce;
602 		struct i915_request *rq;
603 
604 		ce = intel_context_create(engine);
605 		if (IS_ERR(ce)) {
606 			err = PTR_ERR(ce);
607 			break;
608 		}
609 
610 		st_engine_heartbeat_disable(engine);
611 
612 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
613 		if (IS_ERR(rq)) {
614 			err = PTR_ERR(rq);
615 			goto out;
616 		}
617 		i915_request_add(rq);
618 
619 		if (!igt_wait_for_spinner(&spin, rq)) {
620 			intel_gt_set_wedged(gt);
621 			err = -ETIME;
622 			goto out;
623 		}
624 
625 		/* We have our request executing, now remove it and reset */
626 
627 		err = engine_lock_reset_tasklet(engine);
628 		if (err)
629 			goto out;
630 
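		/*
		 * With the tasklet disabled, process the pending submission
		 * by hand so the spinner becomes the active ELSP request.
		 */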
631 		engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
632 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
633 
634 		i915_request_get(rq);
635 		execlists_hold(engine, rq);
636 		GEM_BUG_ON(!i915_request_on_hold(rq));
637 
638 		__intel_engine_reset_bh(engine, NULL);
639 		GEM_BUG_ON(rq->fence.error != -EIO);
640 
641 		engine_unlock_reset_tasklet(engine);
642 
643 		/* Check that we do not resubmit the held request */
644 		if (!i915_request_wait(rq, 0, HZ / 5)) {
645 			pr_err("%s: on hold request completed!\n",
646 			       engine->name);
647 			i915_request_put(rq);
648 			err = -EIO;
649 			goto out;
650 		}
651 		GEM_BUG_ON(!i915_request_on_hold(rq));
652 
653 		/* But is resubmitted on release */
654 		execlists_unhold(engine, rq);
655 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
656 			pr_err("%s: held request did not complete!\n",
657 			       engine->name);
658 			intel_gt_set_wedged(gt);
659 			err = -ETIME;
660 		}
661 		i915_request_put(rq);
662 
663 out:
664 		st_engine_heartbeat_enable(engine);
665 		intel_context_put(ce);
666 		if (err)
667 			break;
668 	}
669 
670 	igt_spinner_fini(&spin);
671 	return err;
672 }
673 
674 static const char *error_repr(int err)
675 {
676 	return err ? "bad" : "good";
677 }
678 
679 static int live_error_interrupt(void *arg)
680 {
681 	static const struct error_phase {
682 		enum { GOOD = 0, BAD = -EIO } error[2];
683 	} phases[] = {
684 		{ { BAD,  GOOD } },
685 		{ { BAD,  BAD  } },
686 		{ { BAD,  GOOD } },
687 		{ { GOOD, GOOD } }, /* sentinel */
688 	};
689 	struct intel_gt *gt = arg;
690 	struct intel_engine_cs *engine;
691 	enum intel_engine_id id;
692 
693 	/*
694 	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
695 	 * of invalid commands in user batches that will cause a GPU hang.
696 	 * This is a faster mechanism than using hangcheck/heartbeats, but
697 	 * only detects problems the HW knows about -- it will not warn when
698 	 * we kill the HW!
699 	 *
700 	 * To verify our detection and reset, we throw some invalid commands
701 	 * at the HW and wait for the interrupt.
702 	 */
703 
704 	if (!intel_has_reset_engine(gt))
705 		return 0;
706 
707 	for_each_engine(engine, gt, id) {
708 		const struct error_phase *p;
709 		int err = 0;
710 
711 		st_engine_heartbeat_disable(engine);
712 
713 		for (p = phases; p->error[0] != GOOD; p++) {
714 			struct i915_request *client[ARRAY_SIZE(phases->error)];
715 			u32 *cs;
716 			int i;
717 
718 			memset(client, 0, sizeof(client)); /* zero all slots for the cleanup below */
719 			for (i = 0; i < ARRAY_SIZE(client); i++) {
720 				struct intel_context *ce;
721 				struct i915_request *rq;
722 
723 				ce = intel_context_create(engine);
724 				if (IS_ERR(ce)) {
725 					err = PTR_ERR(ce);
726 					goto out;
727 				}
728 
729 				rq = intel_context_create_request(ce);
730 				intel_context_put(ce);
731 				if (IS_ERR(rq)) {
732 					err = PTR_ERR(rq);
733 					goto out;
734 				}
735 
736 				if (rq->engine->emit_init_breadcrumb) {
737 					err = rq->engine->emit_init_breadcrumb(rq);
738 					if (err) {
739 						i915_request_add(rq);
740 						goto out;
741 					}
742 				}
743 
744 				cs = intel_ring_begin(rq, 2);
745 				if (IS_ERR(cs)) {
746 					i915_request_add(rq);
747 					err = PTR_ERR(cs);
748 					goto out;
749 				}
750 
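				/*
				 * 0xdeadbeef is an invalid instruction and
				 * should trigger the CS error interrupt;
				 * MI_NOOPs provide the "good" baseline.
				 */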
751 				if (p->error[i]) {
752 					*cs++ = 0xdeadbeef;
753 					*cs++ = 0xdeadbeef;
754 				} else {
755 					*cs++ = MI_NOOP;
756 					*cs++ = MI_NOOP;
757 				}
758 
759 				client[i] = i915_request_get(rq);
760 				i915_request_add(rq);
761 			}
762 
763 			err = wait_for_submit(engine, client[0], HZ / 2);
764 			if (err) {
765 				pr_err("%s: first request did not start within time!\n",
766 				       engine->name);
767 				err = -ETIME;
768 				goto out;
769 			}
770 
771 			for (i = 0; i < ARRAY_SIZE(client); i++) {
772 				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
773 					pr_debug("%s: %s request incomplete!\n",
774 						 engine->name,
775 						 error_repr(p->error[i]));
776 
777 				if (!i915_request_started(client[i])) {
778 					pr_err("%s: %s request not started!\n",
779 					       engine->name,
780 					       error_repr(p->error[i]));
781 					err = -ETIME;
782 					goto out;
783 				}
784 
785 				/* Kick the tasklet to process the error */
786 				intel_engine_flush_submission(engine);
787 				if (client[i]->fence.error != p->error[i]) {
788 					pr_err("%s: %s request (%s) with wrong error code: %d\n",
789 					       engine->name,
790 					       error_repr(p->error[i]),
791 					       i915_request_completed(client[i]) ? "completed" : "running",
792 					       client[i]->fence.error);
793 					err = -EINVAL;
794 					goto out;
795 				}
796 			}
797 
798 out:
799 			for (i = 0; i < ARRAY_SIZE(client); i++)
800 				if (client[i])
801 					i915_request_put(client[i]);
802 			if (err) {
803 				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
804 				       engine->name, p - phases,
805 				       p->error[0], p->error[1]);
806 				break;
807 			}
808 		}
809 
810 		st_engine_heartbeat_enable(engine);
811 		if (err) {
812 			intel_gt_set_wedged(gt);
813 			return err;
814 		}
815 	}
816 
817 	return 0;
818 }
819 
820 static int
821 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
822 {
823 	u32 *cs;
824 
825 	cs = intel_ring_begin(rq, 10);
826 	if (IS_ERR(cs))
827 		return PTR_ERR(cs);
828 
829 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
830 
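	/*
	 * Wait for slot[idx] to become non-zero, then release slot[idx - 1];
	 * signalling the final slot unwinds the whole chain backwards.
	 */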
831 	*cs++ = MI_SEMAPHORE_WAIT |
832 		MI_SEMAPHORE_GLOBAL_GTT |
833 		MI_SEMAPHORE_POLL |
834 		MI_SEMAPHORE_SAD_NEQ_SDD;
835 	*cs++ = 0;
836 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
837 	*cs++ = 0;
838 
839 	if (idx > 0) {
840 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
841 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
842 		*cs++ = 0;
843 		*cs++ = 1;
844 	} else {
845 		*cs++ = MI_NOOP;
846 		*cs++ = MI_NOOP;
847 		*cs++ = MI_NOOP;
848 		*cs++ = MI_NOOP;
849 	}
850 
851 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
852 
853 	intel_ring_advance(rq, cs);
854 	return 0;
855 }
856 
857 static struct i915_request *
858 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
859 {
860 	struct intel_context *ce;
861 	struct i915_request *rq;
862 	int err;
863 
864 	ce = intel_context_create(engine);
865 	if (IS_ERR(ce))
866 		return ERR_CAST(ce);
867 
868 	rq = intel_context_create_request(ce);
869 	if (IS_ERR(rq))
870 		goto out_ce;
871 
872 	err = 0;
873 	if (rq->engine->emit_init_breadcrumb)
874 		err = rq->engine->emit_init_breadcrumb(rq);
875 	if (err == 0)
876 		err = emit_semaphore_chain(rq, vma, idx);
877 	if (err == 0)
878 		i915_request_get(rq);
879 	i915_request_add(rq);
880 	if (err)
881 		rq = ERR_PTR(err);
882 
883 out_ce:
884 	intel_context_put(ce);
885 	return rq;
886 }
887 
888 static int
889 release_queue(struct intel_engine_cs *engine,
890 	      struct i915_vma *vma,
891 	      int idx, int prio)
892 {
893 	struct i915_sched_attr attr = {
894 		.priority = prio,
895 	};
896 	struct i915_request *rq;
897 	u32 *cs;
898 
899 	rq = intel_engine_create_kernel_request(engine);
900 	if (IS_ERR(rq))
901 		return PTR_ERR(rq);
902 
903 	cs = intel_ring_begin(rq, 4);
904 	if (IS_ERR(cs)) {
905 		i915_request_add(rq);
906 		return PTR_ERR(cs);
907 	}
908 
909 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
910 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
911 	*cs++ = 0;
912 	*cs++ = 1;
913 
914 	intel_ring_advance(rq, cs);
915 
916 	i915_request_get(rq);
917 	i915_request_add(rq);
918 
919 	local_bh_disable();
920 	engine->sched_engine->schedule(rq, &attr);
921 	local_bh_enable(); /* kick tasklet */
922 
923 	i915_request_put(rq);
924 
925 	return 0;
926 }
927 
928 static int
929 slice_semaphore_queue(struct intel_engine_cs *outer,
930 		      struct i915_vma *vma,
931 		      int count)
932 {
933 	struct intel_engine_cs *engine;
934 	struct i915_request *head;
935 	enum intel_engine_id id;
936 	int err, i, n = 0;
937 
938 	head = semaphore_queue(outer, vma, n++);
939 	if (IS_ERR(head))
940 		return PTR_ERR(head);
941 
942 	for_each_engine(engine, outer->gt, id) {
943 		if (!intel_engine_has_preemption(engine))
944 			continue;
945 
946 		for (i = 0; i < count; i++) {
947 			struct i915_request *rq;
948 
949 			rq = semaphore_queue(engine, vma, n++);
950 			if (IS_ERR(rq)) {
951 				err = PTR_ERR(rq);
952 				goto out;
953 			}
954 
955 			i915_request_put(rq);
956 		}
957 	}
958 
959 	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
960 	if (err)
961 		goto out;
962 
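	/* Generous timeout (in jiffies) that scales with the chain length */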
963 	if (i915_request_wait(head, 0,
964 			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
965 		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
966 		       outer->name, count, n);
967 		GEM_TRACE_DUMP();
968 		intel_gt_set_wedged(outer->gt);
969 		err = -EIO;
970 	}
971 
972 out:
973 	i915_request_put(head);
974 	return err;
975 }
976 
977 static int live_timeslice_preempt(void *arg)
978 {
979 	struct intel_gt *gt = arg;
980 	struct drm_i915_gem_object *obj;
981 	struct intel_engine_cs *engine;
982 	enum intel_engine_id id;
983 	struct i915_vma *vma;
984 	void *vaddr;
985 	int err = 0;
986 
987 	/*
988 	 * If a request takes too long, we would like to give other users
989 	 * a fair go on the GPU. In particular, users may create batches
990 	 * that wait upon external input, where that input may even be
991 	 * supplied by another GPU job. To avoid blocking forever, we
992 	 * need to preempt the current task and replace it with another
993 	 * ready task.
994 	 */
995 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
996 		return 0;
997 
998 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
999 	if (IS_ERR(obj))
1000 		return PTR_ERR(obj);
1001 
1002 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1003 	if (IS_ERR(vma)) {
1004 		err = PTR_ERR(vma);
1005 		goto err_obj;
1006 	}
1007 
1008 	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1009 	if (IS_ERR(vaddr)) {
1010 		err = PTR_ERR(vaddr);
1011 		goto err_obj;
1012 	}
1013 
1014 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1015 	if (err)
1016 		goto err_map;
1017 
1018 	err = i915_vma_sync(vma);
1019 	if (err)
1020 		goto err_pin;
1021 
1022 	for_each_engine(engine, gt, id) {
1023 		if (!intel_engine_has_preemption(engine))
1024 			continue;
1025 
1026 		memset(vaddr, 0, PAGE_SIZE);
1027 
1028 		st_engine_heartbeat_disable(engine);
1029 		err = slice_semaphore_queue(engine, vma, 5);
1030 		st_engine_heartbeat_enable(engine);
1031 		if (err)
1032 			goto err_pin;
1033 
1034 		if (igt_flush_test(gt->i915)) {
1035 			err = -EIO;
1036 			goto err_pin;
1037 		}
1038 	}
1039 
1040 err_pin:
1041 	i915_vma_unpin(vma);
1042 err_map:
1043 	i915_gem_object_unpin_map(obj);
1044 err_obj:
1045 	i915_gem_object_put(obj);
1046 	return err;
1047 }
1048 
1049 static struct i915_request *
1050 create_rewinder(struct intel_context *ce,
1051 		struct i915_request *wait,
1052 		void *slot, int idx)
1053 {
1054 	const u32 offset =
1055 		i915_ggtt_offset(ce->engine->status_page.vma) +
1056 		offset_in_page(slot);
1057 	struct i915_request *rq;
1058 	u32 *cs;
1059 	int err;
1060 
1061 	rq = intel_context_create_request(ce);
1062 	if (IS_ERR(rq))
1063 		return rq;
1064 
1065 	if (wait) {
1066 		err = i915_request_await_dma_fence(rq, &wait->fence);
1067 		if (err)
1068 			goto err;
1069 	}
1070 
1071 	cs = intel_ring_begin(rq, 14);
1072 	if (IS_ERR(cs)) {
1073 		err = PTR_ERR(cs);
1074 		goto err;
1075 	}
1076 
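	/*
	 * Wait for slot[0] >= idx, record RING_TIMESTAMP into slot[idx], then
	 * bump slot[0] so execution order can be compared via the timestamps.
	 */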
1077 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1078 	*cs++ = MI_NOOP;
1079 
1080 	*cs++ = MI_SEMAPHORE_WAIT |
1081 		MI_SEMAPHORE_GLOBAL_GTT |
1082 		MI_SEMAPHORE_POLL |
1083 		MI_SEMAPHORE_SAD_GTE_SDD;
1084 	*cs++ = idx;
1085 	*cs++ = offset;
1086 	*cs++ = 0;
1087 
1088 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1089 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1090 	*cs++ = offset + idx * sizeof(u32);
1091 	*cs++ = 0;
1092 
1093 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1094 	*cs++ = offset;
1095 	*cs++ = 0;
1096 	*cs++ = idx + 1;
1097 
1098 	intel_ring_advance(rq, cs);
1099 
1100 	err = 0;
1101 err:
1102 	i915_request_get(rq);
1103 	i915_request_add(rq);
1104 	if (err) {
1105 		i915_request_put(rq);
1106 		return ERR_PTR(err);
1107 	}
1108 
1109 	return rq;
1110 }
1111 
1112 static int live_timeslice_rewind(void *arg)
1113 {
1114 	struct intel_gt *gt = arg;
1115 	struct intel_engine_cs *engine;
1116 	enum intel_engine_id id;
1117 
1118 	/*
1119 	 * The usual presumption on timeslice expiration is that we replace
1120 	 * the active context with another. However, given a chain of
1121 	 * dependencies we may end up replacing the context with itself, but
1122 	 * with only a few of those requests remaining, forcing us to rewind the
1123 	 * RING_TAIL of the original request.
1124 	 */
1125 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1126 		return 0;
1127 
1128 	for_each_engine(engine, gt, id) {
1129 		enum { A1, A2, B1 };
1130 		enum { X = 1, Z, Y };
1131 		struct i915_request *rq[3] = {};
1132 		struct intel_context *ce;
1133 		unsigned long timeslice;
1134 		int i, err = 0;
1135 		u32 *slot;
1136 
1137 		if (!intel_engine_has_timeslices(engine))
1138 			continue;
1139 
1140 		/*
1141 		 * A:rq1 -- semaphore wait, timestamp X
1142 		 * A:rq2 -- write timestamp Y
1143 		 *
1144 		 * B:rq1 [await A:rq1] -- write timestamp Z
1145 		 *
1146 		 * Force timeslice, release semaphore.
1147 		 *
1148 		 * Expect execution/evaluation order XZY
1149 		 */
1150 
1151 		st_engine_heartbeat_disable(engine);
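		/* Shrink the timeslice to 1ms so expiry kicks in quickly */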
1152 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1153 
1154 		slot = memset32(engine->status_page.addr + 1000, 0, 4);
1155 
1156 		ce = intel_context_create(engine);
1157 		if (IS_ERR(ce)) {
1158 			err = PTR_ERR(ce);
1159 			goto err;
1160 		}
1161 
1162 		rq[A1] = create_rewinder(ce, NULL, slot, X);
1163 		if (IS_ERR(rq[A1])) {
1164 			intel_context_put(ce);
1165 			goto err;
1166 		}
1167 
1168 		rq[A2] = create_rewinder(ce, NULL, slot, Y);
1169 		intel_context_put(ce);
1170 		if (IS_ERR(rq[A2]))
1171 			goto err;
1172 
1173 		err = wait_for_submit(engine, rq[A2], HZ / 2);
1174 		if (err) {
1175 			pr_err("%s: failed to submit first context\n",
1176 			       engine->name);
1177 			goto err;
1178 		}
1179 
1180 		ce = intel_context_create(engine);
1181 		if (IS_ERR(ce)) {
1182 			err = PTR_ERR(ce);
1183 			goto err;
1184 		}
1185 
1186 		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1187 		intel_context_put(ce);
1188 		if (IS_ERR(rq[B1]))
1189 			goto err;
1190 
1191 		err = wait_for_submit(engine, rq[B1], HZ / 2);
1192 		if (err) {
1193 			pr_err("%s: failed to submit second context\n",
1194 			       engine->name);
1195 			goto err;
1196 		}
1197 
1198 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1199 		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1200 		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1201 			/* Wait for the timeslice to kick in */
1202 			del_timer(&engine->execlists.timer);
1203 			tasklet_hi_schedule(&engine->sched_engine->tasklet);
1204 			intel_engine_flush_submission(engine);
1205 		}
1206 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1207 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1208 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1209 		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1210 
1211 		/* Release the hounds! */
1212 		slot[0] = 1;
1213 		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1214 
1215 		for (i = 1; i <= 3; i++) {
1216 			unsigned long timeout = jiffies + HZ / 2;
1217 
1218 			while (!READ_ONCE(slot[i]) &&
1219 			       time_before(jiffies, timeout))
1220 				;
1221 
1222 			if (!time_before(jiffies, timeout)) {
1223 				pr_err("%s: rq[%d] timed out\n",
1224 				       engine->name, i - 1);
1225 				err = -ETIME;
1226 				goto err;
1227 			}
1228 
1229 			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1230 		}
1231 
1232 		/* XZY: XZ < XY */
1233 		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1234 			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1235 			       engine->name,
1236 			       slot[Z] - slot[X],
1237 			       slot[Y] - slot[X]);
1238 			err = -EINVAL;
1239 		}
1240 
1241 err:
1242 		memset32(&slot[0], -1, 4);
1243 		wmb();
1244 
1245 		engine->props.timeslice_duration_ms = timeslice;
1246 		st_engine_heartbeat_enable(engine);
1247 		for (i = 0; i < 3; i++)
1248 			i915_request_put(rq[i]);
1249 		if (igt_flush_test(gt->i915))
1250 			err = -EIO;
1251 		if (err)
1252 			return err;
1253 	}
1254 
1255 	return 0;
1256 }
1257 
1258 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1259 {
1260 	struct i915_request *rq;
1261 
1262 	rq = intel_engine_create_kernel_request(engine);
1263 	if (IS_ERR(rq))
1264 		return rq;
1265 
1266 	i915_request_get(rq);
1267 	i915_request_add(rq);
1268 
1269 	return rq;
1270 }
1271 
1272 static long slice_timeout(struct intel_engine_cs *engine)
1273 {
1274 	long timeout;
1275 
1276 	/* Enough time for a timeslice to kick in, and kick out */
1277 	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1278 
1279 	/* Enough time for the nop request to complete */
1280 	timeout += HZ / 5;
1281 
1282 	return timeout + 1;
1283 }
1284 
1285 static int live_timeslice_queue(void *arg)
1286 {
1287 	struct intel_gt *gt = arg;
1288 	struct drm_i915_gem_object *obj;
1289 	struct intel_engine_cs *engine;
1290 	enum intel_engine_id id;
1291 	struct i915_vma *vma;
1292 	void *vaddr;
1293 	int err = 0;
1294 
1295 	/*
1296 	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1297 	 * timeslicing between them disabled, we *do* enable timeslicing
1298 	 * if the queue demands it. (Normally, we do not submit if
1299 	 * ELSP[1] is already occupied, so must rely on timeslicing to
1300 	 * eject ELSP[0] in favour of the queue.)
1301 	 */
1302 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1303 		return 0;
1304 
1305 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1306 	if (IS_ERR(obj))
1307 		return PTR_ERR(obj);
1308 
1309 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1310 	if (IS_ERR(vma)) {
1311 		err = PTR_ERR(vma);
1312 		goto err_obj;
1313 	}
1314 
1315 	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1316 	if (IS_ERR(vaddr)) {
1317 		err = PTR_ERR(vaddr);
1318 		goto err_obj;
1319 	}
1320 
1321 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1322 	if (err)
1323 		goto err_map;
1324 
1325 	err = i915_vma_sync(vma);
1326 	if (err)
1327 		goto err_pin;
1328 
1329 	for_each_engine(engine, gt, id) {
1330 		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1331 		struct i915_request *rq, *nop;
1332 
1333 		if (!intel_engine_has_preemption(engine))
1334 			continue;
1335 
1336 		st_engine_heartbeat_disable(engine);
1337 		memset(vaddr, 0, PAGE_SIZE);
1338 
1339 		/* ELSP[0]: semaphore wait */
1340 		rq = semaphore_queue(engine, vma, 0);
1341 		if (IS_ERR(rq)) {
1342 			err = PTR_ERR(rq);
1343 			goto err_heartbeat;
1344 		}
1345 		engine->sched_engine->schedule(rq, &attr);
1346 		err = wait_for_submit(engine, rq, HZ / 2);
1347 		if (err) {
1348 			pr_err("%s: Timed out trying to submit semaphores\n",
1349 			       engine->name);
1350 			goto err_rq;
1351 		}
1352 
1353 		/* ELSP[1]: nop request */
1354 		nop = nop_request(engine);
1355 		if (IS_ERR(nop)) {
1356 			err = PTR_ERR(nop);
1357 			goto err_rq;
1358 		}
1359 		err = wait_for_submit(engine, nop, HZ / 2);
1360 		i915_request_put(nop);
1361 		if (err) {
1362 			pr_err("%s: Timed out trying to submit nop\n",
1363 			       engine->name);
1364 			goto err_rq;
1365 		}
1366 
1367 		GEM_BUG_ON(i915_request_completed(rq));
1368 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1369 
1370 		/* Queue: semaphore signal, matching priority as semaphore */
1371 		err = release_queue(engine, vma, 1, effective_prio(rq));
1372 		if (err)
1373 			goto err_rq;
1374 
1375 		/* Wait until we ack the release_queue and start timeslicing */
1376 		do {
1377 			cond_resched();
1378 			intel_engine_flush_submission(engine);
1379 		} while (READ_ONCE(engine->execlists.pending[0]));
1380 
1381 		/* Timeslice every jiffy, so within 2 we should signal */
1382 		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1383 			struct drm_printer p =
1384 				drm_info_printer(gt->i915->drm.dev);
1385 
1386 			pr_err("%s: Failed to timeslice into queue\n",
1387 			       engine->name);
1388 			intel_engine_dump(engine, &p,
1389 					  "%s\n", engine->name);
1390 
1391 			memset(vaddr, 0xff, PAGE_SIZE);
1392 			err = -EIO;
1393 		}
1394 err_rq:
1395 		i915_request_put(rq);
1396 err_heartbeat:
1397 		st_engine_heartbeat_enable(engine);
1398 		if (err)
1399 			break;
1400 	}
1401 
1402 err_pin:
1403 	i915_vma_unpin(vma);
1404 err_map:
1405 	i915_gem_object_unpin_map(obj);
1406 err_obj:
1407 	i915_gem_object_put(obj);
1408 	return err;
1409 }
1410 
1411 static int live_timeslice_nopreempt(void *arg)
1412 {
1413 	struct intel_gt *gt = arg;
1414 	struct intel_engine_cs *engine;
1415 	enum intel_engine_id id;
1416 	struct igt_spinner spin;
1417 	int err = 0;
1418 
1419 	/*
1420 	 * We should not timeslice into a request that is marked with
1421 	 * I915_REQUEST_NOPREEMPT.
1422 	 */
1423 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1424 		return 0;
1425 
1426 	if (igt_spinner_init(&spin, gt))
1427 		return -ENOMEM;
1428 
1429 	for_each_engine(engine, gt, id) {
1430 		struct intel_context *ce;
1431 		struct i915_request *rq;
1432 		unsigned long timeslice;
1433 
1434 		if (!intel_engine_has_preemption(engine))
1435 			continue;
1436 
1437 		ce = intel_context_create(engine);
1438 		if (IS_ERR(ce)) {
1439 			err = PTR_ERR(ce);
1440 			break;
1441 		}
1442 
1443 		st_engine_heartbeat_disable(engine);
1444 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1445 
1446 		/* Create an unpreemptible spinner */
1447 
1448 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1449 		intel_context_put(ce);
1450 		if (IS_ERR(rq)) {
1451 			err = PTR_ERR(rq);
1452 			goto out_heartbeat;
1453 		}
1454 
1455 		i915_request_get(rq);
1456 		i915_request_add(rq);
1457 
1458 		if (!igt_wait_for_spinner(&spin, rq)) {
1459 			i915_request_put(rq);
1460 			err = -ETIME;
1461 			goto out_spin;
1462 		}
1463 
1464 		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1465 		i915_request_put(rq);
1466 
1467 		/* Followed by a maximum priority barrier (heartbeat) */
1468 
1469 		ce = intel_context_create(engine);
1470 		if (IS_ERR(ce)) {
1471 			err = PTR_ERR(ce);
1472 			goto out_spin;
1473 		}
1474 
1475 		rq = intel_context_create_request(ce);
1476 		intel_context_put(ce);
1477 		if (IS_ERR(rq)) {
1478 			err = PTR_ERR(rq);
1479 			goto out_spin;
1480 		}
1481 
1482 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1483 		i915_request_get(rq);
1484 		i915_request_add(rq);
1485 
1486 		/*
1487 		 * Wait until the barrier is in ELSP, and we know timeslicing
1488 		 * will have been activated.
1489 		 */
1490 		if (wait_for_submit(engine, rq, HZ / 2)) {
1491 			i915_request_put(rq);
1492 			err = -ETIME;
1493 			goto out_spin;
1494 		}
1495 
1496 		/*
1497 		 * Since the ELSP[0] request is unpreemptible, it should not
1498 		 * allow the maximum priority barrier through. Wait long
1499 		 * enough to see if it is timesliced in by mistake.
1500 		 */
1501 		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1502 			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1503 			       engine->name);
1504 			err = -EINVAL;
1505 		}
1506 		i915_request_put(rq);
1507 
1508 out_spin:
1509 		igt_spinner_end(&spin);
1510 out_heartbeat:
1511 		xchg(&engine->props.timeslice_duration_ms, timeslice);
1512 		st_engine_heartbeat_enable(engine);
1513 		if (err)
1514 			break;
1515 
1516 		if (igt_flush_test(gt->i915)) {
1517 			err = -EIO;
1518 			break;
1519 		}
1520 	}
1521 
1522 	igt_spinner_fini(&spin);
1523 	return err;
1524 }
1525 
1526 static int live_busywait_preempt(void *arg)
1527 {
1528 	struct intel_gt *gt = arg;
1529 	struct i915_gem_context *ctx_hi, *ctx_lo;
1530 	struct intel_engine_cs *engine;
1531 	struct drm_i915_gem_object *obj;
1532 	struct i915_vma *vma;
1533 	enum intel_engine_id id;
1534 	u32 *map;
1535 	int err;
1536 
1537 	/*
1538 	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1539 	 * preempt the busywaits used to synchronise between rings.
1540 	 */
1541 
1542 	ctx_hi = kernel_context(gt->i915, NULL);
1543 	if (IS_ERR(ctx_hi))
1544 		return PTR_ERR(ctx_hi);
1545 
1546 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1547 
1548 	ctx_lo = kernel_context(gt->i915, NULL);
1549 	if (IS_ERR(ctx_lo)) {
1550 		err = PTR_ERR(ctx_lo);
1551 		goto err_ctx_hi;
1552 	}
1553 
1554 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1555 
1556 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1557 	if (IS_ERR(obj)) {
1558 		err = PTR_ERR(obj);
1559 		goto err_ctx_lo;
1560 	}
1561 
1562 	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1563 	if (IS_ERR(map)) {
1564 		err = PTR_ERR(map);
1565 		goto err_obj;
1566 	}
1567 
1568 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1569 	if (IS_ERR(vma)) {
1570 		err = PTR_ERR(vma);
1571 		goto err_map;
1572 	}
1573 
1574 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1575 	if (err)
1576 		goto err_map;
1577 
1578 	err = i915_vma_sync(vma);
1579 	if (err)
1580 		goto err_vma;
1581 
1582 	for_each_engine(engine, gt, id) {
1583 		struct i915_request *lo, *hi;
1584 		struct igt_live_test t;
1585 		u32 *cs;
1586 
1587 		if (!intel_engine_has_preemption(engine))
1588 			continue;
1589 
1590 		if (!intel_engine_can_store_dword(engine))
1591 			continue;
1592 
1593 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1594 			err = -EIO;
1595 			goto err_vma;
1596 		}
1597 
1598 		/*
1599 		 * We create two requests. The low priority request
1600 		 * busywaits on a semaphore (inside the ringbuffer, where
1601 		 * it should be preemptible) and the high priority request
1602 		 * uses an MI_STORE_DWORD_IMM to update the semaphore value,
1603 		 * allowing the first request to complete. If preemption
1604 		 * fails, we hang instead.
1605 		 */
1606 
1607 		lo = igt_request_alloc(ctx_lo, engine);
1608 		if (IS_ERR(lo)) {
1609 			err = PTR_ERR(lo);
1610 			goto err_vma;
1611 		}
1612 
1613 		cs = intel_ring_begin(lo, 8);
1614 		if (IS_ERR(cs)) {
1615 			err = PTR_ERR(cs);
1616 			i915_request_add(lo);
1617 			goto err_vma;
1618 		}
1619 
1620 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1621 		*cs++ = i915_ggtt_offset(vma);
1622 		*cs++ = 0;
1623 		*cs++ = 1;
1624 
1625 		/* XXX Do we need a flush + invalidate here? */
1626 
1627 		*cs++ = MI_SEMAPHORE_WAIT |
1628 			MI_SEMAPHORE_GLOBAL_GTT |
1629 			MI_SEMAPHORE_POLL |
1630 			MI_SEMAPHORE_SAD_EQ_SDD;
1631 		*cs++ = 0;
1632 		*cs++ = i915_ggtt_offset(vma);
1633 		*cs++ = 0;
1634 
1635 		intel_ring_advance(lo, cs);
1636 
1637 		i915_request_get(lo);
1638 		i915_request_add(lo);
1639 
1640 		if (wait_for(READ_ONCE(*map), 10)) {
1641 			i915_request_put(lo);
1642 			err = -ETIMEDOUT;
1643 			goto err_vma;
1644 		}
1645 
1646 		/* Low priority request should be busywaiting now */
1647 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
1648 			i915_request_put(lo);
1649 			pr_err("%s: Busywaiting request did not!\n",
1650 			       engine->name);
1651 			err = -EIO;
1652 			goto err_vma;
1653 		}
1654 
1655 		hi = igt_request_alloc(ctx_hi, engine);
1656 		if (IS_ERR(hi)) {
1657 			err = PTR_ERR(hi);
1658 			i915_request_put(lo);
1659 			goto err_vma;
1660 		}
1661 
1662 		cs = intel_ring_begin(hi, 4);
1663 		if (IS_ERR(cs)) {
1664 			err = PTR_ERR(cs);
1665 			i915_request_add(hi);
1666 			i915_request_put(lo);
1667 			goto err_vma;
1668 		}
1669 
1670 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1671 		*cs++ = i915_ggtt_offset(vma);
1672 		*cs++ = 0;
1673 		*cs++ = 0;
1674 
1675 		intel_ring_advance(hi, cs);
1676 		i915_request_add(hi);
1677 
1678 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1679 			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1680 
1681 			pr_err("%s: Failed to preempt semaphore busywait!\n",
1682 			       engine->name);
1683 
1684 			intel_engine_dump(engine, &p, "%s\n", engine->name);
1685 			GEM_TRACE_DUMP();
1686 
1687 			i915_request_put(lo);
1688 			intel_gt_set_wedged(gt);
1689 			err = -EIO;
1690 			goto err_vma;
1691 		}
1692 		GEM_BUG_ON(READ_ONCE(*map));
1693 		i915_request_put(lo);
1694 
1695 		if (igt_live_test_end(&t)) {
1696 			err = -EIO;
1697 			goto err_vma;
1698 		}
1699 	}
1700 
1701 	err = 0;
1702 err_vma:
1703 	i915_vma_unpin(vma);
1704 err_map:
1705 	i915_gem_object_unpin_map(obj);
1706 err_obj:
1707 	i915_gem_object_put(obj);
1708 err_ctx_lo:
1709 	kernel_context_close(ctx_lo);
1710 err_ctx_hi:
1711 	kernel_context_close(ctx_hi);
1712 	return err;
1713 }
1714 
1715 static struct i915_request *
1716 spinner_create_request(struct igt_spinner *spin,
1717 		       struct i915_gem_context *ctx,
1718 		       struct intel_engine_cs *engine,
1719 		       u32 arb)
1720 {
1721 	struct intel_context *ce;
1722 	struct i915_request *rq;
1723 
1724 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1725 	if (IS_ERR(ce))
1726 		return ERR_CAST(ce);
1727 
1728 	rq = igt_spinner_create_request(spin, ce, arb);
1729 	intel_context_put(ce);
1730 	return rq;
1731 }
1732 
1733 static int live_preempt(void *arg)
1734 {
1735 	struct intel_gt *gt = arg;
1736 	struct i915_gem_context *ctx_hi, *ctx_lo;
1737 	struct igt_spinner spin_hi, spin_lo;
1738 	struct intel_engine_cs *engine;
1739 	enum intel_engine_id id;
1740 	int err = -ENOMEM;
1741 
1742 	if (igt_spinner_init(&spin_hi, gt))
1743 		return -ENOMEM;
1744 
1745 	if (igt_spinner_init(&spin_lo, gt))
1746 		goto err_spin_hi;
1747 
1748 	ctx_hi = kernel_context(gt->i915, NULL);
1749 	if (!ctx_hi)
1750 		goto err_spin_lo;
1751 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1752 
1753 	ctx_lo = kernel_context(gt->i915, NULL);
1754 	if (!ctx_lo)
1755 		goto err_ctx_hi;
1756 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1757 
1758 	for_each_engine(engine, gt, id) {
1759 		struct igt_live_test t;
1760 		struct i915_request *rq;
1761 
1762 		if (!intel_engine_has_preemption(engine))
1763 			continue;
1764 
1765 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1766 			err = -EIO;
1767 			goto err_ctx_lo;
1768 		}
1769 
1770 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1771 					    MI_ARB_CHECK);
1772 		if (IS_ERR(rq)) {
1773 			err = PTR_ERR(rq);
1774 			goto err_ctx_lo;
1775 		}
1776 
1777 		i915_request_add(rq);
1778 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1779 			GEM_TRACE("lo spinner failed to start\n");
1780 			GEM_TRACE_DUMP();
1781 			intel_gt_set_wedged(gt);
1782 			err = -EIO;
1783 			goto err_ctx_lo;
1784 		}
1785 
1786 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1787 					    MI_ARB_CHECK);
1788 		if (IS_ERR(rq)) {
1789 			igt_spinner_end(&spin_lo);
1790 			err = PTR_ERR(rq);
1791 			goto err_ctx_lo;
1792 		}
1793 
1794 		i915_request_add(rq);
1795 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1796 			GEM_TRACE("hi spinner failed to start\n");
1797 			GEM_TRACE_DUMP();
1798 			intel_gt_set_wedged(gt);
1799 			err = -EIO;
1800 			goto err_ctx_lo;
1801 		}
1802 
1803 		igt_spinner_end(&spin_hi);
1804 		igt_spinner_end(&spin_lo);
1805 
1806 		if (igt_live_test_end(&t)) {
1807 			err = -EIO;
1808 			goto err_ctx_lo;
1809 		}
1810 	}
1811 
1812 	err = 0;
1813 err_ctx_lo:
1814 	kernel_context_close(ctx_lo);
1815 err_ctx_hi:
1816 	kernel_context_close(ctx_hi);
1817 err_spin_lo:
1818 	igt_spinner_fini(&spin_lo);
1819 err_spin_hi:
1820 	igt_spinner_fini(&spin_hi);
1821 	return err;
1822 }
1823 
1824 static int live_late_preempt(void *arg)
1825 {
1826 	struct intel_gt *gt = arg;
1827 	struct i915_gem_context *ctx_hi, *ctx_lo;
1828 	struct igt_spinner spin_hi, spin_lo;
1829 	struct intel_engine_cs *engine;
1830 	struct i915_sched_attr attr = {};
1831 	enum intel_engine_id id;
1832 	int err = -ENOMEM;
1833 
1834 	if (igt_spinner_init(&spin_hi, gt))
1835 		return -ENOMEM;
1836 
1837 	if (igt_spinner_init(&spin_lo, gt))
1838 		goto err_spin_hi;
1839 
1840 	ctx_hi = kernel_context(gt->i915, NULL);
1841 	if (!ctx_hi)
1842 		goto err_spin_lo;
1843 
1844 	ctx_lo = kernel_context(gt->i915, NULL);
1845 	if (!ctx_lo)
1846 		goto err_ctx_hi;
1847 
1848 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1849 	ctx_lo->sched.priority = 1;
1850 
1851 	for_each_engine(engine, gt, id) {
1852 		struct igt_live_test t;
1853 		struct i915_request *rq;
1854 
1855 		if (!intel_engine_has_preemption(engine))
1856 			continue;
1857 
1858 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1859 			err = -EIO;
1860 			goto err_ctx_lo;
1861 		}
1862 
1863 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1864 					    MI_ARB_CHECK);
1865 		if (IS_ERR(rq)) {
1866 			err = PTR_ERR(rq);
1867 			goto err_ctx_lo;
1868 		}
1869 
1870 		i915_request_add(rq);
1871 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1872 			pr_err("First context failed to start\n");
1873 			goto err_wedged;
1874 		}
1875 
1876 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1877 					    MI_NOOP);
1878 		if (IS_ERR(rq)) {
1879 			igt_spinner_end(&spin_lo);
1880 			err = PTR_ERR(rq);
1881 			goto err_ctx_lo;
1882 		}
1883 
1884 		i915_request_add(rq);
1885 		if (igt_wait_for_spinner(&spin_hi, rq)) {
1886 			pr_err("Second context overtook first?\n");
1887 			goto err_wedged;
1888 		}
1889 
1890 		attr.priority = I915_PRIORITY_MAX;
1891 		engine->sched_engine->schedule(rq, &attr);
1892 
1893 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1894 			pr_err("High priority context failed to preempt the low priority context\n");
1895 			GEM_TRACE_DUMP();
1896 			goto err_wedged;
1897 		}
1898 
1899 		igt_spinner_end(&spin_hi);
1900 		igt_spinner_end(&spin_lo);
1901 
1902 		if (igt_live_test_end(&t)) {
1903 			err = -EIO;
1904 			goto err_ctx_lo;
1905 		}
1906 	}
1907 
1908 	err = 0;
1909 err_ctx_lo:
1910 	kernel_context_close(ctx_lo);
1911 err_ctx_hi:
1912 	kernel_context_close(ctx_hi);
1913 err_spin_lo:
1914 	igt_spinner_fini(&spin_lo);
1915 err_spin_hi:
1916 	igt_spinner_fini(&spin_hi);
1917 	return err;
1918 
1919 err_wedged:
1920 	igt_spinner_end(&spin_hi);
1921 	igt_spinner_end(&spin_lo);
1922 	intel_gt_set_wedged(gt);
1923 	err = -EIO;
1924 	goto err_ctx_lo;
1925 }
1926 
1927 struct preempt_client {
1928 	struct igt_spinner spin;
1929 	struct i915_gem_context *ctx;
1930 };
1931 
1932 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1933 {
1934 	c->ctx = kernel_context(gt->i915, NULL);
1935 	if (!c->ctx)
1936 		return -ENOMEM;
1937 
1938 	if (igt_spinner_init(&c->spin, gt))
1939 		goto err_ctx;
1940 
1941 	return 0;
1942 
1943 err_ctx:
1944 	kernel_context_close(c->ctx);
1945 	return -ENOMEM;
1946 }
1947 
1948 static void preempt_client_fini(struct preempt_client *c)
1949 {
1950 	igt_spinner_fini(&c->spin);
1951 	kernel_context_close(c->ctx);
1952 }
1953 
1954 static int live_nopreempt(void *arg)
1955 {
1956 	struct intel_gt *gt = arg;
1957 	struct intel_engine_cs *engine;
1958 	struct preempt_client a, b;
1959 	enum intel_engine_id id;
1960 	int err = -ENOMEM;
1961 
1962 	/*
1963 	 * Verify that we can disable preemption for an individual request
1964 	 * that may be being observed and does not want to be interrupted.
1965 	 */
1966 
1967 	if (preempt_client_init(gt, &a))
1968 		return -ENOMEM;
1969 	if (preempt_client_init(gt, &b))
1970 		goto err_client_a;
1971 	b.ctx->sched.priority = I915_PRIORITY_MAX;
1972 
1973 	for_each_engine(engine, gt, id) {
1974 		struct i915_request *rq_a, *rq_b;
1975 
1976 		if (!intel_engine_has_preemption(engine))
1977 			continue;
1978 
1979 		engine->execlists.preempt_hang.count = 0;
1980 
1981 		rq_a = spinner_create_request(&a.spin,
1982 					      a.ctx, engine,
1983 					      MI_ARB_CHECK);
1984 		if (IS_ERR(rq_a)) {
1985 			err = PTR_ERR(rq_a);
1986 			goto err_client_b;
1987 		}
1988 
1989 		/* Low priority client, but unpreemptable! */
1990 		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1991 
1992 		i915_request_add(rq_a);
1993 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1994 			pr_err("First client failed to start\n");
1995 			goto err_wedged;
1996 		}
1997 
1998 		rq_b = spinner_create_request(&b.spin,
1999 					      b.ctx, engine,
2000 					      MI_ARB_CHECK);
2001 		if (IS_ERR(rq_b)) {
2002 			err = PTR_ERR(rq_b);
2003 			goto err_client_b;
2004 		}
2005 
2006 		i915_request_add(rq_b);
2007 
2008 		/* B is much more important than A! (But A is unpreemptable.) */
2009 		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2010 
2011 		/* Wait long enough for preemption and timeslicing */
2012 		if (igt_wait_for_spinner(&b.spin, rq_b)) {
2013 			pr_err("Second client started too early!\n");
2014 			goto err_wedged;
2015 		}
2016 
2017 		igt_spinner_end(&a.spin);
2018 
2019 		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2020 			pr_err("Second client failed to start\n");
2021 			goto err_wedged;
2022 		}
2023 
2024 		igt_spinner_end(&b.spin);
2025 
2026 		if (engine->execlists.preempt_hang.count) {
2027 			pr_err("Preemption recorded x%d; should have been suppressed!\n",
2028 			       engine->execlists.preempt_hang.count);
2029 			err = -EINVAL;
2030 			goto err_wedged;
2031 		}
2032 
2033 		if (igt_flush_test(gt->i915))
2034 			goto err_wedged;
2035 	}
2036 
2037 	err = 0;
2038 err_client_b:
2039 	preempt_client_fini(&b);
2040 err_client_a:
2041 	preempt_client_fini(&a);
2042 	return err;
2043 
2044 err_wedged:
2045 	igt_spinner_end(&b.spin);
2046 	igt_spinner_end(&a.spin);
2047 	intel_gt_set_wedged(gt);
2048 	err = -EIO;
2049 	goto err_client_b;
2050 }
2051 
2052 struct live_preempt_cancel {
2053 	struct intel_engine_cs *engine;
2054 	struct preempt_client a, b;
2055 };
2056 
2057 static int __cancel_active0(struct live_preempt_cancel *arg)
2058 {
2059 	struct i915_request *rq;
2060 	struct igt_live_test t;
2061 	int err;
2062 
2063 	/* Preempt cancel of ELSP0 */
2064 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2065 	if (igt_live_test_begin(&t, arg->engine->i915,
2066 				__func__, arg->engine->name))
2067 		return -EIO;
2068 
2069 	rq = spinner_create_request(&arg->a.spin,
2070 				    arg->a.ctx, arg->engine,
2071 				    MI_ARB_CHECK);
2072 	if (IS_ERR(rq))
2073 		return PTR_ERR(rq);
2074 
2075 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2076 	i915_request_get(rq);
2077 	i915_request_add(rq);
2078 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2079 		err = -EIO;
2080 		goto out;
2081 	}
2082 
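	/*
	 * Ban the spinner's context and send a heartbeat pulse to force the
	 * preemption that should cancel the inflight request.
	 */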
2083 	intel_context_set_banned(rq->context);
2084 	err = intel_engine_pulse(arg->engine);
2085 	if (err)
2086 		goto out;
2087 
2088 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2089 	if (err) {
2090 		pr_err("Cancelled inflight0 request did not reset\n");
2091 		goto out;
2092 	}
2093 
2094 out:
2095 	i915_request_put(rq);
2096 	if (igt_live_test_end(&t))
2097 		err = -EIO;
2098 	return err;
2099 }
2100 
2101 static int __cancel_active1(struct live_preempt_cancel *arg)
2102 {
2103 	struct i915_request *rq[2] = {};
2104 	struct igt_live_test t;
2105 	int err;
2106 
2107 	/* Preempt cancel of ELSP1 */
2108 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2109 	if (igt_live_test_begin(&t, arg->engine->i915,
2110 				__func__, arg->engine->name))
2111 		return -EIO;
2112 
2113 	rq[0] = spinner_create_request(&arg->a.spin,
2114 				       arg->a.ctx, arg->engine,
2115 				       MI_NOOP); /* no preemption */
2116 	if (IS_ERR(rq[0]))
2117 		return PTR_ERR(rq[0]);
2118 
2119 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2120 	i915_request_get(rq[0]);
2121 	i915_request_add(rq[0]);
2122 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2123 		err = -EIO;
2124 		goto out;
2125 	}
2126 
2127 	rq[1] = spinner_create_request(&arg->b.spin,
2128 				       arg->b.ctx, arg->engine,
2129 				       MI_ARB_CHECK);
2130 	if (IS_ERR(rq[1])) {
2131 		err = PTR_ERR(rq[1]);
2132 		goto out;
2133 	}
2134 
2135 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2136 	i915_request_get(rq[1]);
2137 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2138 	i915_request_add(rq[1]);
2139 	if (err)
2140 		goto out;
2141 
2142 	intel_context_set_banned(rq[1]->context);
2143 	err = intel_engine_pulse(arg->engine);
2144 	if (err)
2145 		goto out;
2146 
2147 	igt_spinner_end(&arg->a.spin);
2148 	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2149 	if (err)
2150 		goto out;
2151 
2152 	if (rq[0]->fence.error != 0) {
2153 		pr_err("Normal inflight0 request did not complete\n");
2154 		err = -EINVAL;
2155 		goto out;
2156 	}
2157 
2158 	if (rq[1]->fence.error != -EIO) {
2159 		pr_err("Cancelled inflight1 request did not report -EIO\n");
2160 		err = -EINVAL;
2161 		goto out;
2162 	}
2163 
2164 out:
2165 	i915_request_put(rq[1]);
2166 	i915_request_put(rq[0]);
2167 	if (igt_live_test_end(&t))
2168 		err = -EIO;
2169 	return err;
2170 }
2171 
2172 static int __cancel_queued(struct live_preempt_cancel *arg)
2173 {
2174 	struct i915_request *rq[3] = {};
2175 	struct igt_live_test t;
2176 	int err;
2177 
2178 	/* Full ELSP and one in the wings */
2179 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2180 	if (igt_live_test_begin(&t, arg->engine->i915,
2181 				__func__, arg->engine->name))
2182 		return -EIO;
2183 
2184 	rq[0] = spinner_create_request(&arg->a.spin,
2185 				       arg->a.ctx, arg->engine,
2186 				       MI_ARB_CHECK);
2187 	if (IS_ERR(rq[0]))
2188 		return PTR_ERR(rq[0]);
2189 
2190 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2191 	i915_request_get(rq[0]);
2192 	i915_request_add(rq[0]);
2193 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2194 		err = -EIO;
2195 		goto out;
2196 	}
2197 
2198 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2199 	if (IS_ERR(rq[1])) {
2200 		err = PTR_ERR(rq[1]);
2201 		goto out;
2202 	}
2203 
2204 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2205 	i915_request_get(rq[1]);
2206 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2207 	i915_request_add(rq[1]);
2208 	if (err)
2209 		goto out;
2210 
2211 	rq[2] = spinner_create_request(&arg->b.spin,
2212 				       arg->a.ctx, arg->engine,
2213 				       MI_ARB_CHECK);
2214 	if (IS_ERR(rq[2])) {
2215 		err = PTR_ERR(rq[2]);
2216 		goto out;
2217 	}
2218 
2219 	i915_request_get(rq[2]);
2220 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2221 	i915_request_add(rq[2]);
2222 	if (err)
2223 		goto out;
2224 
2225 	intel_context_set_banned(rq[2]->context);
2226 	err = intel_engine_pulse(arg->engine);
2227 	if (err)
2228 		goto out;
2229 
2230 	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2231 	if (err)
2232 		goto out;
2233 
2234 	if (rq[0]->fence.error != -EIO) {
2235 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2236 		err = -EINVAL;
2237 		goto out;
2238 	}
2239 
2240 	if (rq[1]->fence.error != 0) {
2241 		pr_err("Normal inflight1 request did not complete\n");
2242 		err = -EINVAL;
2243 		goto out;
2244 	}
2245 
2246 	if (rq[2]->fence.error != -EIO) {
2247 		pr_err("Cancelled queued request did not report -EIO\n");
2248 		err = -EINVAL;
2249 		goto out;
2250 	}
2251 
2252 out:
2253 	i915_request_put(rq[2]);
2254 	i915_request_put(rq[1]);
2255 	i915_request_put(rq[0]);
2256 	if (igt_live_test_end(&t))
2257 		err = -EIO;
2258 	return err;
2259 }
2260 
2261 static int __cancel_hostile(struct live_preempt_cancel *arg)
2262 {
2263 	struct i915_request *rq;
2264 	int err;
2265 
2266 	/* Preempt cancel non-preemptible spinner in ELSP0 */
2267 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2268 		return 0;
2269 
2270 	if (!intel_has_reset_engine(arg->engine->gt))
2271 		return 0;
2272 
2273 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2274 	rq = spinner_create_request(&arg->a.spin,
2275 				    arg->a.ctx, arg->engine,
2276 				    MI_NOOP); /* preemption disabled */
2277 	if (IS_ERR(rq))
2278 		return PTR_ERR(rq);
2279 
2280 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2281 	i915_request_get(rq);
2282 	i915_request_add(rq);
2283 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2284 		err = -EIO;
2285 		goto out;
2286 	}
2287 
2288 	intel_context_set_banned(rq->context);
2289 	err = intel_engine_pulse(arg->engine); /* force reset */
2290 	if (err)
2291 		goto out;
2292 
2293 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2294 	if (err) {
2295 		pr_err("Cancelled inflight0 request did not reset\n");
2296 		goto out;
2297 	}
2298 
2299 out:
2300 	i915_request_put(rq);
2301 	if (igt_flush_test(arg->engine->i915))
2302 		err = -EIO;
2303 	return err;
2304 }
2305 
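/*
 * Selftest-only fault injection: a probability of 999 with an unbounded
 * times count (-1) presumably makes every subsequent engine reset attempt
 * be reported as failed, so the preempt reset issued from __cancel_fail()
 * is not expected to succeed on its own.
 */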
2306 static void force_reset_timeout(struct intel_engine_cs *engine)
2307 {
2308 	engine->reset_timeout.probability = 999;
2309 	atomic_set(&engine->reset_timeout.times, -1);
2310 }
2311 
2312 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2313 {
2314 	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2315 }
2316 
2317 static int __cancel_fail(struct live_preempt_cancel *arg)
2318 {
2319 	struct intel_engine_cs *engine = arg->engine;
2320 	struct i915_request *rq;
2321 	int err;
2322 
2323 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2324 		return 0;
2325 
2326 	if (!intel_has_reset_engine(engine->gt))
2327 		return 0;
2328 
2329 	GEM_TRACE("%s(%s)\n", __func__, engine->name);
2330 	rq = spinner_create_request(&arg->a.spin,
2331 				    arg->a.ctx, engine,
2332 				    MI_NOOP); /* preemption disabled */
2333 	if (IS_ERR(rq))
2334 		return PTR_ERR(rq);
2335 
2336 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2337 	i915_request_get(rq);
2338 	i915_request_add(rq);
2339 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2340 		err = -EIO;
2341 		goto out;
2342 	}
2343 
2344 	intel_context_set_banned(rq->context);
2345 
2346 	err = intel_engine_pulse(engine);
2347 	if (err)
2348 		goto out;
2349 
2350 	force_reset_timeout(engine);
2351 
2352 	/* Force a preempt timeout whose engine reset is rigged (above) to fail */
2353 	while (!engine->execlists.pending[0])
2354 		intel_engine_flush_submission(engine);
2355 	del_timer_sync(&engine->execlists.preempt);
2356 	intel_engine_flush_submission(engine);
2357 
2358 	cancel_reset_timeout(engine);
2359 
2360 	/* after failure, require heartbeats to reset device */
2361 	intel_engine_set_heartbeat(engine, 1);
2362 	err = wait_for_reset(engine, rq, HZ / 2);
2363 	intel_engine_set_heartbeat(engine,
2364 				   engine->defaults.heartbeat_interval_ms);
2365 	if (err) {
2366 		pr_err("Cancelled inflight0 request did not reset\n");
2367 		goto out;
2368 	}
2369 
2370 out:
2371 	i915_request_put(rq);
2372 	if (igt_flush_test(engine->i915))
2373 		err = -EIO;
2374 	return err;
2375 }
2376 
2377 static int live_preempt_cancel(void *arg)
2378 {
2379 	struct intel_gt *gt = arg;
2380 	struct live_preempt_cancel data;
2381 	enum intel_engine_id id;
2382 	int err = -ENOMEM;
2383 
2384 	/*
2385 	 * To cancel an inflight context, we need to first remove it from the
2386 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2387 	 */
2388 
2389 	if (preempt_client_init(gt, &data.a))
2390 		return -ENOMEM;
2391 	if (preempt_client_init(gt, &data.b))
2392 		goto err_client_a;
2393 
2394 	for_each_engine(data.engine, gt, id) {
2395 		if (!intel_engine_has_preemption(data.engine))
2396 			continue;
2397 
2398 		err = __cancel_active0(&data);
2399 		if (err)
2400 			goto err_wedged;
2401 
2402 		err = __cancel_active1(&data);
2403 		if (err)
2404 			goto err_wedged;
2405 
2406 		err = __cancel_queued(&data);
2407 		if (err)
2408 			goto err_wedged;
2409 
2410 		err = __cancel_hostile(&data);
2411 		if (err)
2412 			goto err_wedged;
2413 
2414 		err = __cancel_fail(&data);
2415 		if (err)
2416 			goto err_wedged;
2417 	}
2418 
2419 	err = 0;
2420 err_client_b:
2421 	preempt_client_fini(&data.b);
2422 err_client_a:
2423 	preempt_client_fini(&data.a);
2424 	return err;
2425 
2426 err_wedged:
2427 	GEM_TRACE_DUMP();
2428 	igt_spinner_end(&data.b.spin);
2429 	igt_spinner_end(&data.a.spin);
2430 	intel_gt_set_wedged(gt);
2431 	goto err_client_b;
2432 }
2433 
2434 static int live_suppress_self_preempt(void *arg)
2435 {
2436 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2437 	struct intel_gt *gt = arg;
2438 	struct intel_engine_cs *engine;
2439 	struct preempt_client a, b;
2440 	enum intel_engine_id id;
2441 	int err = -ENOMEM;
2442 
2443 	/*
2444 	 * Verify that if a preemption request does not cause a change in
2445 	 * the current execution order, the preempt-to-idle injection is
2446 	 * skipped and that we do not accidentally apply it after the CS
2447 	 * completion event.
2448 	 */
2449 
2450 	if (intel_uc_uses_guc_submission(&gt->uc))
2451 		return 0; /* presume black box */
2452 
2453 	if (intel_vgpu_active(gt->i915))
2454 		return 0; /* GVT forces single port & request submission */
2455 
2456 	if (preempt_client_init(gt, &a))
2457 		return -ENOMEM;
2458 	if (preempt_client_init(gt, &b))
2459 		goto err_client_a;
2460 
2461 	for_each_engine(engine, gt, id) {
2462 		struct i915_request *rq_a, *rq_b;
2463 		int depth;
2464 
2465 		if (!intel_engine_has_preemption(engine))
2466 			continue;
2467 
2468 		if (igt_flush_test(gt->i915))
2469 			goto err_wedged;
2470 
2471 		st_engine_heartbeat_disable(engine);
2472 		engine->execlists.preempt_hang.count = 0;
2473 
2474 		rq_a = spinner_create_request(&a.spin,
2475 					      a.ctx, engine,
2476 					      MI_NOOP);
2477 		if (IS_ERR(rq_a)) {
2478 			err = PTR_ERR(rq_a);
2479 			st_engine_heartbeat_enable(engine);
2480 			goto err_client_b;
2481 		}
2482 
2483 		i915_request_add(rq_a);
2484 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2485 			pr_err("First client failed to start\n");
2486 			st_engine_heartbeat_enable(engine);
2487 			goto err_wedged;
2488 		}
2489 
2490 		/* Keep postponing the timer to avoid premature slicing */
2491 		mod_timer(&engine->execlists.timer, jiffies + HZ);
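		/*
		 * Each pass promotes the request that is already executing to
		 * maximum priority; since the execution order cannot change,
		 * no preempt-to-idle cycle should be injected or recorded.
		 */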
2492 		for (depth = 0; depth < 8; depth++) {
2493 			rq_b = spinner_create_request(&b.spin,
2494 						      b.ctx, engine,
2495 						      MI_NOOP);
2496 			if (IS_ERR(rq_b)) {
2497 				err = PTR_ERR(rq_b);
2498 				st_engine_heartbeat_enable(engine);
2499 				goto err_client_b;
2500 			}
2501 			i915_request_add(rq_b);
2502 
2503 			GEM_BUG_ON(i915_request_completed(rq_a));
2504 			engine->sched_engine->schedule(rq_a, &attr);
2505 			igt_spinner_end(&a.spin);
2506 
2507 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2508 				pr_err("Second client failed to start\n");
2509 				st_engine_heartbeat_enable(engine);
2510 				goto err_wedged;
2511 			}
2512 
2513 			swap(a, b);
2514 			rq_a = rq_b;
2515 		}
2516 		igt_spinner_end(&a.spin);
2517 
2518 		if (engine->execlists.preempt_hang.count) {
2519 			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2520 			       engine->name,
2521 			       engine->execlists.preempt_hang.count,
2522 			       depth);
2523 			st_engine_heartbeat_enable(engine);
2524 			err = -EINVAL;
2525 			goto err_client_b;
2526 		}
2527 
2528 		st_engine_heartbeat_enable(engine);
2529 		if (igt_flush_test(gt->i915))
2530 			goto err_wedged;
2531 	}
2532 
2533 	err = 0;
2534 err_client_b:
2535 	preempt_client_fini(&b);
2536 err_client_a:
2537 	preempt_client_fini(&a);
2538 	return err;
2539 
2540 err_wedged:
2541 	igt_spinner_end(&b.spin);
2542 	igt_spinner_end(&a.spin);
2543 	intel_gt_set_wedged(gt);
2544 	err = -EIO;
2545 	goto err_client_b;
2546 }
2547 
2548 static int live_chain_preempt(void *arg)
2549 {
2550 	struct intel_gt *gt = arg;
2551 	struct intel_engine_cs *engine;
2552 	struct preempt_client hi, lo;
2553 	enum intel_engine_id id;
2554 	int err = -ENOMEM;
2555 
2556 	/*
2557 	 * Build a chain AB...BA between two contexts (A, B) and request
2558 	 * preemption of the last request. It should then complete before
2559 	 * the previously submitted spinner in B.
2560 	 */
2561 
2562 	if (preempt_client_init(gt, &hi))
2563 		return -ENOMEM;
2564 
2565 	if (preempt_client_init(gt, &lo))
2566 		goto err_client_hi;
2567 
2568 	for_each_engine(engine, gt, id) {
2569 		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2570 		struct igt_live_test t;
2571 		struct i915_request *rq;
2572 		int ring_size, count, i;
2573 
2574 		if (!intel_engine_has_preemption(engine))
2575 			continue;
2576 
2577 		rq = spinner_create_request(&lo.spin,
2578 					    lo.ctx, engine,
2579 					    MI_ARB_CHECK);
2580 		if (IS_ERR(rq))
2581 			goto err_wedged;
2582 
2583 		i915_request_get(rq);
2584 		i915_request_add(rq);
2585 
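		/*
		 * Roughly, wa_tail - head is the space one spinner request
		 * consumes in the ring, so dividing the ring size by it gives
		 * an upper bound on how many requests fit before wrapping.
		 */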
2586 		ring_size = rq->wa_tail - rq->head;
2587 		if (ring_size < 0)
2588 			ring_size += rq->ring->size;
2589 		ring_size = rq->ring->size / ring_size;
2590 		pr_debug("%s(%s): Using maximum of %d requests\n",
2591 			 __func__, engine->name, ring_size);
2592 
2593 		igt_spinner_end(&lo.spin);
2594 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2595 			pr_err("Timed out waiting to flush %s\n", engine->name);
2596 			i915_request_put(rq);
2597 			goto err_wedged;
2598 		}
2599 		i915_request_put(rq);
2600 
2601 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2602 			err = -EIO;
2603 			goto err_wedged;
2604 		}
2605 
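		/*
		 * Prime chain lengths from 1 up to the ring capacity,
		 * presumably to sample a spread of queue depths without
		 * walking every value.
		 */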
2606 		for_each_prime_number_from(count, 1, ring_size) {
2607 			rq = spinner_create_request(&hi.spin,
2608 						    hi.ctx, engine,
2609 						    MI_ARB_CHECK);
2610 			if (IS_ERR(rq))
2611 				goto err_wedged;
2612 			i915_request_add(rq);
2613 			if (!igt_wait_for_spinner(&hi.spin, rq))
2614 				goto err_wedged;
2615 
2616 			rq = spinner_create_request(&lo.spin,
2617 						    lo.ctx, engine,
2618 						    MI_ARB_CHECK);
2619 			if (IS_ERR(rq))
2620 				goto err_wedged;
2621 			i915_request_add(rq);
2622 
2623 			for (i = 0; i < count; i++) {
2624 				rq = igt_request_alloc(lo.ctx, engine);
2625 				if (IS_ERR(rq))
2626 					goto err_wedged;
2627 				i915_request_add(rq);
2628 			}
2629 
2630 			rq = igt_request_alloc(hi.ctx, engine);
2631 			if (IS_ERR(rq))
2632 				goto err_wedged;
2633 
2634 			i915_request_get(rq);
2635 			i915_request_add(rq);
2636 			engine->sched_engine->schedule(rq, &attr);
2637 
2638 			igt_spinner_end(&hi.spin);
2639 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2640 				struct drm_printer p =
2641 					drm_info_printer(gt->i915->drm.dev);
2642 
2643 				pr_err("Failed to preempt over chain of %d\n",
2644 				       count);
2645 				intel_engine_dump(engine, &p,
2646 						  "%s\n", engine->name);
2647 				i915_request_put(rq);
2648 				goto err_wedged;
2649 			}
2650 			igt_spinner_end(&lo.spin);
2651 			i915_request_put(rq);
2652 
2653 			rq = igt_request_alloc(lo.ctx, engine);
2654 			if (IS_ERR(rq))
2655 				goto err_wedged;
2656 
2657 			i915_request_get(rq);
2658 			i915_request_add(rq);
2659 
2660 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2661 				struct drm_printer p =
2662 					drm_info_printer(gt->i915->drm.dev);
2663 
2664 				pr_err("Failed to flush low priority chain of %d requests\n",
2665 				       count);
2666 				intel_engine_dump(engine, &p,
2667 						  "%s\n", engine->name);
2668 
2669 				i915_request_put(rq);
2670 				goto err_wedged;
2671 			}
2672 			i915_request_put(rq);
2673 		}
2674 
2675 		if (igt_live_test_end(&t)) {
2676 			err = -EIO;
2677 			goto err_wedged;
2678 		}
2679 	}
2680 
2681 	err = 0;
2682 err_client_lo:
2683 	preempt_client_fini(&lo);
2684 err_client_hi:
2685 	preempt_client_fini(&hi);
2686 	return err;
2687 
2688 err_wedged:
2689 	igt_spinner_end(&hi.spin);
2690 	igt_spinner_end(&lo.spin);
2691 	intel_gt_set_wedged(gt);
2692 	err = -EIO;
2693 	goto err_client_lo;
2694 }
2695 
2696 static int create_gang(struct intel_engine_cs *engine,
2697 		       struct i915_request **prev)
2698 {
2699 	struct drm_i915_gem_object *obj;
2700 	struct intel_context *ce;
2701 	struct i915_request *rq;
2702 	struct i915_vma *vma;
2703 	u32 *cs;
2704 	int err;
2705 
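	/*
	 * Each batch spins on a semaphore polling its own first dword until
	 * it reads zero; once released, it writes zero into the previous
	 * (lower priority) batch's first dword. Clearing the newest batch
	 * therefore percolates the release down the whole gang, oldest last.
	 */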
2706 	ce = intel_context_create(engine);
2707 	if (IS_ERR(ce))
2708 		return PTR_ERR(ce);
2709 
2710 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2711 	if (IS_ERR(obj)) {
2712 		err = PTR_ERR(obj);
2713 		goto err_ce;
2714 	}
2715 
2716 	vma = i915_vma_instance(obj, ce->vm, NULL);
2717 	if (IS_ERR(vma)) {
2718 		err = PTR_ERR(vma);
2719 		goto err_obj;
2720 	}
2721 
2722 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2723 	if (err)
2724 		goto err_obj;
2725 
2726 	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2727 	if (IS_ERR(cs)) {
2728 		err = PTR_ERR(cs);
2729 		goto err_obj;
2730 	}
2731 
2732 	/* Semaphore target: spin until zero */
2733 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2734 
2735 	*cs++ = MI_SEMAPHORE_WAIT |
2736 		MI_SEMAPHORE_POLL |
2737 		MI_SEMAPHORE_SAD_EQ_SDD;
2738 	*cs++ = 0;
2739 	*cs++ = lower_32_bits(vma->node.start);
2740 	*cs++ = upper_32_bits(vma->node.start);
2741 
2742 	if (*prev) {
2743 		u64 offset = (*prev)->batch->node.start;
2744 
2745 		/* Terminate the spinner in the next lower priority batch. */
2746 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2747 		*cs++ = lower_32_bits(offset);
2748 		*cs++ = upper_32_bits(offset);
2749 		*cs++ = 0;
2750 	}
2751 
2752 	*cs++ = MI_BATCH_BUFFER_END;
2753 	i915_gem_object_flush_map(obj);
2754 	i915_gem_object_unpin_map(obj);
2755 
2756 	rq = intel_context_create_request(ce);
2757 	if (IS_ERR(rq)) {
2758 		err = PTR_ERR(rq);
2759 		goto err_obj;
2760 	}
2761 
2762 	rq->batch = i915_vma_get(vma);
2763 	i915_request_get(rq);
2764 
2765 	i915_vma_lock(vma);
2766 	err = i915_request_await_object(rq, vma->obj, false);
2767 	if (!err)
2768 		err = i915_vma_move_to_active(vma, rq, 0);
2769 	if (!err)
2770 		err = rq->engine->emit_bb_start(rq,
2771 						vma->node.start,
2772 						PAGE_SIZE, 0);
2773 	i915_vma_unlock(vma);
2774 	i915_request_add(rq);
2775 	if (err)
2776 		goto err_rq;
2777 
2778 	i915_gem_object_put(obj);
2779 	intel_context_put(ce);
2780 
2781 	rq->mock.link.next = &(*prev)->mock.link;
2782 	*prev = rq;
2783 	return 0;
2784 
2785 err_rq:
2786 	i915_vma_put(rq->batch);
2787 	i915_request_put(rq);
2788 err_obj:
2789 	i915_gem_object_put(obj);
2790 err_ce:
2791 	intel_context_put(ce);
2792 	return err;
2793 }
2794 
2795 static int __live_preempt_ring(struct intel_engine_cs *engine,
2796 			       struct igt_spinner *spin,
2797 			       int queue_sz, int ring_sz)
2798 {
2799 	struct intel_context *ce[2] = {};
2800 	struct i915_request *rq;
2801 	struct igt_live_test t;
2802 	int err = 0;
2803 	int n;
2804 
2805 	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2806 		return -EIO;
2807 
2808 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2809 		struct intel_context *tmp;
2810 
2811 		tmp = intel_context_create(engine);
2812 		if (IS_ERR(tmp)) {
2813 			err = PTR_ERR(tmp);
2814 			goto err_ce;
2815 		}
2816 
2817 		tmp->ring_size = ring_sz;
2818 
2819 		err = intel_context_pin(tmp);
2820 		if (err) {
2821 			intel_context_put(tmp);
2822 			goto err_ce;
2823 		}
2824 
2825 		memset32(tmp->ring->vaddr,
2826 			 0xdeadbeef, /* trigger a hang if executed */
2827 			 tmp->ring->vma->size / sizeof(u32));
2828 
2829 		ce[n] = tmp;
2830 	}
2831 
2832 	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2833 	if (IS_ERR(rq)) {
2834 		err = PTR_ERR(rq);
2835 		goto err_ce;
2836 	}
2837 
2838 	i915_request_get(rq);
2839 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2840 	i915_request_add(rq);
2841 
2842 	if (!igt_wait_for_spinner(spin, rq)) {
2843 		intel_gt_set_wedged(engine->gt);
2844 		i915_request_put(rq);
2845 		err = -ETIME;
2846 		goto err_ce;
2847 	}
2848 
2849 	/* Fill the ring until we cause a wrap */
2850 	n = 0;
2851 	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2852 		struct i915_request *tmp;
2853 
2854 		tmp = intel_context_create_request(ce[0]);
2855 		if (IS_ERR(tmp)) {
2856 			err = PTR_ERR(tmp);
2857 			i915_request_put(rq);
2858 			goto err_ce;
2859 		}
2860 
2861 		i915_request_add(tmp);
2862 		intel_engine_flush_submission(engine);
2863 		n++;
2864 	}
2865 	intel_engine_flush_submission(engine);
2866 	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2867 		 engine->name, queue_sz, n,
2868 		 ce[0]->ring->size,
2869 		 ce[0]->ring->tail,
2870 		 ce[0]->ring->emit,
2871 		 rq->tail);
2872 	i915_request_put(rq);
2873 
2874 	/* Create a second request to preempt the first ring */
2875 	rq = intel_context_create_request(ce[1]);
2876 	if (IS_ERR(rq)) {
2877 		err = PTR_ERR(rq);
2878 		goto err_ce;
2879 	}
2880 
2881 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2882 	i915_request_get(rq);
2883 	i915_request_add(rq);
2884 
2885 	err = wait_for_submit(engine, rq, HZ / 2);
2886 	i915_request_put(rq);
2887 	if (err) {
2888 		pr_err("%s: preemption request was not submitted\n",
2889 		       engine->name);
2890 		err = -ETIME;
2891 	}
2892 
2893 	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2894 		 engine->name,
2895 		 ce[0]->ring->tail, ce[0]->ring->emit,
2896 		 ce[1]->ring->tail, ce[1]->ring->emit);
2897 
2898 err_ce:
2899 	intel_engine_flush_submission(engine);
2900 	igt_spinner_end(spin);
2901 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2902 		if (IS_ERR_OR_NULL(ce[n]))
2903 			break;
2904 
2905 		intel_context_unpin(ce[n]);
2906 		intel_context_put(ce[n]);
2907 	}
2908 	if (igt_live_test_end(&t))
2909 		err = -EIO;
2910 	return err;
2911 }
2912 
2913 static int live_preempt_ring(void *arg)
2914 {
2915 	struct intel_gt *gt = arg;
2916 	struct intel_engine_cs *engine;
2917 	struct igt_spinner spin;
2918 	enum intel_engine_id id;
2919 	int err = 0;
2920 
2921 	/*
2922 	 * Check that we roll back large chunks of a ring in order to do a
2923 	 * preemption event. Similar to live_unlite_ring, but looking at
2924 	 * ring size rather than the impact of intel_ring_direction().
2925 	 */
2926 
2927 	if (igt_spinner_init(&spin, gt))
2928 		return -ENOMEM;
2929 
2930 	for_each_engine(engine, gt, id) {
2931 		int n;
2932 
2933 		if (!intel_engine_has_preemption(engine))
2934 			continue;
2935 
2936 		if (!intel_engine_can_store_dword(engine))
2937 			continue;
2938 
2939 		st_engine_heartbeat_disable(engine);
2940 
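		/*
		 * Sweep queue fills of 0, 1K, 2K and 3K bytes within a 4K
		 * ring so that the preempting request forces rollbacks both
		 * short of and across a ring wrap.
		 */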
2941 		for (n = 0; n <= 3; n++) {
2942 			err = __live_preempt_ring(engine, &spin,
2943 						  n * SZ_4K / 4, SZ_4K);
2944 			if (err)
2945 				break;
2946 		}
2947 
2948 		st_engine_heartbeat_enable(engine);
2949 		if (err)
2950 			break;
2951 	}
2952 
2953 	igt_spinner_fini(&spin);
2954 	return err;
2955 }
2956 
2957 static int live_preempt_gang(void *arg)
2958 {
2959 	struct intel_gt *gt = arg;
2960 	struct intel_engine_cs *engine;
2961 	enum intel_engine_id id;
2962 
2963 	/*
2964 	 * Build as long a chain of preempters as we can, with each
2965 	 * request higher priority than the last. Once we are ready, we release
2966 	 * the last batch which then percolates down the chain, each releasing
2967 	 * the next oldest in turn. The intent is to simply push as hard as we
2968 	 * can with the number of preemptions, trying to exceed narrow HW
2969 	 * limits. At a minimum, we insist that we can sort all the user
2970 	 * high priority levels into execution order.
2971 	 */
2972 
2973 	for_each_engine(engine, gt, id) {
2974 		struct i915_request *rq = NULL;
2975 		struct igt_live_test t;
2976 		IGT_TIMEOUT(end_time);
2977 		int prio = 0;
2978 		int err = 0;
2979 		u32 *cs;
2980 
2981 		if (!intel_engine_has_preemption(engine))
2982 			continue;
2983 
2984 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2985 			return -EIO;
2986 
2987 		do {
2988 			struct i915_sched_attr attr = { .priority = prio++ };
2989 
2990 			err = create_gang(engine, &rq);
2991 			if (err)
2992 				break;
2993 
2994 			/* Submit each spinner at increasing priority */
2995 			engine->sched_engine->schedule(rq, &attr);
2996 		} while (prio <= I915_PRIORITY_MAX &&
2997 			 !__igt_timeout(end_time, NULL));
2998 		pr_debug("%s: Preempt chain of %d requests\n",
2999 			 engine->name, prio);
3000 
3001 		/*
3002 		 * The last spinner submitted is the highest priority and
3003 		 * should execute first. As each spinner completes, it
3004 		 * terminates the next-lowest spinner, until no spinners
3005 		 * remain and the gang is complete.
3006 		 */
3007 		cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
3008 		if (!IS_ERR(cs)) {
3009 			*cs = 0;
3010 			i915_gem_object_unpin_map(rq->batch->obj);
3011 		} else {
3012 			err = PTR_ERR(cs);
3013 			intel_gt_set_wedged(gt);
3014 		}
3015 
3016 		while (rq) { /* wait for each rq from highest to lowest prio */
3017 			struct i915_request *n = list_next_entry(rq, mock.link);
3018 
3019 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3020 				struct drm_printer p =
3021 					drm_info_printer(engine->i915->drm.dev);
3022 
3023 				pr_err("Failed to flush chain of %d requests, at %d\n",
3024 				       prio, rq_prio(rq));
3025 				intel_engine_dump(engine, &p,
3026 						  "%s\n", engine->name);
3027 
3028 				err = -ETIME;
3029 			}
3030 
3031 			i915_vma_put(rq->batch);
3032 			i915_request_put(rq);
3033 			rq = n;
3034 		}
3035 
3036 		if (igt_live_test_end(&t))
3037 			err = -EIO;
3038 		if (err)
3039 			return err;
3040 	}
3041 
3042 	return 0;
3043 }
3044 
3045 static struct i915_vma *
3046 create_gpr_user(struct intel_engine_cs *engine,
3047 		struct i915_vma *result,
3048 		unsigned int offset)
3049 {
3050 	struct drm_i915_gem_object *obj;
3051 	struct i915_vma *vma;
3052 	u32 *cs;
3053 	int err;
3054 	int i;
3055 
3056 	obj = i915_gem_object_create_internal(engine->i915, 4096);
3057 	if (IS_ERR(obj))
3058 		return ERR_CAST(obj);
3059 
3060 	vma = i915_vma_instance(obj, result->vm, NULL);
3061 	if (IS_ERR(vma)) {
3062 		i915_gem_object_put(obj);
3063 		return vma;
3064 	}
3065 
3066 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3067 	if (err) {
3068 		i915_vma_put(vma);
3069 		return ERR_PTR(err);
3070 	}
3071 
3072 	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3073 	if (IS_ERR(cs)) {
3074 		i915_vma_put(vma);
3075 		return ERR_CAST(cs);
3076 	}
3077 
3078 	/* All GPRs are clear for new contexts. We use GPR(0) as the constant 1 */
3079 	*cs++ = MI_LOAD_REGISTER_IMM(1);
3080 	*cs++ = CS_GPR(engine, 0);
3081 	*cs++ = 1;
3082 
3083 	for (i = 1; i < NUM_GPR; i++) {
3084 		u64 addr;
3085 
3086 		/*
3087 		 * Perform: GPR[i]++
3088 		 *
3089 		 * As we read and write into the context saved GPR[i], if
3090 		 * we restart this batch buffer from an earlier point, we
3091 		 * will repeat the increment and store a value > 1.
3092 		 */
3093 		*cs++ = MI_MATH(4);
3094 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3095 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3096 		*cs++ = MI_MATH_ADD;
3097 		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3098 
3099 		addr = result->node.start + offset + i * sizeof(*cs);
3100 		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
3101 		*cs++ = CS_GPR(engine, 2 * i);
3102 		*cs++ = lower_32_bits(addr);
3103 		*cs++ = upper_32_bits(addr);
3104 
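		/*
		 * Wait (preemptibly) until preempt_user() bumps the shared
		 * result dword to at least i before continuing.
		 */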
3105 		*cs++ = MI_SEMAPHORE_WAIT |
3106 			MI_SEMAPHORE_POLL |
3107 			MI_SEMAPHORE_SAD_GTE_SDD;
3108 		*cs++ = i;
3109 		*cs++ = lower_32_bits(result->node.start);
3110 		*cs++ = upper_32_bits(result->node.start);
3111 	}
3112 
3113 	*cs++ = MI_BATCH_BUFFER_END;
3114 	i915_gem_object_flush_map(obj);
3115 	i915_gem_object_unpin_map(obj);
3116 
3117 	return vma;
3118 }
3119 
3120 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3121 {
3122 	struct drm_i915_gem_object *obj;
3123 	struct i915_vma *vma;
3124 	int err;
3125 
3126 	obj = i915_gem_object_create_internal(gt->i915, sz);
3127 	if (IS_ERR(obj))
3128 		return ERR_CAST(obj);
3129 
3130 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3131 	if (IS_ERR(vma)) {
3132 		i915_gem_object_put(obj);
3133 		return vma;
3134 	}
3135 
3136 	err = i915_ggtt_pin(vma, NULL, 0, 0);
3137 	if (err) {
3138 		i915_vma_put(vma);
3139 		return ERR_PTR(err);
3140 	}
3141 
3142 	return vma;
3143 }
3144 
3145 static struct i915_request *
3146 create_gpr_client(struct intel_engine_cs *engine,
3147 		  struct i915_vma *global,
3148 		  unsigned int offset)
3149 {
3150 	struct i915_vma *batch, *vma;
3151 	struct intel_context *ce;
3152 	struct i915_request *rq;
3153 	int err;
3154 
3155 	ce = intel_context_create(engine);
3156 	if (IS_ERR(ce))
3157 		return ERR_CAST(ce);
3158 
3159 	vma = i915_vma_instance(global->obj, ce->vm, NULL);
3160 	if (IS_ERR(vma)) {
3161 		err = PTR_ERR(vma);
3162 		goto out_ce;
3163 	}
3164 
3165 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3166 	if (err)
3167 		goto out_ce;
3168 
3169 	batch = create_gpr_user(engine, vma, offset);
3170 	if (IS_ERR(batch)) {
3171 		err = PTR_ERR(batch);
3172 		goto out_vma;
3173 	}
3174 
3175 	rq = intel_context_create_request(ce);
3176 	if (IS_ERR(rq)) {
3177 		err = PTR_ERR(rq);
3178 		goto out_batch;
3179 	}
3180 
3181 	i915_vma_lock(vma);
3182 	err = i915_request_await_object(rq, vma->obj, false);
3183 	if (!err)
3184 		err = i915_vma_move_to_active(vma, rq, 0);
3185 	i915_vma_unlock(vma);
3186 
3187 	i915_vma_lock(batch);
3188 	if (!err)
3189 		err = i915_request_await_object(rq, batch->obj, false);
3190 	if (!err)
3191 		err = i915_vma_move_to_active(batch, rq, 0);
3192 	if (!err)
3193 		err = rq->engine->emit_bb_start(rq,
3194 						batch->node.start,
3195 						PAGE_SIZE, 0);
3196 	i915_vma_unlock(batch);
3197 	i915_vma_unpin(batch);
3198 
3199 	if (!err)
3200 		i915_request_get(rq);
3201 	i915_request_add(rq);
3202 
3203 out_batch:
3204 	i915_vma_put(batch);
3205 out_vma:
3206 	i915_vma_unpin(vma);
3207 out_ce:
3208 	intel_context_put(ce);
3209 	return err ? ERR_PTR(err) : rq;
3210 }
3211 
3212 static int preempt_user(struct intel_engine_cs *engine,
3213 			struct i915_vma *global,
3214 			int id)
3215 {
3216 	struct i915_sched_attr attr = {
3217 		.priority = I915_PRIORITY_MAX
3218 	};
3219 	struct i915_request *rq;
3220 	int err = 0;
3221 	u32 *cs;
3222 
3223 	rq = intel_engine_create_kernel_request(engine);
3224 	if (IS_ERR(rq))
3225 		return PTR_ERR(rq);
3226 
3227 	cs = intel_ring_begin(rq, 4);
3228 	if (IS_ERR(cs)) {
3229 		i915_request_add(rq);
3230 		return PTR_ERR(cs);
3231 	}
3232 
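	/*
	 * A single GGTT store of 'id' into the shared result page both forces
	 * a preemption (this request runs at maximum priority) and satisfies
	 * the clients' MI_SEMAPHORE_SAD_GTE_SDD wait for step 'id'.
	 */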
3233 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3234 	*cs++ = i915_ggtt_offset(global);
3235 	*cs++ = 0;
3236 	*cs++ = id;
3237 
3238 	intel_ring_advance(rq, cs);
3239 
3240 	i915_request_get(rq);
3241 	i915_request_add(rq);
3242 
3243 	engine->sched_engine->schedule(rq, &attr);
3244 
3245 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
3246 		err = -ETIME;
3247 	i915_request_put(rq);
3248 
3249 	return err;
3250 }
3251 
3252 static int live_preempt_user(void *arg)
3253 {
3254 	struct intel_gt *gt = arg;
3255 	struct intel_engine_cs *engine;
3256 	struct i915_vma *global;
3257 	enum intel_engine_id id;
3258 	u32 *result;
3259 	int err = 0;
3260 
3261 	/*
3262 	 * In our other tests, we look at preemption in carefully
3263 	 * controlled conditions in the ringbuffer. Since most of the
3264 	 * time is spent in user batches, most of our preemptions naturally
3265 	 * occur there. We want to verify that when we preempt inside a batch
3266 	 * we continue on from the current instruction and do not roll back
3267 	 * to the start, or another earlier arbitration point.
3268 	 *
3269 	 * To verify this, we create a batch which is a mixture of
3270 	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3271 	 * a few preempting contexts thrown into the mix, we look for any
3272 	 * repeated instructions (which show up as incorrect values).
3273 	 */
3274 
3275 	global = create_global(gt, 4096);
3276 	if (IS_ERR(global))
3277 		return PTR_ERR(global);
3278 
3279 	result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3280 	if (IS_ERR(result)) {
3281 		i915_vma_unpin_and_release(&global, 0);
3282 		return PTR_ERR(result);
3283 	}
3284 
3285 	for_each_engine(engine, gt, id) {
3286 		struct i915_request *client[3] = {};
3287 		struct igt_live_test t;
3288 		int i;
3289 
3290 		if (!intel_engine_has_preemption(engine))
3291 			continue;
3292 
3293 		if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
3294 			continue; /* we need per-context GPR */
3295 
3296 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3297 			err = -EIO;
3298 			break;
3299 		}
3300 
3301 		memset(result, 0, 4096);
3302 
3303 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3304 			struct i915_request *rq;
3305 
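			/*
			 * Each client snapshots its GPRs into a disjoint
			 * NUM_GPR-dword slice of the shared result page.
			 */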
3306 			rq = create_gpr_client(engine, global,
3307 					       NUM_GPR * i * sizeof(u32));
3308 			if (IS_ERR(rq)) {
3309 				err = PTR_ERR(rq);
3310 				goto end_test;
3311 			}
3312 
3313 			client[i] = rq;
3314 		}
3315 
3316 		/* Continuously preempt the set of 3 running contexts */
3317 		for (i = 1; i <= NUM_GPR; i++) {
3318 			err = preempt_user(engine, global, i);
3319 			if (err)
3320 				goto end_test;
3321 		}
3322 
3323 		if (READ_ONCE(result[0]) != NUM_GPR) {
3324 			pr_err("%s: Failed to release semaphore\n",
3325 			       engine->name);
3326 			err = -EIO;
3327 			goto end_test;
3328 		}
3329 
3330 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3331 			int gpr;
3332 
3333 			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3334 				err = -ETIME;
3335 				goto end_test;
3336 			}
3337 
3338 			for (gpr = 1; gpr < NUM_GPR; gpr++) {
3339 				if (result[NUM_GPR * i + gpr] != 1) {
3340 					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3341 					       engine->name,
3342 					       i, gpr, result[NUM_GPR * i + gpr]);
3343 					err = -EINVAL;
3344 					goto end_test;
3345 				}
3346 			}
3347 		}
3348 
3349 end_test:
3350 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3351 			if (!client[i])
3352 				break;
3353 
3354 			i915_request_put(client[i]);
3355 		}
3356 
3357 		/* Flush the semaphores on error */
3358 		smp_store_mb(result[0], -1);
3359 		if (igt_live_test_end(&t))
3360 			err = -EIO;
3361 		if (err)
3362 			break;
3363 	}
3364 
3365 	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3366 	return err;
3367 }
3368 
3369 static int live_preempt_timeout(void *arg)
3370 {
3371 	struct intel_gt *gt = arg;
3372 	struct i915_gem_context *ctx_hi, *ctx_lo;
3373 	struct igt_spinner spin_lo;
3374 	struct intel_engine_cs *engine;
3375 	enum intel_engine_id id;
3376 	int err = -ENOMEM;
3377 
3378 	/*
3379 	 * Check that we force preemption to occur by cancelling the previous
3380 	 * context if it refuses to yield the GPU.
3381 	 */
3382 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3383 		return 0;
3384 
3385 	if (!intel_has_reset_engine(gt))
3386 		return 0;
3387 
3388 	if (igt_spinner_init(&spin_lo, gt))
3389 		return -ENOMEM;
3390 
3391 	ctx_hi = kernel_context(gt->i915, NULL);
3392 	if (!ctx_hi)
3393 		goto err_spin_lo;
3394 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3395 
3396 	ctx_lo = kernel_context(gt->i915, NULL);
3397 	if (!ctx_lo)
3398 		goto err_ctx_hi;
3399 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3400 
3401 	for_each_engine(engine, gt, id) {
3402 		unsigned long saved_timeout;
3403 		struct i915_request *rq;
3404 
3405 		if (!intel_engine_has_preemption(engine))
3406 			continue;
3407 
3408 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3409 					    MI_NOOP); /* preemption disabled */
3410 		if (IS_ERR(rq)) {
3411 			err = PTR_ERR(rq);
3412 			goto err_ctx_lo;
3413 		}
3414 
3415 		i915_request_add(rq);
3416 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
3417 			intel_gt_set_wedged(gt);
3418 			err = -EIO;
3419 			goto err_ctx_lo;
3420 		}
3421 
3422 		rq = igt_request_alloc(ctx_hi, engine);
3423 		if (IS_ERR(rq)) {
3424 			igt_spinner_end(&spin_lo);
3425 			err = PTR_ERR(rq);
3426 			goto err_ctx_lo;
3427 		}
3428 
3429 		/* Flush the previous CS ack before changing timeouts */
3430 		while (READ_ONCE(engine->execlists.pending[0]))
3431 			cpu_relax();
3432 
3433 		saved_timeout = engine->props.preempt_timeout_ms;
3434 		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3435 
3436 		i915_request_get(rq);
3437 		i915_request_add(rq);
3438 
3439 		intel_engine_flush_submission(engine);
3440 		engine->props.preempt_timeout_ms = saved_timeout;
3441 
3442 		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3443 			intel_gt_set_wedged(gt);
3444 			i915_request_put(rq);
3445 			err = -ETIME;
3446 			goto err_ctx_lo;
3447 		}
3448 
3449 		igt_spinner_end(&spin_lo);
3450 		i915_request_put(rq);
3451 	}
3452 
3453 	err = 0;
3454 err_ctx_lo:
3455 	kernel_context_close(ctx_lo);
3456 err_ctx_hi:
3457 	kernel_context_close(ctx_hi);
3458 err_spin_lo:
3459 	igt_spinner_fini(&spin_lo);
3460 	return err;
3461 }
3462 
3463 static int random_range(struct rnd_state *rnd, int min, int max)
3464 {
3465 	return i915_prandom_u32_max_state(max - min, rnd) + min;
3466 }
3467 
3468 static int random_priority(struct rnd_state *rnd)
3469 {
3470 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3471 }
3472 
3473 struct preempt_smoke {
3474 	struct intel_gt *gt;
3475 	struct kthread_work work;
3476 	struct i915_gem_context **contexts;
3477 	struct intel_engine_cs *engine;
3478 	struct drm_i915_gem_object *batch;
3479 	unsigned int ncontext;
3480 	struct rnd_state prng;
3481 	unsigned long count;
3482 	int result;
3483 };
3484 
3485 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3486 {
3487 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3488 							  &smoke->prng)];
3489 }
3490 
3491 static int smoke_submit(struct preempt_smoke *smoke,
3492 			struct i915_gem_context *ctx, int prio,
3493 			struct drm_i915_gem_object *batch)
3494 {
3495 	struct i915_request *rq;
3496 	struct i915_vma *vma = NULL;
3497 	int err = 0;
3498 
3499 	if (batch) {
3500 		struct i915_address_space *vm;
3501 
3502 		vm = i915_gem_context_get_vm_rcu(ctx);
3503 		vma = i915_vma_instance(batch, vm, NULL);
3504 		i915_vm_put(vm);
3505 		if (IS_ERR(vma))
3506 			return PTR_ERR(vma);
3507 
3508 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
3509 		if (err)
3510 			return err;
3511 	}
3512 
3513 	ctx->sched.priority = prio;
3514 
3515 	rq = igt_request_alloc(ctx, smoke->engine);
3516 	if (IS_ERR(rq)) {
3517 		err = PTR_ERR(rq);
3518 		goto unpin;
3519 	}
3520 
3521 	if (vma) {
3522 		i915_vma_lock(vma);
3523 		err = i915_request_await_object(rq, vma->obj, false);
3524 		if (!err)
3525 			err = i915_vma_move_to_active(vma, rq, 0);
3526 		if (!err)
3527 			err = rq->engine->emit_bb_start(rq,
3528 							vma->node.start,
3529 							PAGE_SIZE, 0);
3530 		i915_vma_unlock(vma);
3531 	}
3532 
3533 	i915_request_add(rq);
3534 
3535 unpin:
3536 	if (vma)
3537 		i915_vma_unpin(vma);
3538 
3539 	return err;
3540 }
3541 
3542 static void smoke_crescendo_work(struct kthread_work *work)
3543 {
3544 	struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work);
3545 	IGT_TIMEOUT(end_time);
3546 	unsigned long count;
3547 
3548 	count = 0;
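	/*
	 * Ramp the priority with each submission (count % I915_PRIORITY_MAX)
	 * so that later requests from this worker tend to preempt the
	 * earlier ones.
	 */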
3549 	do {
3550 		struct i915_gem_context *ctx = smoke_context(smoke);
3551 
3552 		smoke->result = smoke_submit(smoke, ctx,
3553 					     count % I915_PRIORITY_MAX,
3554 					     smoke->batch);
3555 
3556 		count++;
3557 	} while (!smoke->result && count < smoke->ncontext &&
3558 		 !__igt_timeout(end_time, NULL));
3559 
3560 	smoke->count = count;
3561 }
3562 
3563 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3564 #define BATCH BIT(0)
3565 {
3566 	struct kthread_worker *worker[I915_NUM_ENGINES] = {};
3567 	struct preempt_smoke *arg;
3568 	struct intel_engine_cs *engine;
3569 	enum intel_engine_id id;
3570 	unsigned long count;
3571 	int err = 0;
3572 
3573 	arg = kmalloc_array(I915_NUM_ENGINES, sizeof(*arg), GFP_KERNEL);
3574 	if (!arg)
3575 		return -ENOMEM;
3576 
3577 	memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg));
3578 
3579 	for_each_engine(engine, smoke->gt, id) {
3580 		arg[id] = *smoke;
3581 		arg[id].engine = engine;
3582 		if (!(flags & BATCH))
3583 			arg[id].batch = NULL;
3584 		arg[id].count = 0;
3585 
3586 		worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
3587 		if (IS_ERR(worker[id])) {
3588 			err = PTR_ERR(worker[id]);
3589 			break;
3590 		}
3591 
3592 		kthread_init_work(&arg[id].work, smoke_crescendo_work);
3593 		kthread_queue_work(worker[id], &arg[id].work);
3594 	}
3595 
3596 	count = 0;
3597 	for_each_engine(engine, smoke->gt, id) {
3598 		if (IS_ERR_OR_NULL(worker[id]))
3599 			continue;
3600 
3601 		kthread_flush_work(&arg[id].work);
3602 		if (arg[id].result && !err)
3603 			err = arg[id].result;
3604 
3605 		count += arg[id].count;
3606 
3607 		kthread_destroy_worker(worker[id]);
3608 	}
3609 
3610 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3611 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3612 
3613 	kfree(arg);
3614 	return err;
3615 }
3616 
3617 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3618 {
3619 	enum intel_engine_id id;
3620 	IGT_TIMEOUT(end_time);
3621 	unsigned long count;
3622 
3623 	count = 0;
3624 	do {
3625 		for_each_engine(smoke->engine, smoke->gt, id) {
3626 			struct i915_gem_context *ctx = smoke_context(smoke);
3627 			int err;
3628 
3629 			err = smoke_submit(smoke,
3630 					   ctx, random_priority(&smoke->prng),
3631 					   flags & BATCH ? smoke->batch : NULL);
3632 			if (err)
3633 				return err;
3634 
3635 			count++;
3636 		}
3637 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3638 
3639 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3640 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3641 	return 0;
3642 }
3643 
3644 static int live_preempt_smoke(void *arg)
3645 {
3646 	struct preempt_smoke smoke = {
3647 		.gt = arg,
3648 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3649 		.ncontext = 256,
3650 	};
3651 	const unsigned int phase[] = { 0, BATCH };
3652 	struct igt_live_test t;
3653 	int err = -ENOMEM;
3654 	u32 *cs;
3655 	int n;
3656 
3657 	smoke.contexts = kmalloc_array(smoke.ncontext,
3658 				       sizeof(*smoke.contexts),
3659 				       GFP_KERNEL);
3660 	if (!smoke.contexts)
3661 		return -ENOMEM;
3662 
3663 	smoke.batch =
3664 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3665 	if (IS_ERR(smoke.batch)) {
3666 		err = PTR_ERR(smoke.batch);
3667 		goto err_free;
3668 	}
3669 
3670 	cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3671 	if (IS_ERR(cs)) {
3672 		err = PTR_ERR(cs);
3673 		goto err_batch;
3674 	}
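	/*
	 * Fill the batch with MI_ARB_CHECK arbitration points so that it can
	 * be preempted at (almost) any point mid-batch.
	 */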
3675 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3676 		cs[n] = MI_ARB_CHECK;
3677 	cs[n] = MI_BATCH_BUFFER_END;
3678 	i915_gem_object_flush_map(smoke.batch);
3679 	i915_gem_object_unpin_map(smoke.batch);
3680 
3681 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3682 		err = -EIO;
3683 		goto err_batch;
3684 	}
3685 
3686 	for (n = 0; n < smoke.ncontext; n++) {
3687 		smoke.contexts[n] = kernel_context(smoke.gt->i915, NULL);
3688 		if (!smoke.contexts[n])
3689 			goto err_ctx;
3690 	}
3691 
3692 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3693 		err = smoke_crescendo(&smoke, phase[n]);
3694 		if (err)
3695 			goto err_ctx;
3696 
3697 		err = smoke_random(&smoke, phase[n]);
3698 		if (err)
3699 			goto err_ctx;
3700 	}
3701 
3702 err_ctx:
3703 	if (igt_live_test_end(&t))
3704 		err = -EIO;
3705 
3706 	for (n = 0; n < smoke.ncontext; n++) {
3707 		if (!smoke.contexts[n])
3708 			break;
3709 		kernel_context_close(smoke.contexts[n]);
3710 	}
3711 
3712 err_batch:
3713 	i915_gem_object_put(smoke.batch);
3714 err_free:
3715 	kfree(smoke.contexts);
3716 
3717 	return err;
3718 }
3719 
3720 static int nop_virtual_engine(struct intel_gt *gt,
3721 			      struct intel_engine_cs **siblings,
3722 			      unsigned int nsibling,
3723 			      unsigned int nctx,
3724 			      unsigned int flags)
3725 #define CHAIN BIT(0)
3726 {
3727 	IGT_TIMEOUT(end_time);
3728 	struct i915_request *request[16] = {};
3729 	struct intel_context *ve[16];
3730 	unsigned long n, prime, nc;
3731 	struct igt_live_test t;
3732 	ktime_t times[2] = {};
3733 	int err;
3734 
3735 	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3736 
3737 	for (n = 0; n < nctx; n++) {
3738 		ve[n] = intel_engine_create_virtual(siblings, nsibling);
3739 		if (IS_ERR(ve[n])) {
3740 			err = PTR_ERR(ve[n]);
3741 			nctx = n;
3742 			goto out;
3743 		}
3744 
3745 		err = intel_context_pin(ve[n]);
3746 		if (err) {
3747 			intel_context_put(ve[n]);
3748 			nctx = n;
3749 			goto out;
3750 		}
3751 	}
3752 
3753 	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3754 	if (err)
3755 		goto out;
3756 
3757 	for_each_prime_number_from(prime, 1, 8192) {
3758 		times[1] = ktime_get_raw();
3759 
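		/*
		 * CHAIN submits all 'prime' requests on one virtual context
		 * before moving to the next; otherwise the submissions are
		 * interleaved round-robin across the contexts.
		 */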
3760 		if (flags & CHAIN) {
3761 			for (nc = 0; nc < nctx; nc++) {
3762 				for (n = 0; n < prime; n++) {
3763 					struct i915_request *rq;
3764 
3765 					rq = i915_request_create(ve[nc]);
3766 					if (IS_ERR(rq)) {
3767 						err = PTR_ERR(rq);
3768 						goto out;
3769 					}
3770 
3771 					if (request[nc])
3772 						i915_request_put(request[nc]);
3773 					request[nc] = i915_request_get(rq);
3774 					i915_request_add(rq);
3775 				}
3776 			}
3777 		} else {
3778 			for (n = 0; n < prime; n++) {
3779 				for (nc = 0; nc < nctx; nc++) {
3780 					struct i915_request *rq;
3781 
3782 					rq = i915_request_create(ve[nc]);
3783 					if (IS_ERR(rq)) {
3784 						err = PTR_ERR(rq);
3785 						goto out;
3786 					}
3787 
3788 					if (request[nc])
3789 						i915_request_put(request[nc]);
3790 					request[nc] = i915_request_get(rq);
3791 					i915_request_add(rq);
3792 				}
3793 			}
3794 		}
3795 
3796 		for (nc = 0; nc < nctx; nc++) {
3797 			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3798 				pr_err("%s(%s): wait for %llx:%lld timed out\n",
3799 				       __func__, ve[0]->engine->name,
3800 				       request[nc]->fence.context,
3801 				       request[nc]->fence.seqno);
3802 
3803 				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3804 					  __func__, ve[0]->engine->name,
3805 					  request[nc]->fence.context,
3806 					  request[nc]->fence.seqno);
3807 				GEM_TRACE_DUMP();
3808 				intel_gt_set_wedged(gt);
3809 				break;
3810 			}
3811 		}
3812 
3813 		times[1] = ktime_sub(ktime_get_raw(), times[1]);
3814 		if (prime == 1)
3815 			times[0] = times[1];
3816 
3817 		for (nc = 0; nc < nctx; nc++) {
3818 			i915_request_put(request[nc]);
3819 			request[nc] = NULL;
3820 		}
3821 
3822 		if (__igt_timeout(end_time, NULL))
3823 			break;
3824 	}
3825 
3826 	err = igt_live_test_end(&t);
3827 	if (err)
3828 		goto out;
3829 
3830 	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3831 		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3832 		prime, div64_u64(ktime_to_ns(times[1]), prime));
3833 
3834 out:
3835 	if (igt_flush_test(gt->i915))
3836 		err = -EIO;
3837 
3838 	for (nc = 0; nc < nctx; nc++) {
3839 		i915_request_put(request[nc]);
3840 		intel_context_unpin(ve[nc]);
3841 		intel_context_put(ve[nc]);
3842 	}
3843 	return err;
3844 }
3845 
3846 static unsigned int
3847 __select_siblings(struct intel_gt *gt,
3848 		  unsigned int class,
3849 		  struct intel_engine_cs **siblings,
3850 		  bool (*filter)(const struct intel_engine_cs *))
3851 {
3852 	unsigned int n = 0;
3853 	unsigned int inst;
3854 
3855 	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3856 		if (!gt->engine_class[class][inst])
3857 			continue;
3858 
3859 		if (filter && !filter(gt->engine_class[class][inst]))
3860 			continue;
3861 
3862 		siblings[n++] = gt->engine_class[class][inst];
3863 	}
3864 
3865 	return n;
3866 }
3867 
3868 static unsigned int
3869 select_siblings(struct intel_gt *gt,
3870 		unsigned int class,
3871 		struct intel_engine_cs **siblings)
3872 {
3873 	return __select_siblings(gt, class, siblings, NULL);
3874 }
3875 
3876 static int live_virtual_engine(void *arg)
3877 {
3878 	struct intel_gt *gt = arg;
3879 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3880 	struct intel_engine_cs *engine;
3881 	enum intel_engine_id id;
3882 	unsigned int class;
3883 	int err;
3884 
3885 	if (intel_uc_uses_guc_submission(&gt->uc))
3886 		return 0;
3887 
3888 	for_each_engine(engine, gt, id) {
3889 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3890 		if (err) {
3891 			pr_err("Failed to wrap engine %s: err=%d\n",
3892 			       engine->name, err);
3893 			return err;
3894 		}
3895 	}
3896 
3897 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3898 		int nsibling, n;
3899 
3900 		nsibling = select_siblings(gt, class, siblings);
3901 		if (nsibling < 2)
3902 			continue;
3903 
3904 		for (n = 1; n <= nsibling + 1; n++) {
3905 			err = nop_virtual_engine(gt, siblings, nsibling,
3906 						 n, 0);
3907 			if (err)
3908 				return err;
3909 		}
3910 
3911 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3912 		if (err)
3913 			return err;
3914 	}
3915 
3916 	return 0;
3917 }
3918 
3919 static int mask_virtual_engine(struct intel_gt *gt,
3920 			       struct intel_engine_cs **siblings,
3921 			       unsigned int nsibling)
3922 {
3923 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3924 	struct intel_context *ve;
3925 	struct igt_live_test t;
3926 	unsigned int n;
3927 	int err;
3928 
3929 	/*
3930 	 * Check that by setting the execution mask on a request, we can
3931 	 * restrict it to our desired engine within the virtual engine.
3932 	 */
3933 
3934 	ve = intel_engine_create_virtual(siblings, nsibling);
3935 	if (IS_ERR(ve)) {
3936 		err = PTR_ERR(ve);
3937 		goto out_close;
3938 	}
3939 
3940 	err = intel_context_pin(ve);
3941 	if (err)
3942 		goto out_put;
3943 
3944 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3945 	if (err)
3946 		goto out_unpin;
3947 
3948 	for (n = 0; n < nsibling; n++) {
3949 		request[n] = i915_request_create(ve);
3950 		if (IS_ERR(request[n])) {
3951 			err = PTR_ERR(request[n]);
3952 			nsibling = n;
3953 			goto out;
3954 		}
3955 
3956 		/* Reverse order as it's more likely to be unnatural */
3957 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3958 
3959 		i915_request_get(request[n]);
3960 		i915_request_add(request[n]);
3961 	}
3962 
3963 	for (n = 0; n < nsibling; n++) {
3964 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3965 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3966 			       __func__, ve->engine->name,
3967 			       request[n]->fence.context,
3968 			       request[n]->fence.seqno);
3969 
3970 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3971 				  __func__, ve->engine->name,
3972 				  request[n]->fence.context,
3973 				  request[n]->fence.seqno);
3974 			GEM_TRACE_DUMP();
3975 			intel_gt_set_wedged(gt);
3976 			err = -EIO;
3977 			goto out;
3978 		}
3979 
3980 		if (request[n]->engine != siblings[nsibling - n - 1]) {
3981 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3982 			       request[n]->engine->name,
3983 			       siblings[nsibling - n - 1]->name);
3984 			err = -EINVAL;
3985 			goto out;
3986 		}
3987 	}
3988 
3989 	err = igt_live_test_end(&t);
3990 out:
3991 	if (igt_flush_test(gt->i915))
3992 		err = -EIO;
3993 
3994 	for (n = 0; n < nsibling; n++)
3995 		i915_request_put(request[n]);
3996 
3997 out_unpin:
3998 	intel_context_unpin(ve);
3999 out_put:
4000 	intel_context_put(ve);
4001 out_close:
4002 	return err;
4003 }
4004 
4005 static int live_virtual_mask(void *arg)
4006 {
4007 	struct intel_gt *gt = arg;
4008 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4009 	unsigned int class;
4010 	int err;
4011 
4012 	if (intel_uc_uses_guc_submission(&gt->uc))
4013 		return 0;
4014 
4015 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4016 		unsigned int nsibling;
4017 
4018 		nsibling = select_siblings(gt, class, siblings);
4019 		if (nsibling < 2)
4020 			continue;
4021 
4022 		err = mask_virtual_engine(gt, siblings, nsibling);
4023 		if (err)
4024 			return err;
4025 	}
4026 
4027 	return 0;
4028 }
4029 
4030 static int slicein_virtual_engine(struct intel_gt *gt,
4031 				  struct intel_engine_cs **siblings,
4032 				  unsigned int nsibling)
4033 {
4034 	const long timeout = slice_timeout(siblings[0]);
4035 	struct intel_context *ce;
4036 	struct i915_request *rq;
4037 	struct igt_spinner spin;
4038 	unsigned int n;
4039 	int err = 0;
4040 
4041 	/*
4042 	 * Virtual requests must take part in timeslicing on the target engines.
4043 	 */
4044 
4045 	if (igt_spinner_init(&spin, gt))
4046 		return -ENOMEM;
4047 
4048 	for (n = 0; n < nsibling; n++) {
4049 		ce = intel_context_create(siblings[n]);
4050 		if (IS_ERR(ce)) {
4051 			err = PTR_ERR(ce);
4052 			goto out;
4053 		}
4054 
4055 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4056 		intel_context_put(ce);
4057 		if (IS_ERR(rq)) {
4058 			err = PTR_ERR(rq);
4059 			goto out;
4060 		}
4061 
4062 		i915_request_add(rq);
4063 	}
4064 
4065 	ce = intel_engine_create_virtual(siblings, nsibling);
4066 	if (IS_ERR(ce)) {
4067 		err = PTR_ERR(ce);
4068 		goto out;
4069 	}
4070 
4071 	rq = intel_context_create_request(ce);
4072 	intel_context_put(ce);
4073 	if (IS_ERR(rq)) {
4074 		err = PTR_ERR(rq);
4075 		goto out;
4076 	}
4077 
4078 	i915_request_get(rq);
4079 	i915_request_add(rq);
4080 	if (i915_request_wait(rq, 0, timeout) < 0) {
4081 		GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4082 			      __func__, rq->engine->name);
4083 		GEM_TRACE_DUMP();
4084 		intel_gt_set_wedged(gt);
4085 		err = -EIO;
4086 	}
4087 	i915_request_put(rq);
4088 
4089 out:
4090 	igt_spinner_end(&spin);
4091 	if (igt_flush_test(gt->i915))
4092 		err = -EIO;
4093 	igt_spinner_fini(&spin);
4094 	return err;
4095 }
4096 
4097 static int sliceout_virtual_engine(struct intel_gt *gt,
4098 				   struct intel_engine_cs **siblings,
4099 				   unsigned int nsibling)
4100 {
4101 	const long timeout = slice_timeout(siblings[0]);
4102 	struct intel_context *ce;
4103 	struct i915_request *rq;
4104 	struct igt_spinner spin;
4105 	unsigned int n;
4106 	int err = 0;
4107 
4108 	/*
4109 	 * Virtual requests must allow others a fair timeslice.
4110 	 */
4111 
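	/*
	 * Roughly the inverse of slicein: occupy every sibling with spinners
	 * submitted through virtual engines, then check that a normal request
	 * queued directly to each physical sibling still receives a timeslice
	 * and completes within slice_timeout().
	 */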
4112 	if (igt_spinner_init(&spin, gt))
4113 		return -ENOMEM;
4114 
4115 	/* XXX We do not handle oversubscription and fairness with normal rq */
4116 	for (n = 0; n < nsibling; n++) {
4117 		ce = intel_engine_create_virtual(siblings, nsibling);
4118 		if (IS_ERR(ce)) {
4119 			err = PTR_ERR(ce);
4120 			goto out;
4121 		}
4122 
4123 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4124 		intel_context_put(ce);
4125 		if (IS_ERR(rq)) {
4126 			err = PTR_ERR(rq);
4127 			goto out;
4128 		}
4129 
4130 		i915_request_add(rq);
4131 	}
4132 
4133 	for (n = 0; !err && n < nsibling; n++) {
4134 		ce = intel_context_create(siblings[n]);
4135 		if (IS_ERR(ce)) {
4136 			err = PTR_ERR(ce);
4137 			goto out;
4138 		}
4139 
4140 		rq = intel_context_create_request(ce);
4141 		intel_context_put(ce);
4142 		if (IS_ERR(rq)) {
4143 			err = PTR_ERR(rq);
4144 			goto out;
4145 		}
4146 
4147 		i915_request_get(rq);
4148 		i915_request_add(rq);
4149 		if (i915_request_wait(rq, 0, timeout) < 0) {
4150 			GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4151 				      __func__, siblings[n]->name);
4152 			GEM_TRACE_DUMP();
4153 			intel_gt_set_wedged(gt);
4154 			err = -EIO;
4155 		}
4156 		i915_request_put(rq);
4157 	}
4158 
4159 out:
4160 	igt_spinner_end(&spin);
4161 	if (igt_flush_test(gt->i915))
4162 		err = -EIO;
4163 	igt_spinner_fini(&spin);
4164 	return err;
4165 }
4166 
4167 static int live_virtual_slice(void *arg)
4168 {
4169 	struct intel_gt *gt = arg;
4170 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4171 	unsigned int class;
4172 	int err;
4173 
4174 	if (intel_uc_uses_guc_submission(&gt->uc))
4175 		return 0;
4176 
4177 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4178 		unsigned int nsibling;
4179 
4180 		nsibling = __select_siblings(gt, class, siblings,
4181 					     intel_engine_has_timeslices);
4182 		if (nsibling < 2)
4183 			continue;
4184 
4185 		err = slicein_virtual_engine(gt, siblings, nsibling);
4186 		if (err)
4187 			return err;
4188 
4189 		err = sliceout_virtual_engine(gt, siblings, nsibling);
4190 		if (err)
4191 			return err;
4192 	}
4193 
4194 	return 0;
4195 }
4196 
4197 static int preserved_virtual_engine(struct intel_gt *gt,
4198 				    struct intel_engine_cs **siblings,
4199 				    unsigned int nsibling)
4200 {
4201 	struct i915_request *last = NULL;
4202 	struct intel_context *ve;
4203 	struct i915_vma *scratch;
4204 	struct igt_live_test t;
4205 	unsigned int n;
4206 	int err = 0;
4207 	u32 *cs;
4208 
4209 	scratch =
4210 		__vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
4211 						    PAGE_SIZE);
4212 	if (IS_ERR(scratch))
4213 		return PTR_ERR(scratch);
4214 
4215 	err = i915_vma_sync(scratch);
4216 	if (err)
4217 		goto out_scratch;
4218 
4219 	ve = intel_engine_create_virtual(siblings, nsibling);
4220 	if (IS_ERR(ve)) {
4221 		err = PTR_ERR(ve);
4222 		goto out_scratch;
4223 	}
4224 
4225 	err = intel_context_pin(ve);
4226 	if (err)
4227 		goto out_put;
4228 
4229 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4230 	if (err)
4231 		goto out_unpin;
4232 
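	/*
	 * Build a chain of requests that hops across the siblings: request n
	 * stores GPR[n] to the scratch page and then loads GPR[n + 1] with
	 * the value n + 1. The readback below is only correct if the user
	 * GPR state is carried in the context image from one physical engine
	 * to the next.
	 */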
4233 	for (n = 0; n < NUM_GPR_DW; n++) {
4234 		struct intel_engine_cs *engine = siblings[n % nsibling];
4235 		struct i915_request *rq;
4236 
4237 		rq = i915_request_create(ve);
4238 		if (IS_ERR(rq)) {
4239 			err = PTR_ERR(rq);
4240 			goto out_end;
4241 		}
4242 
4243 		i915_request_put(last);
4244 		last = i915_request_get(rq);
4245 
4246 		cs = intel_ring_begin(rq, 8);
4247 		if (IS_ERR(cs)) {
4248 			i915_request_add(rq);
4249 			err = PTR_ERR(cs);
4250 			goto out_end;
4251 		}
4252 
4253 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4254 		*cs++ = CS_GPR(engine, n);
4255 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4256 		*cs++ = 0;
4257 
4258 		*cs++ = MI_LOAD_REGISTER_IMM(1);
4259 		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4260 		*cs++ = n + 1;
4261 
4262 		*cs++ = MI_NOOP;
4263 		intel_ring_advance(rq, cs);
4264 
4265 		/* Restrict this request to run on a particular engine */
4266 		rq->execution_mask = engine->mask;
4267 		i915_request_add(rq);
4268 	}
4269 
4270 	if (i915_request_wait(last, 0, HZ / 5) < 0) {
4271 		err = -ETIME;
4272 		goto out_end;
4273 	}
4274 
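	/* Slot n of the scratch page should now hold n, written via GPR[n]. */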
4275 	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
4276 	if (IS_ERR(cs)) {
4277 		err = PTR_ERR(cs);
4278 		goto out_end;
4279 	}
4280 
4281 	for (n = 0; n < NUM_GPR_DW; n++) {
4282 		if (cs[n] != n) {
4283 			pr_err("Incorrect value[%d] found for GPR[%d]\n",
4284 			       cs[n], n);
4285 			err = -EINVAL;
4286 			break;
4287 		}
4288 	}
4289 
4290 	i915_gem_object_unpin_map(scratch->obj);
4291 
4292 out_end:
4293 	if (igt_live_test_end(&t))
4294 		err = -EIO;
4295 	i915_request_put(last);
4296 out_unpin:
4297 	intel_context_unpin(ve);
4298 out_put:
4299 	intel_context_put(ve);
4300 out_scratch:
4301 	i915_vma_unpin_and_release(&scratch, 0);
4302 	return err;
4303 }
4304 
4305 static int live_virtual_preserved(void *arg)
4306 {
4307 	struct intel_gt *gt = arg;
4308 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4309 	unsigned int class;
4310 
4311 	/*
4312 	 * Check that the context image retains non-privileged (user) registers
4313 	 * from one engine to the next. For this we check that the CS_GPR
4314 	 * values are preserved.
4315 	 */
4316 
4317 	if (intel_uc_uses_guc_submission(&gt->uc))
4318 		return 0;
4319 
4320 	/* As we use CS_GPR, we can only run on platforms where they exist on all engines. */
4321 	if (GRAPHICS_VER(gt->i915) < 9)
4322 		return 0;
4323 
4324 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4325 		int nsibling, err;
4326 
4327 		nsibling = select_siblings(gt, class, siblings);
4328 		if (nsibling < 2)
4329 			continue;
4330 
4331 		err = preserved_virtual_engine(gt, siblings, nsibling);
4332 		if (err)
4333 			return err;
4334 	}
4335 
4336 	return 0;
4337 }
4338 
4339 static int reset_virtual_engine(struct intel_gt *gt,
4340 				struct intel_engine_cs **siblings,
4341 				unsigned int nsibling)
4342 {
4343 	struct intel_engine_cs *engine;
4344 	struct intel_context *ve;
4345 	struct igt_spinner spin;
4346 	struct i915_request *rq;
4347 	unsigned int n;
4348 	int err = 0;
4349 
4350 	/*
4351 	 * In order to support offline error capture for fast preempt reset,
4352 	 * we need to decouple the guilty request and ensure that it and its
4353 	 * descendants are not executed while the capture is in progress.
4354 	 */
4355 
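	/*
	 * The sequence below: block a physical sibling with a spinner issued
	 * through the virtual engine, take manual control of the reset and
	 * submission tasklet, unwind and hold the guilty request, reset the
	 * engine, and then verify the held request is not resubmitted until
	 * execlists_unhold() releases it.
	 */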
4356 	if (igt_spinner_init(&spin, gt))
4357 		return -ENOMEM;
4358 
4359 	ve = intel_engine_create_virtual(siblings, nsibling);
4360 	if (IS_ERR(ve)) {
4361 		err = PTR_ERR(ve);
4362 		goto out_spin;
4363 	}
4364 
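	/*
	 * Park the heartbeats so that no background pulse preempts the
	 * spinner or triggers its own reset while we drive the reset by hand.
	 */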
4365 	for (n = 0; n < nsibling; n++)
4366 		st_engine_heartbeat_disable(siblings[n]);
4367 
4368 	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4369 	if (IS_ERR(rq)) {
4370 		err = PTR_ERR(rq);
4371 		goto out_heartbeat;
4372 	}
4373 	i915_request_add(rq);
4374 
4375 	if (!igt_wait_for_spinner(&spin, rq)) {
4376 		intel_gt_set_wedged(gt);
4377 		err = -ETIME;
4378 		goto out_heartbeat;
4379 	}
4380 
4381 	engine = rq->engine;
4382 	GEM_BUG_ON(engine == ve->engine);
4383 
4384 	/* Take ownership of the reset and tasklet */
4385 	err = engine_lock_reset_tasklet(engine);
4386 	if (err)
4387 		goto out_heartbeat;
4388 
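	/*
	 * Run the submission tasklet by hand (it is disabled while we own the
	 * reset) so the spinner becomes the active request on the engine.
	 */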
4389 	engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
4390 	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4391 
4392 	/* Fake a preemption event; a failed one, of course */
4393 	spin_lock_irq(&engine->sched_engine->lock);
4394 	__unwind_incomplete_requests(engine);
4395 	spin_unlock_irq(&engine->sched_engine->lock);
4396 	GEM_BUG_ON(rq->engine != engine);
4397 
4398 	/* Reset the engine while keeping our active request on hold */
4399 	execlists_hold(engine, rq);
4400 	GEM_BUG_ON(!i915_request_on_hold(rq));
4401 
4402 	__intel_engine_reset_bh(engine, NULL);
4403 	GEM_BUG_ON(rq->fence.error != -EIO);
4404 
4405 	/* Release our grasp on the engine, letting CS flow again */
4406 	engine_unlock_reset_tasklet(engine);
4407 
4408 	/* Check that we do not resubmit the held request */
4409 	i915_request_get(rq);
4410 	if (!i915_request_wait(rq, 0, HZ / 5)) {
4411 		pr_err("%s: on-hold request completed!\n",
4412 		       engine->name);
4413 		intel_gt_set_wedged(gt);
4414 		err = -EIO;
4415 		goto out_rq;
4416 	}
4417 	GEM_BUG_ON(!i915_request_on_hold(rq));
4418 
4419 	/* But the held request is resubmitted on release */
4420 	execlists_unhold(engine, rq);
4421 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4422 		pr_err("%s: held request did not complete!\n",
4423 		       engine->name);
4424 		intel_gt_set_wedged(gt);
4425 		err = -ETIME;
4426 	}
4427 
4428 out_rq:
4429 	i915_request_put(rq);
4430 out_heartbeat:
4431 	for (n = 0; n < nsibling; n++)
4432 		st_engine_heartbeat_enable(siblings[n]);
4433 
4434 	intel_context_put(ve);
4435 out_spin:
4436 	igt_spinner_fini(&spin);
4437 	return err;
4438 }
4439 
4440 static int live_virtual_reset(void *arg)
4441 {
4442 	struct intel_gt *gt = arg;
4443 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4444 	unsigned int class;
4445 
4446 	/*
4447 	 * Check that we handle a reset event within a virtual engine.
4448 	 * Only the physical engine is reset, but we have to check the flow
4449 	 * of the virtual requests around the reset, and make sure no request
4450 	 * is forgotten.
4451 	 */
4452 
4453 	if (intel_uc_uses_guc_submission(&gt->uc))
4454 		return 0;
4455 
4456 	if (!intel_has_reset_engine(gt))
4457 		return 0;
4458 
4459 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4460 		int nsibling, err;
4461 
4462 		nsibling = select_siblings(gt, class, siblings);
4463 		if (nsibling < 2)
4464 			continue;
4465 
4466 		err = reset_virtual_engine(gt, siblings, nsibling);
4467 		if (err)
4468 			return err;
4469 	}
4470 
4471 	return 0;
4472 }
4473 
4474 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4475 {
4476 	static const struct i915_subtest tests[] = {
4477 		SUBTEST(live_sanitycheck),
4478 		SUBTEST(live_unlite_switch),
4479 		SUBTEST(live_unlite_preempt),
4480 		SUBTEST(live_unlite_ring),
4481 		SUBTEST(live_pin_rewind),
4482 		SUBTEST(live_hold_reset),
4483 		SUBTEST(live_error_interrupt),
4484 		SUBTEST(live_timeslice_preempt),
4485 		SUBTEST(live_timeslice_rewind),
4486 		SUBTEST(live_timeslice_queue),
4487 		SUBTEST(live_timeslice_nopreempt),
4488 		SUBTEST(live_busywait_preempt),
4489 		SUBTEST(live_preempt),
4490 		SUBTEST(live_late_preempt),
4491 		SUBTEST(live_nopreempt),
4492 		SUBTEST(live_preempt_cancel),
4493 		SUBTEST(live_suppress_self_preempt),
4494 		SUBTEST(live_chain_preempt),
4495 		SUBTEST(live_preempt_ring),
4496 		SUBTEST(live_preempt_gang),
4497 		SUBTEST(live_preempt_timeout),
4498 		SUBTEST(live_preempt_user),
4499 		SUBTEST(live_preempt_smoke),
4500 		SUBTEST(live_virtual_engine),
4501 		SUBTEST(live_virtual_mask),
4502 		SUBTEST(live_virtual_preserved),
4503 		SUBTEST(live_virtual_slice),
4504 		SUBTEST(live_virtual_reset),
4505 	};
4506 
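	/*
	 * These tests exercise the execlists backend directly, so they are
	 * only meaningful with ELSP submission and on an unwedged GT.
	 */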
4507 	if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
4508 		return 0;
4509 
4510 	if (intel_gt_is_wedged(&i915->gt))
4511 		return 0;
4512 
4513 	return intel_gt_live_subtests(tests, &i915->gt);
4514 }
4515