/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "../i915_selftest.h"

#include "mock_context.h"
#include "mock_drm.h"

struct hang {
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *hws;
	struct drm_i915_gem_object *obj;
	u32 *seqno;
	u32 *batch;
};

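/*
 * Prepare the scratch objects for a "hang": a page of per-context seqno
 * slots (hws) that the batch writes to, and a page holding the batch
 * itself, both pinned and mapped for CPU access.
 */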
static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->i915 = i915;

	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->hws))
		return PTR_ERR(h->hws);

	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
	return err;
}

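/* GPU address of this request's seqno slot within the hws page. */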
static u64 hws_address(const struct i915_vma *hws,
		       const struct drm_i915_gem_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}

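/*
 * Build a batch that writes the request's seqno to the hws page and then
 * jumps back to its own start, spinning forever until the CPU overwrites
 * the first dword with MI_BATCH_BUFFER_END.
 */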
static int emit_recurse_batch(struct hang *h,
			      struct drm_i915_gem_request *rq)
{
	struct drm_i915_private *i915 = h->i915;
	struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	u32 *batch;
	int err;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
	if (err)
		goto unpin_hws;

	err = i915_switch_context(rq);
	if (err)
		goto unpin_hws;

	i915_vma_move_to_active(vma, rq, 0);
	if (!i915_gem_object_has_active_reference(vma->obj)) {
		i915_gem_object_get(vma->obj);
		i915_gem_object_set_active_reference(vma->obj);
	}

	i915_vma_move_to_active(hws, rq, 0);
	if (!i915_gem_object_has_active_reference(hws->obj)) {
		i915_gem_object_get(hws->obj);
		i915_gem_object_set_active_reference(hws->obj);
	}

	batch = h->batch;
	if (INTEL_GEN(i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */

	flags = 0;
	if (INTEL_GEN(vm->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

unpin_hws:
	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	return err;
}

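/*
 * Allocate a request on the given engine/context and fill it with the
 * spinning batch. If the current batch object is still active (a previous
 * hang is still spinning on it), swap in a fresh object first.
 */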
static struct drm_i915_gem_request *
hang_create_request(struct hang *h,
		    struct intel_engine_cs *engine,
		    struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *rq;
	int err;

	if (i915_gem_object_is_active(h->obj)) {
		struct drm_i915_gem_object *obj;
		void *vaddr;

		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		vaddr = i915_gem_object_pin_map(obj,
						HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			i915_gem_object_put(obj);
			return ERR_CAST(vaddr);
		}

		i915_gem_object_unpin_map(h->obj);
		i915_gem_object_put(h->obj);

		h->obj = obj;
		h->batch = vaddr;
	}

	rq = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(rq))
		return rq;

	err = emit_recurse_batch(h, rq);
	if (err) {
		__i915_add_request(rq, false);
		return ERR_PTR(err);
	}

	return rq;
}

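/* Read back the seqno the spinning batch last wrote for this context. */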
static u32 hws_seqno(const struct hang *h,
		     const struct drm_i915_gem_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}

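/*
 * Terminate any still-spinning batch by overwriting its first dword with
 * MI_BATCH_BUFFER_END, then release the scratch objects and wait for the
 * GPU to idle.
 */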
static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	wmb();

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
}

static int igt_hang_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		long timeout;

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_gem_request_get(rq);

		*h.batch = MI_BATCH_BUFFER_END;
		__i915_add_request(rq, true);

		timeout = i915_wait_request(rq,
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		i915_gem_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

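/*
 * Serialise against all other reset paths: claim the backoff bit and every
 * per-engine reset bit before the test manipulates reset state directly.
 */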
static void global_reset_lock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
		wait_event(i915->gpu_error.reset_queue,
			   !test_bit(I915_RESET_BACKOFF,
				     &i915->gpu_error.flags));

	for_each_engine(engine, i915, id) {
		while (test_and_set_bit(I915_RESET_ENGINE + id,
					&i915->gpu_error.flags))
			wait_on_bit(&i915->gpu_error.flags,
				    I915_RESET_ENGINE + id,
				    TASK_UNINTERRUPTIBLE);
	}
}

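/* Drop the reset bits taken in global_reset_lock() and wake any waiters. */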
static void global_reset_unlock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	wake_up_all(&i915->gpu_error.reset_queue);
}

static int igt_global_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned int reset_count;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	global_reset_lock(i915);
	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	reset_count = i915_reset_count(&i915->gpu_error);

	i915_reset(i915, I915_RESET_QUIET);

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}
	mutex_unlock(&i915->drm.struct_mutex);

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

static int igt_reset_engine(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int reset_count, reset_engine_count;
	int err = 0;

	/* Check that we can issue an engine reset without triggering a full GPU reset */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);

		err = i915_reset_engine(engine, I915_RESET_QUIET);
		if (err) {
			pr_err("i915_reset_engine failed\n");
			break;
		}

		if (i915_reset_count(&i915->gpu_error) != reset_count) {
			pr_err("Full GPU reset recorded! (engine reset expected)\n");
			err = -EINVAL;
			break;
		}

		if (i915_reset_engine_count(&i915->gpu_error, engine) ==
		    reset_engine_count) {
			pr_err("No %s engine reset recorded!\n", engine->name);
			err = -EINVAL;
			break;
		}

		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

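/*
 * kthread body: keep the engine busy by ping-ponging requests between two
 * contexts until asked to stop, so that resets on other engines can be
 * checked for collateral damage.
 */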
static int active_engine(void *data)
{
	struct intel_engine_cs *engine = data;
	struct drm_i915_gem_request *rq[2] = {};
	struct i915_gem_context *ctx[2];
	struct drm_file *file;
	unsigned long count = 0;
	int err = 0;

	file = mock_file(engine->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[0] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[0])) {
		err = PTR_ERR(ctx[0]);
		goto err_file;
	}

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[1] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[1])) {
		err = PTR_ERR(ctx[1]);
		i915_gem_context_put(ctx[0]);
		goto err_file;
	}

	while (!kthread_should_stop()) {
		unsigned int idx = count++ & 1;
		struct drm_i915_gem_request *old = rq[idx];
		struct drm_i915_gem_request *new;

		mutex_lock(&engine->i915->drm.struct_mutex);
		new = i915_gem_request_alloc(engine, ctx[idx]);
		if (IS_ERR(new)) {
			mutex_unlock(&engine->i915->drm.struct_mutex);
			err = PTR_ERR(new);
			break;
		}

		rq[idx] = i915_gem_request_get(new);
		i915_add_request(new);
		mutex_unlock(&engine->i915->drm.struct_mutex);

		if (old) {
			i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT);
			i915_gem_request_put(old);
		}
	}

	for (count = 0; count < ARRAY_SIZE(rq); count++)
		i915_gem_request_put(rq[count]);

err_file:
	mock_file_free(engine->i915, file);
	return err;
}

static int igt_reset_active_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine, *active;
	enum intel_engine_id id, tmp;
	int err = 0;

	/* Check that issuing a reset on one engine does not interfere
	 * with any other engine.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		struct task_struct *threads[I915_NUM_ENGINES];
		unsigned long resets[I915_NUM_ENGINES];
		unsigned long global = i915_reset_count(&i915->gpu_error);
		IGT_TIMEOUT(end_time);

		memset(threads, 0, sizeof(threads));
		for_each_engine(active, i915, tmp) {
			struct task_struct *tsk;

			if (active == engine)
				continue;

			resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
							      active);

			tsk = kthread_run(active_engine, active,
					  "igt/%s", active->name);
			if (IS_ERR(tsk)) {
				err = PTR_ERR(tsk);
				goto unwind;
			}

			threads[tmp] = tsk;
			get_task_struct(tsk);
		}

		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		do {
			err = i915_reset_engine(engine, I915_RESET_QUIET);
			if (err) {
				pr_err("i915_reset_engine(%s) failed, err=%d\n",
				       engine->name, err);
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);

unwind:
		for_each_engine(active, i915, tmp) {
			int ret;

			if (!threads[tmp])
				continue;

			ret = kthread_stop(threads[tmp]);
			if (ret) {
				pr_err("kthread for active engine %s failed, err=%d\n",
				       active->name, ret);
				if (!err)
					err = ret;
			}
			put_task_struct(threads[tmp]);

			if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
								   active)) {
				pr_err("Innocent engine %s was reset (count=%ld)\n",
				       active->name,
				       i915_reset_engine_count(&i915->gpu_error,
							       active) - resets[tmp]);
				err = -EIO;
			}
		}

		if (global != i915_reset_count(&i915->gpu_error)) {
			pr_err("Global reset (count=%ld)!\n",
			       i915_reset_count(&i915->gpu_error) - global);
			err = -EIO;
		}

		if (err)
			break;

		cond_resched();
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

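/*
 * Pretend hangcheck has declared the engine stuck on this request and hand
 * the reset off to the waiter (I915_RESET_HANDOFF).
 */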
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
	u32 reset_count;

	rq->engine->hangcheck.stalled = true;
	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);

	reset_count = i915_reset_count(&rq->i915->gpu_error);

	set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
	wake_up_all(&rq->i915->gpu_error.wait_queue);

	return reset_count;
}

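/*
 * Poll (quickly, then more patiently) until the spinning batch reports its
 * seqno, confirming that the hanging request has started executing.
 */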
static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}

static int igt_wait_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	/* Check that we detect a stuck waiter and issue a reset */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		pr_err("Failed to start request %x\n", rq->fence.seqno);
		err = -EIO;
		goto out_rq;
	}

	reset_count = fake_hangcheck(rq);

	timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10);
	if (timeout < 0) {
		pr_err("i915_wait_request failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

out_rq:
	i915_gem_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_reset_queue(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct drm_i915_gem_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		if (!intel_engine_can_store_dword(engine))
			continue;

		prev = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_gem_request_get(prev);
		__i915_add_request(prev, true);

		count = 0;
		do {
			struct drm_i915_gem_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h,
						 engine,
						 i915->kernel_context);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_gem_request_get(rq);
			__i915_add_request(rq, true);

			if (!wait_for_hang(&h, prev)) {
				pr_err("Failed to start request %x\n",
				       prev->fence.seqno);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EIO;
				goto fini;
			}

			reset_count = fake_hangcheck(prev);

			i915_reset(i915, I915_RESET_QUIET);

			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
					    &i915->gpu_error.flags));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(&i915->gpu_error) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_gem_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

		*h.batch = MI_BATCH_BUFFER_END;
		wmb();

		i915_gem_request_put(prev);
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_handle_error(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine = i915->engine[RCS];
	struct hang h;
	struct drm_i915_gem_request *rq;
	struct i915_gpu_state *error;
	int err;

	/* Check that we can issue a global GPU and engine reset */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);

	err = hang_init(&h, i915);
	if (err)
		goto err_unlock;

	rq = hang_create_request(&h, engine, i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		pr_err("Failed to start request %x\n", rq->fence.seqno);
		err = -EIO;
		goto err_request;
	}

	mutex_unlock(&i915->drm.struct_mutex);

	/* Temporarily disable error capture */
	error = xchg(&i915->gpu_error.first_error, (void *)-1);

	engine->hangcheck.stalled = true;
	engine->hangcheck.seqno = intel_engine_get_seqno(engine);

	i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);

	xchg(&i915->gpu_error.first_error, error);

	mutex_lock(&i915->drm.struct_mutex);

	if (rq->fence.error != -EIO) {
		pr_err("Guilty request not identified!\n");
		err = -EINVAL;
		goto err_request;
	}

err_request:
	i915_gem_request_put(rq);
err_fini:
	hang_fini(&h);
err_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

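/*
 * Entry point for the live hangcheck/reset selftests; skipped entirely on
 * platforms without GPU reset support.
 */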
int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_global_reset),
		SUBTEST(igt_reset_engine),
		SUBTEST(igt_reset_active_engines),
		SUBTEST(igt_wait_reset),
		SUBTEST(igt_reset_queue),
		SUBTEST(igt_handle_error),
	};

	if (!intel_has_gpu_reset(i915))
		return 0;

	return i915_subtests(tests, i915);
}