1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <limits.h>
25 #include <string.h>
26 #include <strings.h>
27 #include <signal.h>
28 #include <errno.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/ioctl.h>
32 #include <fcntl.h>
33 
34 #include "drmtest.h"
35 #include "igt_aux.h"
36 #include "igt_core.h"
37 #include "igt_gt.h"
38 #include "igt_sysfs.h"
39 #include "igt_debugfs.h"
40 #include "ioctl_wrappers.h"
41 #include "intel_reg.h"
42 #include "intel_chipset.h"
43 #include "igt_dummyload.h"
44 #include "i915/gem_engine_topology.h"
45 
46 /**
47  * SECTION:igt_gt
48  * @short_description: GT support library
49  * @title: GT
50  * @include: igt.h
51  *
52  * This library provides various auxiliary helper functions to handle general
53  * interactions with the GT like forcewake handling, injecting hangs or stopping
54  * engines.
55  */
56 
has_gpu_reset(int fd)57 static bool has_gpu_reset(int fd)
58 {
59 	static int once = -1;
60 	if (once < 0) {
61 		struct drm_i915_getparam gp;
62 		int val = 0;
63 
64 		memset(&gp, 0, sizeof(gp));
65 		gp.param = 35; /* HAS_GPU_RESET */
66 		gp.value = &val;
67 
68 		if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
69 			once = intel_gen(intel_get_drm_devid(fd)) >= 5;
70 		else
71 			once = val > 0;
72 	}
73 	return once;
74 }
75 
/*
 * Discard the error state of @dev by writing to the "error" file in its
 * sysfs directory. Does nothing if that directory cannot be opened.
 */
static void eat_error_state(int dev)
{
	const int sysfs = igt_sysfs_open(dev);

	if (sysfs >= 0) {
		/* Any write to the error state clears it */
		igt_sysfs_set(sysfs, "error", "");
		close(sysfs);
	}
}
88 
/**
 * igt_require_hang_ring:
 * @fd: open i915 drm file descriptor
 * @ring: execbuf ring flag
 *
 * Convenience helper that checks whether advanced hang injection is supported
 * by the kernel, and uses igt_skip to automatically skip the test/subtest
 * when it is not.
 *
 * Note that some tests want to exercise gpu wedging behavior, for which they
 * intentionally disable gpu reset support but still want to inject a hang
 * (see for example tests/gem_eio.c). To accommodate them, we expect the first
 * invocation of igt_require_hang_ring to come from a vanilla context;
 * has_gpu_reset() caches the answer it determines then and reuses it for all
 * later calls. This gives us the convenience of double checking when
 * injecting hangs, whilst pushing the complexity to the tests that are
 * deliberately trying to break the box.
 *
 * This function is also controlled by the environment variables:
 *
 * IGT_HANG (boolean) - if false, skip all tests that try to inject a hang.
 * Default: true
 *
 * IGT_HANG_WITHOUT_RESET (boolean) - if true, allow the hang even if the
 * kernel does not support GPU recovery. The machine will be wedged afterwards
 * (and so require a reboot between testing), but it does allow limited testing
 * to be done under hang injection.
 * Default: false
 */
void igt_require_hang_ring(int fd, int ring)
{
	const bool hang_enabled = igt_check_boolean_env_var("IGT_HANG", true);

	if (!hang_enabled)
		igt_skip("hang injection disabled by user");

	gem_require_ring(fd, ring);
	gem_context_require_bannable(fd);

	/* The user accepted a wedged machine: no need for reset support. */
	if (igt_check_boolean_env_var("IGT_HANG_WITHOUT_RESET", false))
		return;

	igt_require(has_gpu_reset(fd));
}
128 
context_get_ban(int fd,unsigned ctx)129 static unsigned context_get_ban(int fd, unsigned ctx)
130 {
131 	struct drm_i915_gem_context_param param = {
132 		.ctx_id = ctx,
133 		.param = I915_CONTEXT_PARAM_BANNABLE,
134 	};
135 
136 	if (__gem_context_get_param(fd, ¶m) == -EINVAL) {
137 		igt_assert(param.value == 0);
138 		param.param = I915_CONTEXT_PARAM_BAN_PERIOD;
139 		gem_context_get_param(fd, ¶m);
140 	}
141 
142 	return param.value;
143 }
144 
context_set_ban(int fd,unsigned ctx,unsigned ban)145 static void context_set_ban(int fd, unsigned ctx, unsigned ban)
146 {
147 	struct drm_i915_gem_context_param param = {
148 		.ctx_id = ctx,
149 		.param = I915_CONTEXT_PARAM_BANNABLE,
150 		.value = ban,
151 	};
152 
153 	if(__gem_context_set_param(fd, ¶m) == -EINVAL) {
154 		igt_assert(param.value == ban);
155 		param.param = I915_CONTEXT_PARAM_BAN_PERIOD;
156 		gem_context_set_param(fd, ¶m);
157 	}
158 }
159 
igt_allow_hang(int fd,unsigned ctx,unsigned flags)160 igt_hang_t igt_allow_hang(int fd, unsigned ctx, unsigned flags)
161 {
162 	struct drm_i915_gem_context_param param = {
163 		.ctx_id = ctx,
164 	};
165 	unsigned ban;
166 
167 	/*
168 	 * If the driver is already wedged, we don't expect it to be able
169 	 * to recover from reset and for it to remain wedged. It's hard to
170 	 * say even if we do hang/reset making the test suspect.
171 	 */
172 	igt_require_gem(fd);
173 
174 	if (!igt_check_boolean_env_var("IGT_HANG", true))
175 		igt_skip("hang injection disabled by user");
176 	gem_context_require_bannable(fd);
177 	if (!igt_check_boolean_env_var("IGT_HANG_WITHOUT_RESET", false))
178 		igt_require(has_gpu_reset(fd));
179 
180 	igt_require(igt_sysfs_set_parameter
181 		    (fd, "reset", "%d", INT_MAX /* any reset method */));
182 
183 	if ((flags & HANG_ALLOW_CAPTURE) == 0) {
184 		param.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE;
185 		param.value = 1;
186 		/* Older kernels may not have NO_ERROR_CAPTURE, in which case
187 		 * we just eat the error state in post-hang (and hope we eat
188 		 * the right one).
189 		 */
190 		__gem_context_set_param(fd, ¶m);
191 	}
192 
193 	ban = context_get_ban(fd, ctx);
194 	if ((flags & HANG_ALLOW_BAN) == 0)
195 		context_set_ban(fd, ctx, 0);
196 
197 	return (struct igt_hang){ 0, ctx, ban, flags };
198 }
199 
/**
 * igt_disallow_hang:
 * @fd: open i915 drm file descriptor
 * @arg: hang state, as returned by igt_allow_hang()
 *
 * Writes the ban value stored in @arg back to its context. Unless
 * HANG_ALLOW_CAPTURE is set in the stored flags, it also clears
 * I915_CONTEXT_PARAM_NO_ERROR_CAPTURE on that context and discards the
 * error state.
 */
void igt_disallow_hang(int fd, igt_hang_t arg)
{
	struct drm_i915_gem_context_param param;

	context_set_ban(fd, arg.ctx, arg.ban);

	if (arg.flags & HANG_ALLOW_CAPTURE)
		return;

	memset(&param, 0, sizeof(param));
	param.ctx_id = arg.ctx;
	param.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE;
	param.value = 0;
	__gem_context_set_param(fd, &param);

	eat_error_state(fd);
}
215 
216 /**
217  * has_ctx_exec:
218  * @fd: open i915 drm file descriptor
219  * @ring: execbuf ring flag
220  * @ctx: Context to be checked
221  *
222  * This helper function checks if non default context submission is allowed
223  * on a ring.
224  *
225  * Returns:
226  * True if allowed
227  *
228  */
has_ctx_exec(int fd,unsigned ring,uint32_t ctx)229 static bool has_ctx_exec(int fd, unsigned ring, uint32_t ctx)
230 {
231 	struct drm_i915_gem_execbuffer2 execbuf;
232 	struct drm_i915_gem_exec_object2 exec;
233 
234 	/* silly ABI, the kernel thinks everyone who has BSD also has BSD2 */
235 	if ((ring & ~(3<<13)) == I915_EXEC_BSD) {
236 		if (ring & (3 << 13) && !gem_has_bsd2(fd))
237 			return false;
238 	}
239 
240 	memset(&exec, 0, sizeof(exec));
241 	memset(&execbuf, 0, sizeof(execbuf));
242 	execbuf.buffers_ptr = to_user_pointer(&exec);
243 	execbuf.buffer_count = 1;
244 	execbuf.flags = ring;
245 	execbuf.rsvd1 = ctx;
246 	/*
247 	 * If context submission is not allowed, this will return EINVAL
248 	 * Otherwise, this will return ENOENT on account of no gem obj
249 	 * being submitted
250 	 */
251 	return __gem_execbuf(fd, &execbuf) == -ENOENT;
252 }
253 
254 /**
255  * igt_hang_ring_ctx:
256  * @fd: open i915 drm file descriptor
257  * @ctx: the contxt specifier
258  * @ring: execbuf ring flag
259  * @flags: set of flags to control execution
260  * @offset: The resultant gtt offset of the exec obj
261  *
262  * This helper function injects a hanging batch associated with @ctx into @ring.
263  * It returns a #igt_hang_t structure which must be passed to
264  * igt_post_hang_ring() for hang post-processing (after the gpu hang
265  * interaction has been tested.
266  *
267  * Returns:
268  * Structure with helper internal state for igt_post_hang_ring().
269  */
igt_hang_ctx(int fd,uint32_t ctx,int ring,unsigned flags)270 igt_hang_t igt_hang_ctx(int fd, uint32_t ctx, int ring, unsigned flags)
271 {
272 	struct drm_i915_gem_context_param param;
273 	igt_spin_t *spin;
274 	unsigned ban;
275 
276 	igt_require_hang_ring(fd, ring);
277 
278 	/* check if non-default ctx submission is allowed */
279 	igt_require(ctx == 0 || has_ctx_exec(fd, ring, ctx));
280 
281 	param.ctx_id = ctx;
282 	param.size = 0;
283 
284 	if ((flags & HANG_ALLOW_CAPTURE) == 0) {
285 		param.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE;
286 		param.value = 1;
287 		/* Older kernels may not have NO_ERROR_CAPTURE, in which case
288 		 * we just eat the error state in post-hang (and hope we eat
289 		 * the right one).
290 		 */
291 		__gem_context_set_param(fd, ¶m);
292 	}
293 
294 	ban = context_get_ban(fd, ctx);
295 	if ((flags & HANG_ALLOW_BAN) == 0)
296 		context_set_ban(fd, ctx, 0);
297 
298 	spin = __igt_spin_new(fd,
299 			      .ctx = ctx,
300 			      .engine = ring,
301 			      .flags = IGT_SPIN_NO_PREEMPTION);
302 
303 	return (igt_hang_t){ spin, ctx, ban, flags };
304 }
305 
306 /**
307  * igt_hang_ring:
308  * @fd: open i915 drm file descriptor
309  * @ring: execbuf ring flag
310  *
311  * This helper function injects a hanging batch into @ring. It returns a
312  * #igt_hang_t structure which must be passed to igt_post_hang_ring() for
313  * hang post-processing (after the gpu hang interaction has been tested.
314  *
315  * Returns:
316  * Structure with helper internal state for igt_post_hang_ring().
317  */
igt_hang_ring(int fd,int ring)318 igt_hang_t igt_hang_ring(int fd, int ring)
319 {
320 	return igt_hang_ctx(fd, 0, ring, 0);
321 }
322 
323 /**
324  * igt_post_hang_ring:
325  * @fd: open i915 drm file descriptor
326  * @arg: hang state from igt_hang_ring()
327  *
328  * This function does the necessary post-processing after a gpu hang injected
329  * with igt_hang_ring().
330  */
igt_post_hang_ring(int fd,igt_hang_t arg)331 void igt_post_hang_ring(int fd, igt_hang_t arg)
332 {
333 	if (!arg.spin)
334 		return;
335 
336 	gem_sync(fd, arg.spin->handle); /* Wait until it hangs */
337 	igt_spin_free(fd, arg.spin);
338 
339 	context_set_ban(fd, arg.ctx, arg.ban);
340 
341 	if ((arg.flags & HANG_ALLOW_CAPTURE) == 0) {
342 		struct drm_i915_gem_context_param param = {
343 			.ctx_id = arg.ctx,
344 			.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE,
345 			.value = 0,
346 		};
347 		__gem_context_set_param(fd, ¶m);
348 
349 		eat_error_state(fd);
350 	}
351 }
352 
/**
 * igt_force_gpu_reset:
 * @drm_fd: open i915 drm file descriptor
 *
 * Forces a gpu reset using the i915_wedged debugfs interface. To be used to
 * recover from situations where the hangcheck didn't trigger and/or the gpu is
 * stuck, either because the test manually disabled gpu resets or because the
 * test hit a hangcheck bug.
 *
 * After requesting the reset, i915_wedged is read back and the function
 * asserts that the value read is zero.
 */
void igt_force_gpu_reset(int drm_fd)
{
	int dir;
	/*
	 * Start from 0 so the assertion below never inspects an
	 * indeterminate value when nothing can be parsed from i915_wedged.
	 */
	int wedged = 0;

	igt_debug("Triggering GPU reset\n");

	dir = igt_debugfs_dir(drm_fd);

	igt_sysfs_set(dir, "i915_wedged", "-1");
	igt_sysfs_scanf(dir, "i915_wedged", "%d", &wedged);

	close(dir);
	/* Do not let errno from the debugfs accesses leak to the caller. */
	errno = 0;

	igt_assert(!wedged);
}
377 
378 /* GPU abusers */
379 static struct igt_helper_process hang_helper;
380 static void __attribute__((noreturn))
hang_helper_process(pid_t pid,int fd)381 hang_helper_process(pid_t pid, int fd)
382 {
383 	while (1) {
384 		if (kill(pid, 0)) /* Parent has died, so must we. */
385 			exit(0);
386 
387 		igt_post_hang_ring(fd,
388 				   igt_hang_ring(fd, I915_EXEC_DEFAULT));
389 
390 		sleep(1);
391 	}
392 }
393 
394 /**
395  * igt_fork_hang_helper:
396  *
397  * Fork a child process using #igt_fork_helper to hang the default engine
398  * of the GPU at regular intervals.
399  *
400  * This is useful to exercise slow running code (such as aperture placement)
401  * which needs to be robust against a GPU reset.
402  *
403  * This function automatically skips when test requirements aren't met using
404  * igt_skip().
405  */
igt_fork_hang_helper(void)406 void igt_fork_hang_helper(void)
407 {
408 	int fd, gen;
409 
410 	fd = drm_open_driver(DRIVER_INTEL);
411 
412 	gen = intel_gen(intel_get_drm_devid(fd));
413 	igt_skip_on(gen < 5);
414 
415 	igt_fork_helper(&hang_helper)
416 		hang_helper_process(getppid(), fd);
417 
418 	close(fd);
419 }
420 
421 /**
422  * igt_stop_hang_helper:
423  *
424  * Stops the child process spawned with igt_fork_hang_helper().
425  *
426  * In tests with subtests this function can be called outside of failure
427  * catching code blocks like #igt_fixture or #igt_subtest.
428  */
igt_stop_hang_helper(void)429 void igt_stop_hang_helper(void)
430 {
431 	if (hang_helper.running)
432 		igt_stop_helper(&hang_helper);
433 }
434 
/**
 * igt_open_forcewake_handle:
 * @fd: open i915 drm file descriptor
 *
 * This function opens the debugfs forcewake file and so prevents the GT from
 * suspending. The reference is automatically dropped when the handle is
 * closed.
 *
 * If the IGT_NO_FORCEWAKE environment variable is set, nothing is opened.
 *
 * Returns:
 * The file descriptor of the forcewake handle or -1 if that didn't work out.
 */
int igt_open_forcewake_handle(int fd)
{
	const char *opt_out = getenv("IGT_NO_FORCEWAKE");

	if (opt_out != NULL)
		return -1;

	return igt_debugfs_open(fd, "i915_forcewake_user", O_WRONLY);
}
450 
451 #if defined(__x86_64__) || defined(__i386__)
/* Value of the "clflush size" entry from /proc/cpuinfo; 0 until it is read. */
static unsigned int clflush_size;

/*
 * Prepare igt_clflush_range() by reading the first processor stanza of
 * /proc/cpuinfo.
 *
 * Returns 1 straight away if clflush_size is already set. Otherwise returns
 * non-zero only when the "flags" line mentions clflush and a non-zero
 * "clflush size" was read; returns 0 if /proc/cpuinfo cannot be opened.
 */
int igt_setup_clflush(void)
{
	FILE *cpuinfo;
	char *text = NULL;
	size_t capacity = 0;
	bool seen_processor = false;
	bool flag_present = false;

	if (clflush_size != 0)
		return 1;

	cpuinfo = fopen("/proc/cpuinfo", "r");
	if (!cpuinfo)
		return 0;

	while (getline(&text, &capacity, cpuinfo) != -1) {
		if (!strncmp(text, "processor", 9)) {
			/* A second "processor" line starts the next stanza. */
			if (seen_processor)
				break;
			seen_processor = true;
		} else if (!strncmp(text, "flags", 5)) {
			if (strstr(text, "clflush") != NULL)
				flag_present = true;
		} else if (!strncmp(text, "clflush size", 12)) {
			const char *sep = strchr(text, ':');

			if (sep != NULL)
				clflush_size = atoi(sep + 1);
		}
	}

	free(text);
	fclose(cpuinfo);

	return flag_present && clflush_size;
}
492 
493 __attribute__((target("sse2")))
igt_clflush_range(void * addr,int size)494 void igt_clflush_range(void *addr, int size)
495 {
496 	char *p, *end;
497 
498 	end = (char *)addr + size;
499 	p = (char *)((uintptr_t)addr & ~((uintptr_t)clflush_size - 1));
500 
501 	__builtin_ia32_mfence();
502 	for (; p < end; p += clflush_size)
503 		__builtin_ia32_clflush(p);
504 	__builtin_ia32_clflush(end - 1); /* magic serialisation for byt+ */
505 	__builtin_ia32_mfence();
506 }
507 #else
/*
 * Fallback for builds that are neither x86-64 nor i386: clflush is never
 * available, so setup always reports failure (0).
 */
int igt_setup_clflush(void)
{
	/* requires mfence + clflush, both SSE2 instructions */
	const int supported = 0;

	return supported;
}
513 
/*
 * Fallback for builds that are neither x86-64 nor i386: nothing is flushed,
 * a notice is written to stderr instead. @addr and @size are ignored.
 */
void igt_clflush_range(void *addr, int size)
{
	(void)addr;
	(void)size;

	fputs("igt_clflush_range() unsupported\n", stderr);
}
518 #endif
519 
/**
 * intel_detect_and_clear_missed_interrupts:
 * @fd: open i915 drm file descriptor, used to quiesce the gpu
 *
 * This function idles the GPU and then queries whether there has
 * been a missed interrupt reported by the driver. Afterwards it
 * clears the missed interrupt flag, in order to disable the timer
 * fallback for the next test.
 *
 * Returns:
 * The value parsed (as hexadecimal) from the i915_ring_missed_irq debugfs
 * file. The result starts out as 0, so 0 is returned when nothing is parsed.
 */
unsigned intel_detect_and_clear_missed_interrupts(int fd)
{
	unsigned missed = 0;
	int debugfs;

	gem_quiescent_gpu(fd);

	debugfs = igt_debugfs_dir(fd);

	igt_sysfs_scanf(debugfs, "i915_ring_missed_irq", "%x", &missed);
	if (missed != 0)
		igt_sysfs_set(debugfs, "i915_ring_missed_irq", "0");

	close(debugfs);

	/* Do not let errno from the debugfs accesses leak to the caller. */
	errno = 0;
	return missed;
}
548 
549 const struct intel_execution_engine intel_execution_engines[] = {
550 	{ "default", NULL, 0, 0 },
551 	{ "render", "rcs0", I915_EXEC_RENDER, 0 },
552 	{ "bsd", "vcs0", I915_EXEC_BSD, 0 },
553 	{ "bsd1", "vcs0", I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
554 	{ "bsd2", "vcs1", I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
555 	{ "blt", "bcs0", I915_EXEC_BLT, 0 },
556 	{ "vebox", "vecs0", I915_EXEC_VEBOX, 0 },
557 	{ NULL, 0, 0 }
558 };
559 
gem_class_can_store_dword(int fd,int class)560 bool gem_class_can_store_dword(int fd, int class)
561 {
562 	uint16_t devid = intel_get_drm_devid(fd);
563 	const struct intel_device_info *info = intel_get_device_info(devid);
564 	const int gen = ffs(info->gen);
565 
566 	if (gen <= 2) /* requires physical addresses */
567 		return false;
568 
569 	if (gen == 3 && (info->is_grantsdale || info->is_alviso))
570 		return false; /* only supports physical addresses */
571 
572 	if (gen == 6 && class == I915_ENGINE_CLASS_VIDEO)
573 		return false;
574 
575 	if (info->is_broadwater)
576 		return false; /* Not sure yet... */
577 
578 	return true;
579 }
580 
/*
 * Same question as gem_class_can_store_dword(), but for an engine given as
 * execbuf flags: the flags are first translated to an engine class.
 */
bool gem_can_store_dword(int fd, unsigned int engine)
{
	const int class = gem_execbuf_flags_to_engine_class(engine);

	return gem_class_can_store_dword(fd, class);
}
586 
587 const struct intel_execution_engine2 intel_execution_engines2[] = {
588 	{ "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER },
589 	{ "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT },
590 	{ "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD | I915_EXEC_BSD_RING1 },
591 	{ "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD | I915_EXEC_BSD_RING2 },
592 	{ "vcs2", I915_ENGINE_CLASS_VIDEO, 2, -1 },
593 	{ "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX },
594 	{ }
595 };
596 
gem_execbuf_flags_to_engine_class(unsigned int flags)597 int gem_execbuf_flags_to_engine_class(unsigned int flags)
598 {
599 	switch (flags & 0x3f) {
600 	case I915_EXEC_DEFAULT:
601 	case I915_EXEC_RENDER:
602 		return I915_ENGINE_CLASS_RENDER;
603 	case I915_EXEC_BLT:
604 		return I915_ENGINE_CLASS_COPY;
605 	case I915_EXEC_BSD:
606 		return I915_ENGINE_CLASS_VIDEO;
607 	case I915_EXEC_VEBOX:
608 		return I915_ENGINE_CLASS_VIDEO_ENHANCE;
609 	default:
610 		igt_assert(0);
611 	}
612 }
613 
gem_ring_is_physical_engine(int fd,unsigned ring)614 bool gem_ring_is_physical_engine(int fd, unsigned ring)
615 {
616 	if (ring == I915_EXEC_DEFAULT)
617 		return false;
618 
619 	/* BSD uses an extra flag to chose between aliasing modes */
620 	if ((ring & 63) == I915_EXEC_BSD) {
621 		bool explicit_bsd = ring & (3 << 13);
622 		bool has_bsd2 = gem_has_bsd2(fd);
623 		return explicit_bsd ? has_bsd2 : !has_bsd2;
624 	}
625 
626 	return true;
627 }
628 
/*
 * Tell whether @ring both names one specific engine (see
 * gem_ring_is_physical_engine()) and is reported as present by
 * gem_has_ring(). The presence check is only made for non-alias selectors.
 */
bool gem_ring_has_physical_engine(int fd, unsigned ring)
{
	return gem_ring_is_physical_engine(fd, ring) && gem_has_ring(fd, ring);
}
636