1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <limits.h>
25 #include <string.h>
26 #include <strings.h>
27 #include <signal.h>
28 #include <errno.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/ioctl.h>
32 #include <fcntl.h>
33
34 #include "drmtest.h"
35 #include "igt_aux.h"
36 #include "igt_core.h"
37 #include "igt_gt.h"
38 #include "igt_sysfs.h"
39 #include "igt_debugfs.h"
40 #include "ioctl_wrappers.h"
41 #include "intel_reg.h"
42 #include "intel_chipset.h"
43 #include "igt_dummyload.h"
44 #include "i915/gem_engine_topology.h"
45
46 /**
47 * SECTION:igt_gt
48 * @short_description: GT support library
49 * @title: GT
50 * @include: igt.h
51 *
52 * This library provides various auxiliary helper functions to handle general
53 * interactions with the GT like forcewake handling, injecting hangs or stopping
54 * engines.
55 */
56
has_gpu_reset(int fd)57 static bool has_gpu_reset(int fd)
58 {
59 static int once = -1;
60 if (once < 0) {
61 struct drm_i915_getparam gp;
62 int val = 0;
63
64 memset(&gp, 0, sizeof(gp));
65 gp.param = 35; /* HAS_GPU_RESET */
66 gp.value = &val;
67
68 if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
69 once = intel_gen(intel_get_drm_devid(fd)) >= 5;
70 else
71 once = val > 0;
72 }
73 return once;
74 }
75
/*
 * Discard the recorded error state of @dev by writing an empty string to the
 * "error" file in the directory returned by igt_sysfs_open(). Silently does
 * nothing if that directory cannot be opened.
 */
static void eat_error_state(int dev)
{
	const int sysfs = igt_sysfs_open(dev);

	if (sysfs >= 0) {
		/* Any write to the error state clears it */
		igt_sysfs_set(sysfs, "error", "");
		close(sysfs);
	}
}
88
/**
 * igt_require_hang_ring:
 * @fd: open i915 drm file descriptor
 * @ring: execbuf ring flag
 *
 * Convenience helper to check whether advanced hang injection is supported by
 * the kernel. Uses igt_skip to automatically skip the test/subtest if this
 * isn't the case.
 *
 * Note that we can't simply call this from igt_hang_ring() since some tests
 * want to exercise gpu wedging behavior, for which they intentionally disable
 * gpu reset support but still want to inject a hang; see for example
 * tests/gem_eio.c. Instead, we expect the first invocation of
 * igt_require_hang_ring() to be from a vanilla context, and the
 * has_gpu_reset() result determined then is used for all later instances.
 * This allows us the convenience of double checking when injecting hangs,
 * whilst pushing the complexity to the tests that are deliberately trying to
 * break the box.
 *
 * This function is also controlled by the environment variables:
 *
 * IGT_HANG (boolean) - if false, skip all tests that try to inject a hang.
 * Default: true
 *
 * IGT_HANG_WITHOUT_RESET (boolean) - if true, allow the hang even if the
 * kernel does not support GPU recovery. The machine will be wedged afterwards
 * (and so require a reboot between testing), but it does allow limited testing
 * to be done under hang injection.
 * Default: false
 */
void igt_require_hang_ring(int fd, int ring)
{
	const bool hang_enabled = igt_check_boolean_env_var("IGT_HANG", true);

	if (!hang_enabled)
		igt_skip("hang injection disabled by user");

	gem_require_ring(fd, ring);
	gem_context_require_bannable(fd);

	/* The user explicitly accepted a wedged machine: no reset needed. */
	if (igt_check_boolean_env_var("IGT_HANG_WITHOUT_RESET", false))
		return;

	igt_require(has_gpu_reset(fd));
}
128
context_get_ban(int fd,unsigned ctx)129 static unsigned context_get_ban(int fd, unsigned ctx)
130 {
131 struct drm_i915_gem_context_param param = {
132 .ctx_id = ctx,
133 .param = I915_CONTEXT_PARAM_BANNABLE,
134 };
135
136 if (__gem_context_get_param(fd, ¶m) == -EINVAL) {
137 igt_assert(param.value == 0);
138 param.param = I915_CONTEXT_PARAM_BAN_PERIOD;
139 gem_context_get_param(fd, ¶m);
140 }
141
142 return param.value;
143 }
144
context_set_ban(int fd,unsigned ctx,unsigned ban)145 static void context_set_ban(int fd, unsigned ctx, unsigned ban)
146 {
147 struct drm_i915_gem_context_param param = {
148 .ctx_id = ctx,
149 .param = I915_CONTEXT_PARAM_BANNABLE,
150 .value = ban,
151 };
152
153 if(__gem_context_set_param(fd, ¶m) == -EINVAL) {
154 igt_assert(param.value == ban);
155 param.param = I915_CONTEXT_PARAM_BAN_PERIOD;
156 gem_context_set_param(fd, ¶m);
157 }
158 }
159
igt_allow_hang(int fd,unsigned ctx,unsigned flags)160 igt_hang_t igt_allow_hang(int fd, unsigned ctx, unsigned flags)
161 {
162 struct drm_i915_gem_context_param param = {
163 .ctx_id = ctx,
164 };
165 unsigned ban;
166
167 /*
168 * If the driver is already wedged, we don't expect it to be able
169 * to recover from reset and for it to remain wedged. It's hard to
170 * say even if we do hang/reset making the test suspect.
171 */
172 igt_require_gem(fd);
173
174 if (!igt_check_boolean_env_var("IGT_HANG", true))
175 igt_skip("hang injection disabled by user");
176 gem_context_require_bannable(fd);
177 if (!igt_check_boolean_env_var("IGT_HANG_WITHOUT_RESET", false))
178 igt_require(has_gpu_reset(fd));
179
180 igt_require(igt_sysfs_set_parameter
181 (fd, "reset", "%d", INT_MAX /* any reset method */));
182
183 if ((flags & HANG_ALLOW_CAPTURE) == 0) {
184 param.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE;
185 param.value = 1;
186 /* Older kernels may not have NO_ERROR_CAPTURE, in which case
187 * we just eat the error state in post-hang (and hope we eat
188 * the right one).
189 */
190 __gem_context_set_param(fd, ¶m);
191 }
192
193 ban = context_get_ban(fd, ctx);
194 if ((flags & HANG_ALLOW_BAN) == 0)
195 context_set_ban(fd, ctx, 0);
196
197 return (struct igt_hang){ 0, ctx, ban, flags };
198 }
199
/**
 * igt_disallow_hang:
 * @fd: open i915 drm file descriptor
 * @arg: hang state from igt_allow_hang()
 *
 * Restores the ban setting saved in @arg. If HANG_ALLOW_CAPTURE was not set
 * in the saved flags, error capture is re-enabled on the context and any
 * recorded error state is discarded.
 */
void igt_disallow_hang(int fd, igt_hang_t arg)
{
	context_set_ban(fd, arg.ctx, arg.ban);

	if ((arg.flags & HANG_ALLOW_CAPTURE) == 0) {
		struct drm_i915_gem_context_param param = {
			.ctx_id = arg.ctx,
			.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE,
			.value = 0,
		};
		/* Best effort: the result is deliberately not checked. */
		__gem_context_set_param(fd, &param);

		eat_error_state(fd);
	}
}
215
216 /**
217 * has_ctx_exec:
218 * @fd: open i915 drm file descriptor
219 * @ring: execbuf ring flag
220 * @ctx: Context to be checked
221 *
222 * This helper function checks if non default context submission is allowed
223 * on a ring.
224 *
225 * Returns:
226 * True if allowed
227 *
228 */
has_ctx_exec(int fd,unsigned ring,uint32_t ctx)229 static bool has_ctx_exec(int fd, unsigned ring, uint32_t ctx)
230 {
231 struct drm_i915_gem_execbuffer2 execbuf;
232 struct drm_i915_gem_exec_object2 exec;
233
234 /* silly ABI, the kernel thinks everyone who has BSD also has BSD2 */
235 if ((ring & ~(3<<13)) == I915_EXEC_BSD) {
236 if (ring & (3 << 13) && !gem_has_bsd2(fd))
237 return false;
238 }
239
240 memset(&exec, 0, sizeof(exec));
241 memset(&execbuf, 0, sizeof(execbuf));
242 execbuf.buffers_ptr = to_user_pointer(&exec);
243 execbuf.buffer_count = 1;
244 execbuf.flags = ring;
245 execbuf.rsvd1 = ctx;
246 /*
247 * If context submission is not allowed, this will return EINVAL
248 * Otherwise, this will return ENOENT on account of no gem obj
249 * being submitted
250 */
251 return __gem_execbuf(fd, &execbuf) == -ENOENT;
252 }
253
254 /**
255 * igt_hang_ring_ctx:
256 * @fd: open i915 drm file descriptor
257 * @ctx: the contxt specifier
258 * @ring: execbuf ring flag
259 * @flags: set of flags to control execution
260 * @offset: The resultant gtt offset of the exec obj
261 *
262 * This helper function injects a hanging batch associated with @ctx into @ring.
263 * It returns a #igt_hang_t structure which must be passed to
264 * igt_post_hang_ring() for hang post-processing (after the gpu hang
265 * interaction has been tested.
266 *
267 * Returns:
268 * Structure with helper internal state for igt_post_hang_ring().
269 */
igt_hang_ctx(int fd,uint32_t ctx,int ring,unsigned flags)270 igt_hang_t igt_hang_ctx(int fd, uint32_t ctx, int ring, unsigned flags)
271 {
272 struct drm_i915_gem_context_param param;
273 igt_spin_t *spin;
274 unsigned ban;
275
276 igt_require_hang_ring(fd, ring);
277
278 /* check if non-default ctx submission is allowed */
279 igt_require(ctx == 0 || has_ctx_exec(fd, ring, ctx));
280
281 param.ctx_id = ctx;
282 param.size = 0;
283
284 if ((flags & HANG_ALLOW_CAPTURE) == 0) {
285 param.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE;
286 param.value = 1;
287 /* Older kernels may not have NO_ERROR_CAPTURE, in which case
288 * we just eat the error state in post-hang (and hope we eat
289 * the right one).
290 */
291 __gem_context_set_param(fd, ¶m);
292 }
293
294 ban = context_get_ban(fd, ctx);
295 if ((flags & HANG_ALLOW_BAN) == 0)
296 context_set_ban(fd, ctx, 0);
297
298 spin = __igt_spin_new(fd,
299 .ctx = ctx,
300 .engine = ring,
301 .flags = IGT_SPIN_NO_PREEMPTION);
302
303 return (igt_hang_t){ spin, ctx, ban, flags };
304 }
305
306 /**
307 * igt_hang_ring:
308 * @fd: open i915 drm file descriptor
309 * @ring: execbuf ring flag
310 *
311 * This helper function injects a hanging batch into @ring. It returns a
312 * #igt_hang_t structure which must be passed to igt_post_hang_ring() for
313 * hang post-processing (after the gpu hang interaction has been tested.
314 *
315 * Returns:
316 * Structure with helper internal state for igt_post_hang_ring().
317 */
igt_hang_ring(int fd,int ring)318 igt_hang_t igt_hang_ring(int fd, int ring)
319 {
320 return igt_hang_ctx(fd, 0, ring, 0);
321 }
322
323 /**
324 * igt_post_hang_ring:
325 * @fd: open i915 drm file descriptor
326 * @arg: hang state from igt_hang_ring()
327 *
328 * This function does the necessary post-processing after a gpu hang injected
329 * with igt_hang_ring().
330 */
igt_post_hang_ring(int fd,igt_hang_t arg)331 void igt_post_hang_ring(int fd, igt_hang_t arg)
332 {
333 if (!arg.spin)
334 return;
335
336 gem_sync(fd, arg.spin->handle); /* Wait until it hangs */
337 igt_spin_free(fd, arg.spin);
338
339 context_set_ban(fd, arg.ctx, arg.ban);
340
341 if ((arg.flags & HANG_ALLOW_CAPTURE) == 0) {
342 struct drm_i915_gem_context_param param = {
343 .ctx_id = arg.ctx,
344 .param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE,
345 .value = 0,
346 };
347 __gem_context_set_param(fd, ¶m);
348
349 eat_error_state(fd);
350 }
351 }
352
/**
 * igt_force_gpu_reset:
 * @drm_fd: open i915 drm file descriptor
 *
 * Forces a gpu reset using the i915_wedged debugfs interface. To be used to
 * recover from situations where the hangcheck didn't trigger and/or the gpu is
 * stuck, either because the test manually disabled gpu resets or because the
 * test hit a hangcheck bug.
 *
 * Asserts that the driver does not report itself as wedged afterwards.
 */
void igt_force_gpu_reset(int drm_fd)
{
	int dir;
	/*
	 * Start from a defined value: if the read-back below stores nothing,
	 * the assertion must not inspect an indeterminate variable.
	 */
	int wedged = 0;

	igt_debug("Triggering GPU reset\n");

	dir = igt_debugfs_dir(drm_fd);

	igt_sysfs_set(dir, "i915_wedged", "-1");
	igt_sysfs_scanf(dir, "i915_wedged", "%d", &wedged);

	close(dir);
	errno = 0; /* do not leak errno from the helpers above to the caller */

	igt_assert(!wedged);
}
377
378 /* GPU abusers */
379 static struct igt_helper_process hang_helper;
380 static void __attribute__((noreturn))
hang_helper_process(pid_t pid,int fd)381 hang_helper_process(pid_t pid, int fd)
382 {
383 while (1) {
384 if (kill(pid, 0)) /* Parent has died, so must we. */
385 exit(0);
386
387 igt_post_hang_ring(fd,
388 igt_hang_ring(fd, I915_EXEC_DEFAULT));
389
390 sleep(1);
391 }
392 }
393
394 /**
395 * igt_fork_hang_helper:
396 *
397 * Fork a child process using #igt_fork_helper to hang the default engine
398 * of the GPU at regular intervals.
399 *
400 * This is useful to exercise slow running code (such as aperture placement)
401 * which needs to be robust against a GPU reset.
402 *
403 * This function automatically skips when test requirements aren't met using
404 * igt_skip().
405 */
igt_fork_hang_helper(void)406 void igt_fork_hang_helper(void)
407 {
408 int fd, gen;
409
410 fd = drm_open_driver(DRIVER_INTEL);
411
412 gen = intel_gen(intel_get_drm_devid(fd));
413 igt_skip_on(gen < 5);
414
415 igt_fork_helper(&hang_helper)
416 hang_helper_process(getppid(), fd);
417
418 close(fd);
419 }
420
421 /**
422 * igt_stop_hang_helper:
423 *
424 * Stops the child process spawned with igt_fork_hang_helper().
425 *
426 * In tests with subtests this function can be called outside of failure
427 * catching code blocks like #igt_fixture or #igt_subtest.
428 */
igt_stop_hang_helper(void)429 void igt_stop_hang_helper(void)
430 {
431 if (hang_helper.running)
432 igt_stop_helper(&hang_helper);
433 }
434
/**
 * igt_open_forcewake_handle:
 * @fd: open i915 drm file descriptor
 *
 * This function opens the debugfs forcewake file and so prevents the GT from
 * suspending. The reference is automatically dropped when the returned file
 * descriptor is closed.
 *
 * Setting the IGT_NO_FORCEWAKE environment variable (to any value) disables
 * this and makes the function return -1 without opening anything.
 *
 * Returns:
 * The file descriptor of the forcewake handle or -1 if that didn't work out.
 */
int igt_open_forcewake_handle(int fd)
{
	const char *opt_out = getenv("IGT_NO_FORCEWAKE");

	if (opt_out != NULL)
		return -1;

	return igt_debugfs_open(fd, "i915_forcewake_user", O_WRONLY);
}
450
451 #if defined(__x86_64__) || defined(__i386__)
/* Cache line flush granularity in bytes; 0 until igt_setup_clflush() finds it. */
static unsigned int clflush_size;

/**
 * igt_setup_clflush:
 *
 * Parses the first processor stanza of /proc/cpuinfo to learn whether the
 * "flags" line mentions clflush and what the "clflush size" is. The size is
 * stored for later use by igt_clflush_range().
 *
 * Returns:
 * 1 if a clflush size is already known from an earlier call, or if both the
 * flag and a non-zero size were found; 0 otherwise (including when
 * /proc/cpuinfo cannot be opened).
 */
int igt_setup_clflush(void)
{
	FILE *cpuinfo;
	char *buf = NULL;
	size_t cap = 0;
	bool seen_processor = false;
	bool has_flag = false;

	if (clflush_size)
		return 1;

	cpuinfo = fopen("/proc/cpuinfo", "r");
	if (!cpuinfo)
		return 0;

	while (getline(&buf, &cap, cpuinfo) != -1) {
		if (!strncmp(buf, "processor", 9)) {
			/* A second "processor" line starts the next stanza. */
			if (seen_processor)
				break;
			seen_processor = true;
		}

		if (!strncmp(buf, "flags", 5) && strstr(buf, "clflush"))
			has_flag = true;

		if (!strncmp(buf, "clflush size", 12)) {
			const char *sep = strchr(buf, ':');

			if (sep)
				clflush_size = atoi(sep + 1);
		}
	}
	free(buf);
	fclose(cpuinfo);

	return has_flag && clflush_size;
}
492
493 __attribute__((target("sse2")))
igt_clflush_range(void * addr,int size)494 void igt_clflush_range(void *addr, int size)
495 {
496 char *p, *end;
497
498 end = (char *)addr + size;
499 p = (char *)((uintptr_t)addr & ~((uintptr_t)clflush_size - 1));
500
501 __builtin_ia32_mfence();
502 for (; p < end; p += clflush_size)
503 __builtin_ia32_clflush(p);
504 __builtin_ia32_clflush(end - 1); /* magic serialisation for byt+ */
505 __builtin_ia32_mfence();
506 }
507 #else
/*
 * Fallback for builds where neither __x86_64__ nor __i386__ is defined:
 * always reports clflush as unavailable.
 */
int igt_setup_clflush(void)
{
	/* requires mfence + clflush, both SSE2 instructions */
	const int supported = 0;

	return supported;
}
513
/*
 * Fallback for builds where neither __x86_64__ nor __i386__ is defined:
 * flushes nothing and only reports on stderr that the operation is
 * unsupported.
 */
void igt_clflush_range(void *addr, int size)
{
	(void)addr;
	(void)size;

	fputs("igt_clflush_range() unsupported\n", stderr);
}
518 #endif
519
/**
 * intel_detect_and_clear_missed_interrupts:
 * @fd: open i915 drm file descriptor, used to quiesce the gpu
 *
 * This function idles the GPU and then queries whether there has
 * been a missed interrupt reported by the driver. Afterwards it
 * clears the missed interrupt flag, in order to disable the timer
 * fallback for the next test.
 *
 * Returns:
 * The value read from i915_ring_missed_irq, or 0 if nothing was read.
 */
unsigned intel_detect_and_clear_missed_interrupts(int fd)
{
	unsigned pending = 0;
	int debugfs;

	gem_quiescent_gpu(fd);

	debugfs = igt_debugfs_dir(fd);

	igt_sysfs_scanf(debugfs, "i915_ring_missed_irq", "%x", &pending);
	if (pending != 0)
		igt_sysfs_set(debugfs, "i915_ring_missed_irq", "0");

	close(debugfs);

	errno = 0;
	return pending;
}
548
549 const struct intel_execution_engine intel_execution_engines[] = {
550 { "default", NULL, 0, 0 },
551 { "render", "rcs0", I915_EXEC_RENDER, 0 },
552 { "bsd", "vcs0", I915_EXEC_BSD, 0 },
553 { "bsd1", "vcs0", I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
554 { "bsd2", "vcs1", I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
555 { "blt", "bcs0", I915_EXEC_BLT, 0 },
556 { "vebox", "vecs0", I915_EXEC_VEBOX, 0 },
557 { NULL, 0, 0 }
558 };
559
gem_class_can_store_dword(int fd,int class)560 bool gem_class_can_store_dword(int fd, int class)
561 {
562 uint16_t devid = intel_get_drm_devid(fd);
563 const struct intel_device_info *info = intel_get_device_info(devid);
564 const int gen = ffs(info->gen);
565
566 if (gen <= 2) /* requires physical addresses */
567 return false;
568
569 if (gen == 3 && (info->is_grantsdale || info->is_alviso))
570 return false; /* only supports physical addresses */
571
572 if (gen == 6 && class == I915_ENGINE_CLASS_VIDEO)
573 return false;
574
575 if (info->is_broadwater)
576 return false; /* Not sure yet... */
577
578 return true;
579 }
580
/**
 * gem_can_store_dword:
 * @fd: open i915 drm file descriptor
 * @engine: execbuf engine flags
 *
 * Maps @engine to its engine class and defers to
 * gem_class_can_store_dword().
 *
 * Returns:
 * The result of gem_class_can_store_dword() for that class.
 */
bool gem_can_store_dword(int fd, unsigned int engine)
{
	const int engine_class = gem_execbuf_flags_to_engine_class(engine);

	return gem_class_can_store_dword(fd, engine_class);
}
586
587 const struct intel_execution_engine2 intel_execution_engines2[] = {
588 { "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER },
589 { "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT },
590 { "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD | I915_EXEC_BSD_RING1 },
591 { "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD | I915_EXEC_BSD_RING2 },
592 { "vcs2", I915_ENGINE_CLASS_VIDEO, 2, -1 },
593 { "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX },
594 { }
595 };
596
gem_execbuf_flags_to_engine_class(unsigned int flags)597 int gem_execbuf_flags_to_engine_class(unsigned int flags)
598 {
599 switch (flags & 0x3f) {
600 case I915_EXEC_DEFAULT:
601 case I915_EXEC_RENDER:
602 return I915_ENGINE_CLASS_RENDER;
603 case I915_EXEC_BLT:
604 return I915_ENGINE_CLASS_COPY;
605 case I915_EXEC_BSD:
606 return I915_ENGINE_CLASS_VIDEO;
607 case I915_EXEC_VEBOX:
608 return I915_ENGINE_CLASS_VIDEO_ENHANCE;
609 default:
610 igt_assert(0);
611 }
612 }
613
gem_ring_is_physical_engine(int fd,unsigned ring)614 bool gem_ring_is_physical_engine(int fd, unsigned ring)
615 {
616 if (ring == I915_EXEC_DEFAULT)
617 return false;
618
619 /* BSD uses an extra flag to chose between aliasing modes */
620 if ((ring & 63) == I915_EXEC_BSD) {
621 bool explicit_bsd = ring & (3 << 13);
622 bool has_bsd2 = gem_has_bsd2(fd);
623 return explicit_bsd ? has_bsd2 : !has_bsd2;
624 }
625
626 return true;
627 }
628
/**
 * gem_ring_has_physical_engine:
 * @fd: open i915 drm file descriptor
 * @ring: execbuf ring flag
 *
 * Returns:
 * True if @ring passes gem_ring_is_physical_engine() and gem_has_ring()
 * also reports it; gem_has_ring() is not consulted when the first check
 * fails.
 */
bool gem_ring_has_physical_engine(int fd, unsigned ring)
{
	return gem_ring_is_physical_engine(fd, ring) &&
	       gem_has_ring(fd, ring);
}
636