1 /*
2 * Copyright (c) 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Mika Kuoppala <mika.kuoppala@intel.com>
25 *
26 */
27
28 #include "igt.h"
29 #include "igt_sysfs.h"
30 #include <limits.h>
31 #include <stdbool.h>
32 #include <unistd.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <string.h>
36 #include <fcntl.h>
37 #include <inttypes.h>
38 #include <errno.h>
39 #include <sys/stat.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <time.h>
43 #include <signal.h>
44
45
/* Status codes returned by gem_reset_status(). */
#define RS_NO_ERROR 0
#define RS_BATCH_ACTIVE (1 << 0)	/* context was executing when the hang hit */
#define RS_BATCH_PENDING (1 << 1)	/* context had work queued behind a hang */
#define RS_UNKNOWN (1 << 2)


static uint32_t devid;

/* Local mirror of the kernel's drm_i915_reset_stats uAPI struct, so the
 * test builds against libdrm headers that predate it. */
struct local_drm_i915_reset_stats {
	__u32 ctx_id;		/* in: context to query */
	__u32 flags;		/* in: must be zero (kernel rejects otherwise, see _check_param_ctx) */
	__u32 reset_count;	/* out: global reset count; reads back zero for unprivileged callers (see test_reset_count) */
	__u32 batch_active;	/* out: hangs while this ctx was executing */
	__u32 batch_pending;	/* out: hangs while this ctx had work pending */
	__u32 pad;		/* in: must be zero (see _check_param_ctx) */
};

#define MAX_FD 32

#define GET_RESET_STATS_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x32, struct local_drm_i915_reset_stats)

#define LOCAL_I915_EXEC_VEBOX (4 << 0)
68
sync_gpu(void)69 static void sync_gpu(void)
70 {
71 int fd = drm_open_driver(DRIVER_INTEL);
72 gem_quiescent_gpu(fd);
73 close(fd);
74 }
75
noop(int fd,uint32_t ctx,const struct intel_execution_engine * e)76 static int noop(int fd, uint32_t ctx, const struct intel_execution_engine *e)
77 {
78 const uint32_t bbe = MI_BATCH_BUFFER_END;
79 struct drm_i915_gem_execbuffer2 eb;
80 struct drm_i915_gem_exec_object2 exec;
81 int ret;
82
83 memset(&exec, 0, sizeof(exec));
84 exec.handle = gem_create(fd, 4096);
85 igt_assert((int)exec.handle > 0);
86 gem_write(fd, exec.handle, 0, &bbe, sizeof(bbe));
87
88 memset(&eb, 0, sizeof(eb));
89 eb.buffers_ptr = to_user_pointer(&exec);
90 eb.buffer_count = 1;
91 eb.flags = e->exec_id | e->flags;
92 i915_execbuffer2_set_context_id(eb, ctx);
93
94 ret = __gem_execbuf(fd, &eb);
95 if (ret < 0) {
96 gem_close(fd, exec.handle);
97 return ret;
98 }
99
100 return exec.handle;
101 }
102
/* Probe whether @e accepts a batch on @ctx: 1 if it does, 0 otherwise. */
static int has_engine(int fd,
		      uint32_t ctx,
		      const struct intel_execution_engine *e)
{
	int bo = noop(fd, ctx, e);

	if (bo < 0)
		return 0;

	gem_close(fd, bo);
	return 1;
}
113
check_context(const struct intel_execution_engine * e)114 static void check_context(const struct intel_execution_engine *e)
115 {
116 int fd = drm_open_driver(DRIVER_INTEL);
117
118 gem_require_contexts(fd);
119 igt_require(has_engine(fd, gem_context_create(fd), e));
120
121 close(fd);
122 }
123
gem_reset_stats(int fd,int ctx_id,struct local_drm_i915_reset_stats * rs)124 static int gem_reset_stats(int fd, int ctx_id,
125 struct local_drm_i915_reset_stats *rs)
126 {
127 memset(rs, 0, sizeof(*rs));
128 rs->ctx_id = ctx_id;
129 rs->reset_count = -1;
130
131 if (drmIoctl(fd, GET_RESET_STATS_IOCTL, rs))
132 return -errno;
133
134 igt_assert(rs->reset_count != -1);
135 return 0;
136 }
137
gem_reset_status(int fd,int ctx_id)138 static int gem_reset_status(int fd, int ctx_id)
139 {
140 struct local_drm_i915_reset_stats rs;
141 int ret;
142
143 ret = gem_reset_stats(fd, ctx_id, &rs);
144 if (ret)
145 return ret;
146
147 if (rs.batch_active)
148 return RS_BATCH_ACTIVE;
149 if (rs.batch_pending)
150 return RS_BATCH_PENDING;
151
152 return RS_NO_ERROR;
153 }
154
/* Timestamp of the most recent hang injection; used to bound how long
 * hang detection/recovery may take (see the <= 30s asserts). */
static struct timespec ts_injected;

/* Flags for inject_hang(): */
#define BAN HANG_ALLOW_BAN	/* allow the kernel to ban the offender */
#define ASYNC 2			/* return without waiting for the reset */
inject_hang(int fd,uint32_t ctx,const struct intel_execution_engine * e,unsigned flags)159 static void inject_hang(int fd, uint32_t ctx,
160 const struct intel_execution_engine *e,
161 unsigned flags)
162 {
163 igt_hang_t hang;
164
165 clock_gettime(CLOCK_MONOTONIC, &ts_injected);
166
167 hang = igt_hang_ctx(fd, ctx, e->exec_id | e->flags, flags & BAN);
168 if ((flags & ASYNC) == 0)
169 igt_post_hang_ring(fd, hang);
170 }
171
/*
 * Translate an RS_* reset status value into a human-readable string.
 * Any out-of-range value, including negative error codes, maps to
 * "Unknown".
 */
static const char *status_to_string(int x)
{
	static const char * const strings[] = {
		"No error",
		"Guilty",
		"Pending",
	};

	/* Check negatives explicitly: the original relied on the
	 * signed->unsigned promotion in the comparison against the
	 * unsigned array size, which is fragile and trips
	 * -Wsign-compare. */
	if (x < 0 || (size_t)x >= sizeof(strings) / sizeof(strings[0]))
		return "Unknown";

	return strings[x];
}
183
/*
 * Compare the reset status of (@fd, @ctx) against the expected @status.
 * Returns 0 on match, the negative query error on failure, or 1 on a
 * mismatch; diagnostics go to the log.  @idx merely labels the output.
 */
static int _assert_reset_status(int idx, int fd, int ctx, int status)
{
	int actual = gem_reset_status(fd, ctx);

	if (actual < 0) {
		igt_info("reset status for %d ctx %d returned %d\n",
			 idx, ctx, actual);
		return actual;
	}

	if (actual == status)
		return 0;

	igt_info("%d:%d expected '%s' [%d], found '%s' [%d]\n",
		 idx, ctx,
		 status_to_string(status), status,
		 status_to_string(actual), actual);

	return 1;
}
206
/* Assert that (fd, ctx) currently reports exactly @status. */
#define assert_reset_status(idx, fd, ctx, status) \
	igt_assert(_assert_reset_status(idx, fd, ctx, status) == 0)
209
/*
 * Open @num_fds clients; each submits a noop on @e except client
 * @hang_index, which gets an async hang injected instead.  After the
 * GPU settles, only the hanging client may be reported guilty.
 *
 * With @hang_index < 0 no hang is injected and every client is expected
 * to report @rs_assumed_no_hang.
 */
static void test_rs(const struct intel_execution_engine *e,
		    int num_fds, int hang_index, int rs_assumed_no_hang)
{
	int fd[MAX_FD];
	int i;

	igt_assert_lte(num_fds, MAX_FD);
	igt_assert_lt(hang_index, MAX_FD);

	igt_debug("num fds=%d, hang index=%d\n", num_fds, hang_index);

	for (i = 0; i < num_fds; i++) {
		fd[i] = drm_open_driver(DRIVER_INTEL);
		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
	}

	/* Quiesce first so the injected hang is the only GPU activity. */
	sync_gpu();
	for (i = 0; i < num_fds; i++) {
		if (i == hang_index)
			inject_hang(fd[i], 0, e, ASYNC);
		else
			igt_assert(noop(fd[i], 0, e) > 0);
	}
	/* Wait for the hang to be detected and everything to drain. */
	sync_gpu();

	for (i = 0; i < num_fds; i++) {
		if (hang_index < 0) {
			assert_reset_status(i, fd[i], 0, rs_assumed_no_hang);
			continue;
		}

		/* Only the hanging fd may be flagged as guilty. */
		if (i < hang_index)
			assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
		if (i == hang_index)
			assert_reset_status(i, fd[i], 0, RS_BATCH_ACTIVE);
		if (i > hang_index)
			assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
	}

	/* Hang detection and recovery must not take unreasonably long. */
	igt_assert(igt_seconds_elapsed(&ts_injected) <= 30);

	for (i = 0; i < num_fds; i++)
		close(fd[i]);
}
254
#define MAX_CTX 100
/*
 * As test_rs(), but spread across @num_ctx contexts per fd: the hang is
 * injected on context @hang_context of fd @hang_index, and only that
 * (fd, ctx) pair may end up guilty; every other context on every fd
 * must stay clean.
 */
static void test_rs_ctx(const struct intel_execution_engine *e,
			int num_fds, int num_ctx, int hang_index,
			int hang_context)
{
	int i, j;
	int fd[MAX_FD];
	int ctx[MAX_FD][MAX_CTX];

	igt_assert_lte(num_fds, MAX_FD);
	igt_assert_lt(hang_index, MAX_FD);

	igt_assert_lte(num_ctx, MAX_CTX);
	igt_assert_lt(hang_context, MAX_CTX);

	/* Sanity check the machinery without any hang first. */
	test_rs(e, num_fds, -1, RS_NO_ERROR);

	for (i = 0; i < num_fds; i++) {
		fd[i] = drm_open_driver(DRIVER_INTEL);
		igt_assert(fd[i]);
		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);

		for (j = 0; j < num_ctx; j++) {
			ctx[i][j] = gem_context_create(fd[i]);
		}

		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
	}

	/* Freshly created contexts must all report a clean status. */
	for (i = 0; i < num_fds; i++) {
		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);

		for (j = 0; j < num_ctx; j++)
			assert_reset_status(i, fd[i], ctx[i][j], RS_NO_ERROR);

		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
	}

	/* One async hang on the chosen (fd, ctx); noops everywhere else. */
	for (i = 0; i < num_fds; i++) {
		for (j = 0; j < num_ctx; j++) {
			if (i == hang_index && j == hang_context)
				inject_hang(fd[i], ctx[i][j], e, ASYNC);
			else
				igt_assert(noop(fd[i], ctx[i][j], e) > 0);
		}
	}
	sync_gpu();

	igt_assert(igt_seconds_elapsed(&ts_injected) <= 30);

	/* The default contexts themselves never hung. */
	for (i = 0; i < num_fds; i++)
		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);

	/* Only the hanging (fd, ctx) pair may be guilty. */
	for (i = 0; i < num_fds; i++) {
		for (j = 0; j < num_ctx; j++) {
			if (i < hang_index)
				assert_reset_status(i, fd[i], ctx[i][j], RS_NO_ERROR);
			if (i == hang_index && j < hang_context)
				assert_reset_status(i, fd[i], ctx[i][j], RS_NO_ERROR);
			if (i == hang_index && j == hang_context)
				assert_reset_status(i, fd[i], ctx[i][j],
						    RS_BATCH_ACTIVE);
			if (i == hang_index && j > hang_context)
				assert_reset_status(i, fd[i], ctx[i][j],
						    RS_NO_ERROR);
			if (i > hang_index)
				assert_reset_status(i, fd[i], ctx[i][j],
						    RS_NO_ERROR);
		}
	}

	for (i = 0; i < num_fds; i++) {
		assert_reset_status(i, fd[i], 0, RS_NO_ERROR);
		close(fd[i]);
	}
}
331
/*
 * Repeatedly hang fd_bad with HANG_ALLOW_BAN until the kernel bans it
 * (execbuf returns -EIO), while fd_good keeps submitting noops.  Then
 * verify fd_good is unaffected and fd_bad's batch_active accounting
 * matches the number of hangs injected.
 */
static void test_ban(const struct intel_execution_engine *e)
{
	struct local_drm_i915_reset_stats rs_bad, rs_good;
	int fd_bad, fd_good;
	int ban, retry = 10;
	int active_count = 0;

	fd_bad = drm_open_driver(DRIVER_INTEL);
	fd_good = drm_open_driver(DRIVER_INTEL);

	assert_reset_status(fd_bad, fd_bad, 0, RS_NO_ERROR);
	assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR);

	noop(fd_bad, 0, e);
	noop(fd_good, 0, e);

	assert_reset_status(fd_bad, fd_bad, 0, RS_NO_ERROR);
	assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR);

	/* First hang is left pending while the good client keeps going. */
	inject_hang(fd_bad, 0, e, BAN | ASYNC);
	active_count++;

	noop(fd_good, 0, e);
	noop(fd_good, 0, e);

	/* Keep hanging synchronously until the ban kicks in. */
	while (retry--) {
		inject_hang(fd_bad, 0, e, BAN);
		active_count++;

		ban = noop(fd_bad, 0, e);
		if (ban == -EIO)
			break;

		/* Should not happen often but sometimes hang is declared too
		 * slow due to our way of faking hang using loop */
		gem_close(fd_bad, ban);

		igt_info("retrying for ban (%d)\n", retry);
	}
	igt_assert_eq(ban, -EIO);
	/* The innocent client must still be able to submit. */
	igt_assert_lt(0, noop(fd_good, 0, e));

	assert_reset_status(fd_bad, fd_bad, 0, RS_BATCH_ACTIVE);
	igt_assert_eq(gem_reset_stats(fd_bad, 0, &rs_bad), 0);
	igt_assert_eq(rs_bad.batch_active, active_count);

	assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR);
	igt_assert_eq(gem_reset_stats(fd_good, 0, &rs_good), 0);
	igt_assert_eq(rs_good.batch_active, 0);

	close(fd_bad);
	close(fd_good);
}
385
/*
 * Single-fd variant of test_ban(): repeatedly hang ctx_bad with
 * HANG_ALLOW_BAN until execbuf on it returns -EIO, while ctx_good on
 * the same fd keeps submitting.  Only ctx_bad's accounting may show
 * hangs.
 */
static void test_ban_ctx(const struct intel_execution_engine *e)
{
	struct local_drm_i915_reset_stats rs_bad, rs_good;
	int fd, ban, retry = 10;
	uint32_t ctx_good, ctx_bad;
	int active_count = 0;

	fd = drm_open_driver(DRIVER_INTEL);

	assert_reset_status(fd, fd, 0, RS_NO_ERROR);

	ctx_good = gem_context_create(fd);
	ctx_bad = gem_context_create(fd);

	assert_reset_status(fd, fd, 0, RS_NO_ERROR);
	assert_reset_status(fd, fd, ctx_good, RS_NO_ERROR);
	assert_reset_status(fd, fd, ctx_bad, RS_NO_ERROR);

	noop(fd, ctx_bad, e);
	noop(fd, ctx_good, e);

	assert_reset_status(fd, fd, ctx_good, RS_NO_ERROR);
	assert_reset_status(fd, fd, ctx_bad, RS_NO_ERROR);

	/* First hang is left pending while the good context keeps going. */
	inject_hang(fd, ctx_bad, e, BAN | ASYNC);
	active_count++;

	noop(fd, ctx_good, e);
	noop(fd, ctx_good, e);

	/* Keep hanging synchronously until the ban kicks in. */
	while (retry--) {
		inject_hang(fd, ctx_bad, e, BAN);
		active_count++;

		ban = noop(fd, ctx_bad, e);
		if (ban == -EIO)
			break;

		/* Should not happen often but sometimes hang is declared too
		 * slow due to our way of faking hang using loop */
		gem_close(fd, ban);

		igt_info("retrying for ban (%d)\n", retry);
	}
	igt_assert_eq(ban, -EIO);
	/* The innocent context must still be able to submit. */
	igt_assert_lt(0, noop(fd, ctx_good, e));

	assert_reset_status(fd, fd, ctx_bad, RS_BATCH_ACTIVE);
	igt_assert_eq(gem_reset_stats(fd, ctx_bad, &rs_bad), 0);
	igt_assert_eq(rs_bad.batch_active, active_count);

	assert_reset_status(fd, fd, ctx_good, RS_NO_ERROR);
	igt_assert_eq(gem_reset_stats(fd, ctx_good, &rs_good), 0);
	igt_assert_eq(rs_good.batch_active, 0);

	close(fd);
}
443
/*
 * Hang a context on one fd and verify that a context owned by a second,
 * independent fd never sees any reset status — neither right after the
 * hang nor after it has done (and synced) work of its own.
 */
static void test_unrelated_ctx(const struct intel_execution_engine *e)
{
	int fd1,fd2;
	int ctx_guilty, ctx_unrelated;

	fd1 = drm_open_driver(DRIVER_INTEL);
	fd2 = drm_open_driver(DRIVER_INTEL);
	assert_reset_status(0, fd1, 0, RS_NO_ERROR);
	assert_reset_status(1, fd2, 0, RS_NO_ERROR);
	ctx_guilty = gem_context_create(fd1);
	ctx_unrelated = gem_context_create(fd2);

	assert_reset_status(0, fd1, ctx_guilty, RS_NO_ERROR);
	assert_reset_status(1, fd2, ctx_unrelated, RS_NO_ERROR);

	/* Synchronous hang: returns once the reset has been handled. */
	inject_hang(fd1, ctx_guilty, e, 0);
	assert_reset_status(0, fd1, ctx_guilty, RS_BATCH_ACTIVE);
	assert_reset_status(1, fd2, ctx_unrelated, RS_NO_ERROR);

	/* The unrelated context can still run and stays clean. */
	gem_sync(fd2, noop(fd2, ctx_unrelated, e));
	assert_reset_status(0, fd1, ctx_guilty, RS_BATCH_ACTIVE);
	assert_reset_status(1, fd2, ctx_unrelated, RS_NO_ERROR);

	close(fd1);
	close(fd2);
}
470
get_reset_count(int fd,int ctx)471 static int get_reset_count(int fd, int ctx)
472 {
473 int ret;
474 struct local_drm_i915_reset_stats rs;
475
476 ret = gem_reset_stats(fd, ctx, &rs);
477 if (ret)
478 return ret;
479
480 return rs.reset_count;
481 }
482
test_close_pending_ctx(const struct intel_execution_engine * e)483 static void test_close_pending_ctx(const struct intel_execution_engine *e)
484 {
485 int fd = drm_open_driver(DRIVER_INTEL);
486 uint32_t ctx = gem_context_create(fd);
487
488 assert_reset_status(fd, fd, ctx, RS_NO_ERROR);
489
490 inject_hang(fd, ctx, e, 0);
491 gem_context_destroy(fd, ctx);
492 igt_assert_eq(__gem_context_destroy(fd, ctx), -ENOENT);
493
494 close(fd);
495 }
496
test_close_pending(const struct intel_execution_engine * e)497 static void test_close_pending(const struct intel_execution_engine *e)
498 {
499 int fd = drm_open_driver(DRIVER_INTEL);
500
501 assert_reset_status(fd, fd, 0, RS_NO_ERROR);
502
503 inject_hang(fd, 0, e, 0);
504 close(fd);
505 }
506
/*
 * Submit one shared noop batch on every engine, in engine-list order or
 * in reverse, then wait for it to complete.  Execbuf errors are ignored
 * on purpose: engines absent on this device simply get skipped.
 */
static void noop_on_each_ring(int fd, const bool reverse)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_execbuffer2 eb;
	struct drm_i915_gem_exec_object2 obj;
	const struct intel_execution_engine *e;

	memset(&obj, 0, sizeof(obj));
	obj.handle = gem_create(fd, 4096);
	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));

	memset(&eb, 0, sizeof(eb));
	eb.buffers_ptr = to_user_pointer(&obj);
	eb.buffer_count = 1;

	if (reverse) {
		/* Walk to the list terminator, then iterate backwards. */
		for (e = intel_execution_engines; e->name; e++)
			;
		while (--e >= intel_execution_engines) {
			eb.flags = e->exec_id | e->flags;
			__gem_execbuf(fd, &eb);
		}
	} else {
		for (e = intel_execution_engines; e->name; e++) {
			eb.flags = e->exec_id | e->flags;
			__gem_execbuf(fd, &eb);
		}
	}

	gem_sync(fd, obj.handle);
	gem_close(fd, obj.handle);
}
539
/*
 * With a hang outstanding on @e, fork a child that schedules the same
 * noop batch on every ring (forwards or in reverse) and is then
 * SIGKILLed.  This exercises batch_obj reference counting when the GPU
 * reset clears the ring lists while another process holds references.
 */
static void test_close_pending_fork(const struct intel_execution_engine *e,
				    const bool reverse)
{
	int fd = drm_open_driver(DRIVER_INTEL);
	igt_hang_t hang;
	int pid;

	assert_reset_status(fd, fd, 0, RS_NO_ERROR);

	hang = igt_hang_ctx(fd, 0, e->exec_id | e->flags, 0);
	sleep(1);

	/* Avoid helpers as we need to kill the child
	 * without any extra signal handling on behalf of
	 * lib/drmtest.c
	 */
	pid = fork();
	if (pid == 0) {
		const int fd2 = drm_open_driver(DRIVER_INTEL);
		igt_assert_lte(0, fd2);

		/* The crucial component is that we schedule the same noop batch
		 * on each ring. This exercises batch_obj reference counting,
		 * when gpu is reset and ring lists are cleared.
		 */
		noop_on_each_ring(fd2, reverse);
		close(fd2);
		pause();
		exit(0);
	} else {
		igt_assert_lt(0, pid);
		sleep(1);

		/* Kill the child to reduce refcounts on
		   batch_objs */
		kill(pid, SIGKILL);
	}

	/* Parent cleans up the hang after the child is gone. */
	igt_post_hang_ring(fd, hang);
	close(fd);
}
581
/*
 * Verify the context's reset_count increments by exactly one across a
 * synchronous hang, and that after dropping root the count reads back
 * as zero for the unprivileged child.
 */
static void test_reset_count(const struct intel_execution_engine *e,
			     const bool create_ctx)
{
	int fd = drm_open_driver(DRIVER_INTEL);
	int ctx;
	long c1, c2;

	if (create_ctx)
		ctx = gem_context_create(fd);
	else
		ctx = 0;	/* default context */

	assert_reset_status(fd, fd, ctx, RS_NO_ERROR);

	c1 = get_reset_count(fd, ctx);
	igt_assert(c1 >= 0);

	inject_hang(fd, ctx, e, 0);

	assert_reset_status(fd, fd, ctx, RS_BATCH_ACTIVE);
	c2 = get_reset_count(fd, ctx);
	igt_assert(c2 >= 0);
	/* Exactly one reset should have been recorded. */
	igt_assert(c2 == (c1 + 1));

	igt_fork(child, 1) {
		igt_drop_root();

		/* Unprivileged clients must see a zeroed reset count. */
		c2 = get_reset_count(fd, ctx);

		igt_assert(c2 == 0);
	}

	igt_waitchildren();

	if (create_ctx)
		gem_context_destroy(fd, ctx);

	close(fd);
}
621
_test_params(int fd,int ctx,uint32_t flags,uint32_t pad)622 static int _test_params(int fd, int ctx, uint32_t flags, uint32_t pad)
623 {
624 struct local_drm_i915_reset_stats rs;
625
626 memset(&rs, 0, sizeof(rs));
627 rs.ctx_id = ctx;
628 rs.flags = flags;
629 rs.reset_count = rand();
630 rs.batch_active = rand();
631 rs.batch_pending = rand();
632 rs.pad = pad;
633
634 if (drmIoctl(fd, GET_RESET_STATS_IOCTL, &rs))
635 return -errno;
636
637 return 0;
638 }
639
/* Capability level of the caller: full root, or an unprivileged user. */
typedef enum { root = 0, user } cap_t;
641
_check_param_ctx(const int fd,const int ctx,const cap_t cap)642 static void _check_param_ctx(const int fd, const int ctx, const cap_t cap)
643 {
644 const uint32_t bad = rand() + 1;
645
646 if (ctx == 0) {
647 igt_assert_eq(_test_params(fd, ctx, 0, 0), 0);
648
649 if (cap != root) {
650 igt_assert(get_reset_count(fd, ctx) == 0);
651 }
652 }
653
654 igt_assert_eq(_test_params(fd, ctx, 0, bad), -EINVAL);
655 igt_assert_eq(_test_params(fd, ctx, bad, 0), -EINVAL);
656 igt_assert_eq(_test_params(fd, ctx, bad, bad), -EINVAL);
657 }
658
/* A NULL argument and a bogus ctx id must both fail; then run the
 * per-context parameter checks. */
static void check_params(const int fd, const int ctx, cap_t cap)
{
	igt_assert_eq(ioctl(fd, GET_RESET_STATS_IOCTL, 0), -1);
	igt_assert_eq(_test_params(fd, 0xbadbad, 0, 0), -ENOENT);

	_check_param_ctx(fd, ctx, cap);
}
666
/*
 * Exercise the parameter validation as root, and again in a forked
 * child both before and after dropping root privileges.
 */
static void _test_param(const int fd, const int ctx)
{
	check_params(fd, ctx, root);

	igt_fork(child, 1) {
		check_params(fd, ctx, root);

		igt_drop_root();

		check_params(fd, ctx, user);
	}

	/* The parent keeps its privileges while the child runs. */
	check_params(fd, ctx, root);

	igt_waitchildren();
}
683
test_params_ctx(void)684 static void test_params_ctx(void)
685 {
686 int fd;
687
688 fd = drm_open_driver(DRIVER_INTEL);
689 _test_param(fd, gem_context_create(fd));
690 close(fd);
691 }
692
test_params(void)693 static void test_params(void)
694 {
695 int fd;
696
697 fd = drm_open_driver(DRIVER_INTEL);
698 _test_param(fd, 0);
699 close(fd);
700 }
701
/*
 * Return the next engine after @e in the static engine list that is
 * actually usable on @fd, wrapping around at the list terminator and
 * skipping the exec_id == 0 ("default") entry.  If @e is the only
 * usable engine the walk comes back around to @e itself — callers
 * (defer_hangcheck) skip on that case.
 */
static const struct intel_execution_engine *
next_engine(int fd, const struct intel_execution_engine *e)
{
	do {
		e++;
		if (e->name == NULL)
			e = intel_execution_engines;
		if (e->exec_id == 0)
			e++;
	} while (!has_engine(fd, 0, e));

	return e;
}
715
/*
 * Inject a hang on @engine while poking a different engine with noops
 * once a second, until the global reset count bumps (or ~30s elapse).
 * Verifies that activity on other engines does not defer hang
 * detection indefinitely.
 */
static void defer_hangcheck(const struct intel_execution_engine *engine)
{
	const struct intel_execution_engine *next;
	int fd, count_start, count_end;
	int seconds = 30;

	fd = drm_open_driver(DRIVER_INTEL);

	/* Need a second engine to keep busy; skip on single-engine HW. */
	next = next_engine(fd, engine);
	igt_skip_on(next == engine);

	count_start = get_reset_count(fd, 0);
	igt_assert_lte(0, count_start);

	inject_hang(fd, 0, engine, 0);
	while (--seconds) {
		noop(fd, 0, next);

		count_end = get_reset_count(fd, 0);
		igt_assert_lte(0, count_end);

		if (count_end > count_start)
			break;

		sleep(1);
	}

	/* The reset must have been counted despite the busy other engine. */
	igt_assert_lt(count_start, count_end);

	close(fd);
}
747
gem_has_reset_stats(int fd)748 static bool gem_has_reset_stats(int fd)
749 {
750 struct local_drm_i915_reset_stats rs;
751 int ret;
752
753 /* Carefully set flags and pad to zero, otherwise
754 we get -EINVAL
755 */
756 memset(&rs, 0, sizeof(rs));
757
758 ret = drmIoctl(fd, GET_RESET_STATS_IOCTL, &rs);
759 if (ret == 0)
760 return true;
761
762 /* If we get EPERM, we have support but did not
763 have CAP_SYSADM */
764 if (ret == -1 && errno == EPERM)
765 return true;
766
767 return false;
768 }
769
/* Wrap a subtest body with GPU quiescing before and after it runs. */
#define RUN_TEST(...) do { sync_gpu(); __VA_ARGS__; sync_gpu(); } while (0)
/* As RUN_TEST, but first require context support on the current engine. */
#define RUN_CTX_TEST(...) do { check_context(e); RUN_TEST(__VA_ARGS__); } while (0)
772
igt_main
{
	const struct intel_execution_engine *e;
	igt_skip_on_simulation();

	igt_fixture {
		int fd;

		bool has_reset_stats;
		bool using_full_reset;
		fd = drm_open_driver(DRIVER_INTEL);
		devid = intel_get_drm_devid(fd);

		has_reset_stats = gem_has_reset_stats(fd);

		/* These tests rely on full-GPU resets, so force the i915
		 * "reset" module parameter to global-only; it is restored
		 * in the final fixture below. */
		igt_assert(igt_sysfs_set_parameter
			   (fd, "reset", "%d", 1 /* only global reset */));

		using_full_reset = !gem_engine_reset_enabled(fd) &&
				   gem_gpu_reset_enabled(fd);

		close(fd);

		igt_require_f(has_reset_stats,
			      "No reset stats ioctl support. Too old kernel?\n");
		igt_require_f(using_full_reset,
			      "Full GPU reset is not enabled. Is enable_hangcheck set?\n");
	}

	igt_subtest("params")
		test_params();

	igt_subtest_f("params-ctx")
		RUN_TEST(test_params_ctx());

	/* One instance of every subtest per engine. */
	for (e = intel_execution_engines; e->name; e++) {
		igt_subtest_f("reset-stats-%s", e->name)
			RUN_TEST(test_rs(e, 4, 1, 0));

		igt_subtest_f("reset-stats-ctx-%s", e->name)
			RUN_CTX_TEST(test_rs_ctx(e, 4, 4, 1, 2));

		igt_subtest_f("ban-%s", e->name)
			RUN_TEST(test_ban(e));

		igt_subtest_f("ban-ctx-%s", e->name)
			RUN_CTX_TEST(test_ban_ctx(e));

		igt_subtest_f("reset-count-%s", e->name)
			RUN_TEST(test_reset_count(e, false));

		igt_subtest_f("reset-count-ctx-%s", e->name)
			RUN_CTX_TEST(test_reset_count(e, true));

		igt_subtest_f("unrelated-ctx-%s", e->name)
			RUN_CTX_TEST(test_unrelated_ctx(e));

		igt_subtest_f("close-pending-%s", e->name)
			RUN_TEST(test_close_pending(e));

		igt_subtest_f("close-pending-ctx-%s", e->name)
			RUN_CTX_TEST(test_close_pending_ctx(e));

		igt_subtest_f("close-pending-fork-%s", e->name)
			RUN_TEST(test_close_pending_fork(e, false));

		igt_subtest_f("close-pending-fork-reverse-%s", e->name)
			RUN_TEST(test_close_pending_fork(e, true));

		igt_subtest_f("defer-hangcheck-%s", e->name)
			RUN_TEST(defer_hangcheck(e));
	}

	/* Restore the default reset behaviour for subsequent tests. */
	igt_fixture {
		int fd;

		fd = drm_open_driver(DRIVER_INTEL);
		igt_assert(igt_sysfs_set_parameter
			   (fd, "reset", "%d", INT_MAX /* any reset method */));
		close(fd);
	}
}
855