1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <fcntl.h>
29 #include <inttypes.h>
30 #include <errno.h>
31 #include <sys/stat.h>
32 #include <sys/time.h>
33 #include <sys/times.h>
34 #include <sys/types.h>
35 #include <dirent.h>
36 #include <time.h>
37 #include <poll.h>
38 #include <sched.h>
39
40 #include "igt.h"
41 #include "igt_core.h"
42 #include "igt_perf.h"
43 #include "igt_sysfs.h"
44 #include "igt_pm.h"
45 #include "sw_sync.h"
46
IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");

/* Maximum relative error tolerated when comparing sampled counters (±5%). */
const double tolerance = 0.05f;
/* Nominal duration spin batches are kept busy for, in nanoseconds. */
const unsigned long batch_duration_ns = 500e6;
51
/* Open a single i915 PMU event; skip the test when i915 PMU is absent. */
static int open_pmu(uint64_t config)
{
	int fd = perf_i915_open(config);

	igt_skip_on(fd < 0 && errno == ENODEV);
	igt_assert(fd >= 0);

	return fd;
}
62
/* Open an i915 PMU event as part of an event group (group < 0 == leader). */
static int open_group(uint64_t config, int group)
{
	int fd = perf_i915_open_group(config, group);

	igt_skip_on(fd < 0 && errno == ENODEV);
	igt_assert(fd >= 0);

	return fd;
}
73
74 static void
init(int gem_fd,const struct intel_execution_engine2 * e,uint8_t sample)75 init(int gem_fd, const struct intel_execution_engine2 *e, uint8_t sample)
76 {
77 int fd, err = 0;
78 bool exists;
79
80 errno = 0;
81 fd = perf_i915_open(__I915_PMU_ENGINE(e->class, e->instance, sample));
82 if (fd < 0)
83 err = errno;
84
85 exists = gem_context_has_engine(gem_fd, 0, e->flags);
86 if (intel_gen(intel_get_drm_devid(gem_fd)) < 6 &&
87 sample == I915_SAMPLE_SEMA)
88 exists = false;
89
90 if (exists) {
91 igt_assert_eq(err, 0);
92 igt_assert_fd(fd);
93 close(fd);
94 } else {
95 igt_assert_lt(fd, 0);
96 igt_assert_eq(err, ENODEV);
97 }
98 }
99
/*
 * Read one event: returns the counter value and optionally stores the
 * kernel-provided timestamp (second u64 of the read format) in *ts.
 */
static uint64_t __pmu_read_single(int fd, uint64_t *ts)
{
	uint64_t buf[2];

	igt_assert_eq(read(fd, buf, sizeof(buf)), sizeof(buf));

	if (ts)
		*ts = buf[1];

	return buf[0];
}
111
/* Read just the counter value of a single (non-grouped) event. */
static uint64_t pmu_read_single(int fd)
{
	return __pmu_read_single(fd, NULL);
}
116
/*
 * Read a group of 'num' events into val[]; returns the group timestamp.
 * Group read layout is { nr, time, value[num] }.
 */
static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
{
	uint64_t buf[2 + num];

	igt_assert_eq(read(fd, buf, sizeof(buf)), sizeof(buf));

	memcpy(val, &buf[2], num * sizeof(*val));

	return buf[1];
}
129
/*
 * Assert (double)x lies within the asymmetric relative tolerance band
 * [ref * (1 - tol_down), ref * (1 + tol_up)], printing both expressions
 * and the tolerance on failure.
 */
#define __assert_within_epsilon(x, ref, tol_up, tol_down) \
	igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
		     (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
		     "'%s' != '%s' (%f not within +%f%%/-%f%% tolerance of %f)\n",\
		     #x, #ref, (double)(x), \
		     (tol_up) * 100.0, (tol_down) * 100.0, \
		     (double)(ref))

/* Symmetric variant: same tolerance in both directions. */
#define assert_within_epsilon(x, ref, tolerance) \
	__assert_within_epsilon(x, ref, tolerance, tolerance)
140
141 /*
142 * Helper for cases where we assert on time spent sleeping (directly or
143 * indirectly), so make it more robust by ensuring the system sleep time
144 * is within test tolerance to start with.
145 */
/* Sleep at least 'usec' microseconds and return the true elapsed time (ns). */
static unsigned int measured_usleep(unsigned int usec)
{
	struct timespec start = { };
	unsigned int elapsed_us;

	elapsed_us = igt_nsec_elapsed(&start);
	igt_assert(elapsed_us == 0);

	/* Keep sleeping off the remainder until the full period has passed. */
	do {
		usleep(usec - elapsed_us);
		elapsed_us = igt_nsec_elapsed(&start) / 1000;
	} while (elapsed_us < usec);

	return igt_nsec_elapsed(&start);
}
160
/* Subtest behaviour flags passed around via 'unsigned int flags'. */
#define TEST_BUSY (1)		/* sample while a spinner keeps the engine busy */
#define FLAG_SYNC (2)		/* wait for the spinner batch when ending it */
#define TEST_TRAILING_IDLE (4)	/* end the spinner before the final sample */
#define TEST_RUNTIME_PM (8)
#define FLAG_LONG (16)
#define FLAG_HANG (32)
167
__spin_poll(int fd,uint32_t ctx,const struct intel_execution_engine2 * e)168 static igt_spin_t * __spin_poll(int fd, uint32_t ctx,
169 const struct intel_execution_engine2 *e)
170 {
171 struct igt_spin_factory opts = {
172 .ctx = ctx,
173 .engine = e->flags,
174 };
175
176 if (gem_class_can_store_dword(fd, e->class))
177 opts.flags |= IGT_SPIN_POLL_RUN;
178
179 return __igt_spin_factory(fd, &opts);
180 }
181
/*
 * Wait until the spinner is actually executing on the GPU (or fall back to
 * a fixed sleep when polling is unavailable); returns the wait time in ns.
 */
static unsigned long __spin_wait(int fd, igt_spin_t *spin)
{
	struct timespec start = { };

	igt_nsec_elapsed(&start);

	if (!igt_spin_has_poll(spin)) {
		igt_debug("__spin_wait - usleep mode\n");
		usleep(500e3); /* Better than nothing! */
		return igt_nsec_elapsed(&start);
	}

	for (unsigned long warned = 0; !igt_spin_has_started(spin); ) {
		unsigned long now = igt_nsec_elapsed(&start);

		/* Complain at most once per 250ms while still waiting. */
		if ((now - warned) > 250e6) {
			warned = now;
			igt_warn("Spinner not running after %.2fms\n",
				 (double)now / 1e6);
		}
	}

	return igt_nsec_elapsed(&start);
}
207
__spin_sync(int fd,uint32_t ctx,const struct intel_execution_engine2 * e)208 static igt_spin_t * __spin_sync(int fd, uint32_t ctx,
209 const struct intel_execution_engine2 *e)
210 {
211 igt_spin_t *spin = __spin_poll(fd, ctx, e);
212
213 __spin_wait(fd, spin);
214
215 return spin;
216 }
217
spin_sync(int fd,uint32_t ctx,const struct intel_execution_engine2 * e)218 static igt_spin_t * spin_sync(int fd, uint32_t ctx,
219 const struct intel_execution_engine2 *e)
220 {
221 igt_require_gem(fd);
222
223 return __spin_sync(fd, ctx, e);
224 }
225
spin_sync_flags(int fd,uint32_t ctx,unsigned int flags)226 static igt_spin_t * spin_sync_flags(int fd, uint32_t ctx, unsigned int flags)
227 {
228 struct intel_execution_engine2 e = { };
229
230 e.class = gem_execbuf_flags_to_engine_class(flags);
231 e.instance = (flags & (I915_EXEC_BSD_MASK | I915_EXEC_RING_MASK)) ==
232 (I915_EXEC_BSD | I915_EXEC_BSD_RING2) ? 1 : 0;
233 e.flags = flags;
234
235 return spin_sync(fd, ctx, &e);
236 }
237
/*
 * Terminate a spin batch (no-op for NULL). FLAG_SYNC additionally waits
 * for the batch to retire; TEST_TRAILING_IDLE busy-polls for a fifth of
 * batch_duration_ns so callers sample some trailing idle time too.
 */
static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
{
	if (!spin)
		return;

	igt_spin_end(spin);

	if (flags & FLAG_SYNC)
		gem_sync(fd, spin->handle);

	if (flags & TEST_TRAILING_IDLE) {
		unsigned long t, timeout = 0;
		struct timespec start = { };

		igt_nsec_elapsed(&start);

		do {
			t = igt_nsec_elapsed(&start);

			/* Warn at most every 10ms while the batch has still
			 * not retired. */
			if (gem_bo_busy(fd, spin->handle) &&
			    (t - timeout) > 10e6) {
				timeout = t;
				igt_warn("Spinner not idle after %.2fms\n",
					 (double)t / 1e6);
			}

			usleep(1e3);
		} while (t < batch_duration_ns / 5);
	}
}
268
269 static void
single(int gem_fd,const struct intel_execution_engine2 * e,unsigned int flags)270 single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
271 {
272 unsigned long slept;
273 igt_spin_t *spin;
274 uint64_t val;
275 int fd;
276
277 fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
278
279 if (flags & TEST_BUSY)
280 spin = spin_sync(gem_fd, 0, e);
281 else
282 spin = NULL;
283
284 val = pmu_read_single(fd);
285 slept = measured_usleep(batch_duration_ns / 1000);
286 if (flags & TEST_TRAILING_IDLE)
287 end_spin(gem_fd, spin, flags);
288 val = pmu_read_single(fd) - val;
289
290 if (flags & FLAG_HANG)
291 igt_force_gpu_reset(gem_fd);
292 else
293 end_spin(gem_fd, spin, FLAG_SYNC);
294
295 assert_within_epsilon(val, flags & TEST_BUSY ? slept : 0.f, tolerance);
296
297 /* Check for idle after hang. */
298 if (flags & FLAG_HANG) {
299 gem_quiescent_gpu(gem_fd);
300 igt_assert(!gem_bo_busy(gem_fd, spin->handle));
301
302 val = pmu_read_single(fd);
303 slept = measured_usleep(batch_duration_ns / 1000);
304 val = pmu_read_single(fd) - val;
305
306 assert_within_epsilon(val, 0, tolerance);
307 }
308
309 igt_spin_free(gem_fd, spin);
310 close(fd);
311
312 gem_quiescent_gpu(gem_fd);
313 }
314
315 static void
busy_start(int gem_fd,const struct intel_execution_engine2 * e)316 busy_start(int gem_fd, const struct intel_execution_engine2 *e)
317 {
318 unsigned long slept;
319 uint64_t val, ts[2];
320 igt_spin_t *spin;
321 int fd;
322
323 /*
324 * Defeat the busy stats delayed disable, we need to guarantee we are
325 * the first user.
326 */
327 sleep(2);
328
329 spin = __spin_sync(gem_fd, 0, e);
330
331 fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
332
333 val = __pmu_read_single(fd, &ts[0]);
334 slept = measured_usleep(batch_duration_ns / 1000);
335 val = __pmu_read_single(fd, &ts[1]) - val;
336 igt_debug("slept=%lu perf=%"PRIu64"\n", slept, ts[1] - ts[0]);
337
338 igt_spin_free(gem_fd, spin);
339 close(fd);
340
341 assert_within_epsilon(val, ts[1] - ts[0], tolerance);
342 gem_quiescent_gpu(gem_fd);
343 }
344
345 /*
346 * This test has a potentially low rate of catching the issue it is trying to
347 * catch. Or in other words, quite high rate of false negative successes. We
348 * will depend on the CI systems running it a lot to detect issues.
349 */
350 static void
busy_double_start(int gem_fd,const struct intel_execution_engine2 * e)351 busy_double_start(int gem_fd, const struct intel_execution_engine2 *e)
352 {
353 unsigned long slept;
354 uint64_t val, val2, ts[2];
355 igt_spin_t *spin[2];
356 uint32_t ctx;
357 int fd;
358
359 ctx = gem_context_create(gem_fd);
360 gem_context_set_all_engines(gem_fd, ctx);
361
362 /*
363 * Defeat the busy stats delayed disable, we need to guarantee we are
364 * the first user.
365 */
366 sleep(2);
367
368 /*
369 * Submit two contexts, with a pause in between targeting the ELSP
370 * re-submission in execlists mode. Make sure busyness is correctly
371 * reported with the engine busy, and after the engine went idle.
372 */
373 spin[0] = __spin_sync(gem_fd, 0, e);
374 usleep(500e3);
375 spin[1] = __igt_spin_new(gem_fd,
376 .ctx = ctx,
377 .engine = e->flags);
378
379 /*
380 * Open PMU as fast as possible after the second spin batch in attempt
381 * to be faster than the driver handling lite-restore.
382 */
383 fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
384
385 val = __pmu_read_single(fd, &ts[0]);
386 slept = measured_usleep(batch_duration_ns / 1000);
387 val = __pmu_read_single(fd, &ts[1]) - val;
388 igt_debug("slept=%lu perf=%"PRIu64"\n", slept, ts[1] - ts[0]);
389
390 igt_spin_end(spin[0]);
391 igt_spin_end(spin[1]);
392
393 /* Wait for GPU idle to verify PMU reports idle. */
394 gem_quiescent_gpu(gem_fd);
395
396 val2 = pmu_read_single(fd);
397 usleep(batch_duration_ns / 1000);
398 val2 = pmu_read_single(fd) - val2;
399
400 igt_info("busy=%"PRIu64" idle=%"PRIu64"\n", val, val2);
401
402 igt_spin_free(gem_fd, spin[0]);
403 igt_spin_free(gem_fd, spin[1]);
404
405 close(fd);
406
407 gem_context_destroy(gem_fd, ctx);
408
409 assert_within_epsilon(val, ts[1] - ts[0], tolerance);
410 igt_assert_eq(val2, 0);
411
412 gem_quiescent_gpu(gem_fd);
413 }
414
/* Log one "engine-index=busy-ns" line per engine in a single igt_info(). */
static void log_busy(unsigned int num_engines, uint64_t *val)
{
	char buf[1024];
	int rem = sizeof(buf);
	unsigned int i;
	char *p = buf;

	for (i = 0; i < num_engines; i++) {
		int len;

		len = snprintf(p, rem, "%u=%" PRIu64 "\n", i, val[i]);
		igt_assert(len > 0);
		/*
		 * snprintf() returns the would-be length on truncation;
		 * stop before 'rem' goes negative (it would be converted to
		 * a huge size_t) and 'p' advances past the end of 'buf'.
		 */
		if (len >= rem)
			break;
		rem -= len;
		p += len;
	}

	igt_info("%s", buf);
}
433
434 static void
busy_check_all(int gem_fd,const struct intel_execution_engine2 * e,const unsigned int num_engines,unsigned int flags)435 busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
436 const unsigned int num_engines, unsigned int flags)
437 {
438 struct intel_execution_engine2 *e_;
439 uint64_t tval[2][num_engines];
440 unsigned int busy_idx = 0, i;
441 uint64_t val[num_engines];
442 int fd[num_engines];
443 unsigned long slept;
444 igt_spin_t *spin;
445
446 i = 0;
447 fd[0] = -1;
448 __for_each_physical_engine(gem_fd, e_) {
449 if (e->class == e_->class && e->instance == e_->instance)
450 busy_idx = i;
451
452 fd[i++] = open_group(I915_PMU_ENGINE_BUSY(e_->class,
453 e_->instance),
454 fd[0]);
455 }
456
457 igt_assert_eq(i, num_engines);
458
459 spin = spin_sync(gem_fd, 0, e);
460 pmu_read_multi(fd[0], num_engines, tval[0]);
461 slept = measured_usleep(batch_duration_ns / 1000);
462 if (flags & TEST_TRAILING_IDLE)
463 end_spin(gem_fd, spin, flags);
464 pmu_read_multi(fd[0], num_engines, tval[1]);
465
466 end_spin(gem_fd, spin, FLAG_SYNC);
467 igt_spin_free(gem_fd, spin);
468 close(fd[0]);
469
470 for (i = 0; i < num_engines; i++)
471 val[i] = tval[1][i] - tval[0][i];
472
473 log_busy(num_engines, val);
474
475 assert_within_epsilon(val[busy_idx], slept, tolerance);
476 for (i = 0; i < num_engines; i++) {
477 if (i == busy_idx)
478 continue;
479 assert_within_epsilon(val[i], 0.0f, tolerance);
480 }
481 gem_quiescent_gpu(gem_fd);
482 }
483
484 static void
__submit_spin(int gem_fd,igt_spin_t * spin,const struct intel_execution_engine2 * e,int offset)485 __submit_spin(int gem_fd, igt_spin_t *spin,
486 const struct intel_execution_engine2 *e,
487 int offset)
488 {
489 struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
490
491 eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
492 eb.flags |= e->flags | I915_EXEC_NO_RELOC;
493 eb.batch_start_offset += offset;
494
495 gem_execbuf(gem_fd, &eb);
496 }
497
498 static void
most_busy_check_all(int gem_fd,const struct intel_execution_engine2 * e,const unsigned int num_engines,unsigned int flags)499 most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
500 const unsigned int num_engines, unsigned int flags)
501 {
502 struct intel_execution_engine2 *e_;
503 uint64_t tval[2][num_engines];
504 uint64_t val[num_engines];
505 int fd[num_engines];
506 unsigned long slept;
507 igt_spin_t *spin = NULL;
508 unsigned int idle_idx, i;
509
510 i = 0;
511 __for_each_physical_engine(gem_fd, e_) {
512 if (e->class == e_->class && e->instance == e_->instance)
513 idle_idx = i;
514 else if (spin)
515 __submit_spin(gem_fd, spin, e_, 64);
516 else
517 spin = __spin_poll(gem_fd, 0, e_);
518
519 val[i++] = I915_PMU_ENGINE_BUSY(e_->class, e_->instance);
520 }
521 igt_assert(i == num_engines);
522 igt_require(spin); /* at least one busy engine */
523
524 fd[0] = -1;
525 for (i = 0; i < num_engines; i++)
526 fd[i] = open_group(val[i], fd[0]);
527
528 /* Small delay to allow engines to start. */
529 usleep(__spin_wait(gem_fd, spin) * num_engines / 1e3);
530
531 pmu_read_multi(fd[0], num_engines, tval[0]);
532 slept = measured_usleep(batch_duration_ns / 1000);
533 if (flags & TEST_TRAILING_IDLE)
534 end_spin(gem_fd, spin, flags);
535 pmu_read_multi(fd[0], num_engines, tval[1]);
536
537 end_spin(gem_fd, spin, FLAG_SYNC);
538 igt_spin_free(gem_fd, spin);
539 close(fd[0]);
540
541 for (i = 0; i < num_engines; i++)
542 val[i] = tval[1][i] - tval[0][i];
543
544 log_busy(num_engines, val);
545
546 for (i = 0; i < num_engines; i++) {
547 if (i == idle_idx)
548 assert_within_epsilon(val[i], 0.0f, tolerance);
549 else
550 assert_within_epsilon(val[i], slept, tolerance);
551 }
552 gem_quiescent_gpu(gem_fd);
553 }
554
555 static void
all_busy_check_all(int gem_fd,const unsigned int num_engines,unsigned int flags)556 all_busy_check_all(int gem_fd, const unsigned int num_engines,
557 unsigned int flags)
558 {
559 struct intel_execution_engine2 *e;
560 uint64_t tval[2][num_engines];
561 uint64_t val[num_engines];
562 int fd[num_engines];
563 unsigned long slept;
564 igt_spin_t *spin = NULL;
565 unsigned int i;
566
567 i = 0;
568 __for_each_physical_engine(gem_fd, e) {
569 if (spin)
570 __submit_spin(gem_fd, spin, e, 64);
571 else
572 spin = __spin_poll(gem_fd, 0, e);
573
574 val[i++] = I915_PMU_ENGINE_BUSY(e->class, e->instance);
575 }
576 igt_assert(i == num_engines);
577
578 fd[0] = -1;
579 for (i = 0; i < num_engines; i++)
580 fd[i] = open_group(val[i], fd[0]);
581
582 /* Small delay to allow engines to start. */
583 usleep(__spin_wait(gem_fd, spin) * num_engines / 1e3);
584
585 pmu_read_multi(fd[0], num_engines, tval[0]);
586 slept = measured_usleep(batch_duration_ns / 1000);
587 if (flags & TEST_TRAILING_IDLE)
588 end_spin(gem_fd, spin, flags);
589 pmu_read_multi(fd[0], num_engines, tval[1]);
590
591 end_spin(gem_fd, spin, FLAG_SYNC);
592 igt_spin_free(gem_fd, spin);
593 close(fd[0]);
594
595 for (i = 0; i < num_engines; i++)
596 val[i] = tval[1][i] - tval[0][i];
597
598 log_busy(num_engines, val);
599
600 for (i = 0; i < num_engines; i++)
601 assert_within_epsilon(val[i], slept, tolerance);
602 gem_quiescent_gpu(gem_fd);
603 }
604
605 static void
no_sema(int gem_fd,const struct intel_execution_engine2 * e,unsigned int flags)606 no_sema(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
607 {
608 igt_spin_t *spin;
609 uint64_t val[2][2];
610 int fd;
611
612 fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
613 open_group(I915_PMU_ENGINE_WAIT(e->class, e->instance), fd);
614
615 if (flags & TEST_BUSY)
616 spin = spin_sync(gem_fd, 0, e);
617 else
618 spin = NULL;
619
620 pmu_read_multi(fd, 2, val[0]);
621 measured_usleep(batch_duration_ns / 1000);
622 if (flags & TEST_TRAILING_IDLE)
623 end_spin(gem_fd, spin, flags);
624 pmu_read_multi(fd, 2, val[1]);
625
626 val[0][0] = val[1][0] - val[0][0];
627 val[0][1] = val[1][1] - val[0][1];
628
629 if (spin) {
630 end_spin(gem_fd, spin, FLAG_SYNC);
631 igt_spin_free(gem_fd, spin);
632 }
633 close(fd);
634
635 assert_within_epsilon(val[0][0], 0.0f, tolerance);
636 assert_within_epsilon(val[0][1], 0.0f, tolerance);
637 }
638
639 #define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
640 #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
641 #define MI_SEMAPHORE_POLL (1<<15)
642 #define MI_SEMAPHORE_SAD_GTE_SDD (1<<12)
643
644 static void
sema_wait(int gem_fd,const struct intel_execution_engine2 * e,unsigned int flags)645 sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
646 unsigned int flags)
647 {
648 struct drm_i915_gem_relocation_entry reloc[2] = {};
649 struct drm_i915_gem_exec_object2 obj[2] = {};
650 struct drm_i915_gem_execbuffer2 eb = {};
651 uint32_t bb_handle, obj_handle;
652 unsigned long slept;
653 uint32_t *obj_ptr;
654 uint32_t batch[16];
655 uint64_t val[2], ts[2];
656 int fd;
657
658 igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
659
660 /**
661 * Setup up a batchbuffer with a polling semaphore wait command which
662 * will wait on an value in a shared bo to change. This way we are able
663 * to control how much time we will spend in this bb.
664 */
665
666 bb_handle = gem_create(gem_fd, 4096);
667 obj_handle = gem_create(gem_fd, 4096);
668
669 obj_ptr = gem_mmap__wc(gem_fd, obj_handle, 0, 4096, PROT_WRITE);
670
671 batch[0] = MI_STORE_DWORD_IMM;
672 batch[1] = sizeof(*obj_ptr);
673 batch[2] = 0;
674 batch[3] = 1;
675 batch[4] = MI_SEMAPHORE_WAIT |
676 MI_SEMAPHORE_POLL |
677 MI_SEMAPHORE_SAD_GTE_SDD;
678 batch[5] = 1;
679 batch[6] = 0x0;
680 batch[7] = 0x0;
681 batch[8] = MI_BATCH_BUFFER_END;
682
683 gem_write(gem_fd, bb_handle, 0, batch, sizeof(batch));
684
685 reloc[0].target_handle = obj_handle;
686 reloc[0].offset = 1 * sizeof(uint32_t);
687 reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
688 reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
689 reloc[0].delta = sizeof(*obj_ptr);
690
691 reloc[1].target_handle = obj_handle;
692 reloc[1].offset = 6 * sizeof(uint32_t);
693 reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
694
695 obj[0].handle = obj_handle;
696
697 obj[1].handle = bb_handle;
698 obj[1].relocation_count = 2;
699 obj[1].relocs_ptr = to_user_pointer(reloc);
700
701 eb.buffer_count = 2;
702 eb.buffers_ptr = to_user_pointer(obj);
703 eb.flags = e->flags;
704
705 /**
706 * Start the semaphore wait PMU and after some known time let the above
707 * semaphore wait command finish. Then check that the PMU is reporting
708 * to expected time spent in semaphore wait state.
709 */
710
711 fd = open_pmu(I915_PMU_ENGINE_SEMA(e->class, e->instance));
712
713 val[0] = pmu_read_single(fd);
714
715 gem_execbuf(gem_fd, &eb);
716 do { /* wait for the batch to start executing */
717 usleep(5e3);
718 } while (!obj_ptr[1]);
719
720 igt_assert_f(igt_wait(pmu_read_single(fd) != val[0], 10, 1),
721 "sampling failed to start withing 10ms\n");
722
723 val[0] = __pmu_read_single(fd, &ts[0]);
724 slept = measured_usleep(batch_duration_ns / 1000);
725 if (flags & TEST_TRAILING_IDLE)
726 obj_ptr[0] = 1;
727 val[1] = __pmu_read_single(fd, &ts[1]);
728 igt_debug("slept %.3fms (perf %.3fms), sampled %.3fms\n",
729 slept * 1e-6,
730 (ts[1] - ts[0]) * 1e-6,
731 (val[1] - val[0]) * 1e-6);
732
733 obj_ptr[0] = 1;
734 gem_sync(gem_fd, bb_handle);
735
736 munmap(obj_ptr, 4096);
737 gem_close(gem_fd, obj_handle);
738 gem_close(gem_fd, bb_handle);
739 close(fd);
740
741 assert_within_epsilon(val[1] - val[0], slept, tolerance);
742 }
743
744 #define MI_WAIT_FOR_PIPE_C_VBLANK (1<<21)
745 #define MI_WAIT_FOR_PIPE_B_VBLANK (1<<11)
746 #define MI_WAIT_FOR_PIPE_A_VBLANK (1<<3)
747
/* Per-subtest display state used by the event-wait (vblank) tests. */
typedef struct {
	igt_display_t display;
	struct igt_fb primary_fb;	/* fb shown on the primary plane */
	igt_output_t *output;
	enum pipe pipe;			/* pipe currently driving 'output' */
} data_t;
754
/* Route 'output' through data->pipe and light it up with a black fb. */
static void prepare_crtc(data_t *data, int fd, igt_output_t *output)
{
	drmModeModeInfo *mode;
	igt_display_t *display = &data->display;
	igt_plane_t *primary;

	/* select the pipe we want to use */
	igt_output_set_pipe(output, data->pipe);

	/* create and set the primary plane fb */
	mode = igt_output_get_mode(output);
	igt_create_color_fb(fd, mode->hdisplay, mode->vdisplay,
			    DRM_FORMAT_XRGB8888,
			    LOCAL_DRM_FORMAT_MOD_NONE,
			    0.0, 0.0, 0.0,
			    &data->primary_fb);

	primary = igt_output_get_plane_type(output, DRM_PLANE_TYPE_PRIMARY);
	igt_plane_set_fb(primary, &data->primary_fb);

	igt_display_commit(display);

	/* Let one vblank pass so the pipe is fully running before use. */
	igt_wait_for_vblank(fd, data->pipe);
}
779
/* Undo prepare_crtc(): remove the fb and detach the output from its pipe. */
static void cleanup_crtc(data_t *data, int fd, igt_output_t *output)
{
	igt_plane_t *primary =
		igt_output_get_plane_type(output, DRM_PLANE_TYPE_PRIMARY);

	igt_remove_fb(fd, &data->primary_fb);
	igt_plane_set_fb(primary, NULL);

	igt_output_set_pipe(output, PIPE_ANY);
	igt_display_commit(&data->display);
}
793
wait_vblank(int fd,union drm_wait_vblank * vbl)794 static int wait_vblank(int fd, union drm_wait_vblank *vbl)
795 {
796 int err;
797
798 err = 0;
799 if (igt_ioctl(fd, DRM_IOCTL_WAIT_VBLANK, vbl))
800 err = -errno;
801
802 return err;
803 }
804
805 static void
event_wait(int gem_fd,const struct intel_execution_engine2 * e)806 event_wait(int gem_fd, const struct intel_execution_engine2 *e)
807 {
808 struct drm_i915_gem_exec_object2 obj = { };
809 struct drm_i915_gem_execbuffer2 eb = { };
810 const uint32_t DERRMR = 0x44050;
811 const uint32_t FORCEWAKE_MT = 0xa188;
812 unsigned int valid_tests = 0;
813 uint32_t batch[16], *b;
814 uint16_t devid;
815 igt_output_t *output;
816 data_t data;
817 enum pipe p;
818 int fd;
819
820 devid = intel_get_drm_devid(gem_fd);
821 igt_require(intel_gen(devid) >= 7);
822 igt_skip_on(IS_VALLEYVIEW(devid) || IS_CHERRYVIEW(devid));
823
824 kmstest_set_vt_graphics_mode();
825 igt_display_require(&data.display, gem_fd);
826
827 /**
828 * We will use the display to render event forwarind so need to
829 * program the DERRMR register and restore it at exit.
830 * Note we assume that the default/desired value for DERRMR will always
831 * be ~0u (all routing disable). To be fancy, we could do a SRM of the
832 * reg beforehand and then LRM at the end.
833 *
834 * We will emit a MI_WAIT_FOR_EVENT listening for vblank events,
835 * have a background helper to indirectly enable vblank irqs, and
836 * listen to the recorded time spent in engine wait state as reported
837 * by the PMU.
838 */
839 obj.handle = gem_create(gem_fd, 4096);
840
841 b = batch;
842 *b++ = MI_LOAD_REGISTER_IMM;
843 *b++ = FORCEWAKE_MT;
844 *b++ = 2 << 16 | 2;
845 *b++ = MI_LOAD_REGISTER_IMM;
846 *b++ = DERRMR;
847 *b++ = ~0u;
848 *b++ = MI_WAIT_FOR_EVENT;
849 *b++ = MI_LOAD_REGISTER_IMM;
850 *b++ = DERRMR;
851 *b++ = ~0u;
852 *b++ = MI_LOAD_REGISTER_IMM;
853 *b++ = FORCEWAKE_MT;
854 *b++ = 2 << 16;
855 *b++ = MI_BATCH_BUFFER_END;
856
857 eb.buffer_count = 1;
858 eb.buffers_ptr = to_user_pointer(&obj);
859 eb.flags = e->flags | I915_EXEC_SECURE;
860
861 for_each_pipe_with_valid_output(&data.display, p, output) {
862 struct igt_helper_process waiter = { };
863 const unsigned int frames = 3;
864 uint64_t val[2];
865
866 batch[6] = MI_WAIT_FOR_EVENT;
867 switch (p) {
868 case PIPE_A:
869 batch[6] |= MI_WAIT_FOR_PIPE_A_VBLANK;
870 batch[5] = ~(1 << 3);
871 break;
872 case PIPE_B:
873 batch[6] |= MI_WAIT_FOR_PIPE_B_VBLANK;
874 batch[5] = ~(1 << 11);
875 break;
876 case PIPE_C:
877 batch[6] |= MI_WAIT_FOR_PIPE_C_VBLANK;
878 batch[5] = ~(1 << 21);
879 break;
880 default:
881 continue;
882 }
883
884 gem_write(gem_fd, obj.handle, 0, batch, sizeof(batch));
885
886 data.pipe = p;
887 prepare_crtc(&data, gem_fd, output);
888
889 fd = open_pmu(I915_PMU_ENGINE_WAIT(e->class, e->instance));
890
891 val[0] = pmu_read_single(fd);
892
893 igt_fork_helper(&waiter) {
894 const uint32_t pipe_id_flag =
895 kmstest_get_vbl_flag(data.pipe);
896
897 for (;;) {
898 union drm_wait_vblank vbl = { };
899
900 vbl.request.type = DRM_VBLANK_RELATIVE;
901 vbl.request.type |= pipe_id_flag;
902 vbl.request.sequence = 1;
903 igt_assert_eq(wait_vblank(gem_fd, &vbl), 0);
904 }
905 }
906
907 for (unsigned int frame = 0; frame < frames; frame++) {
908 gem_execbuf(gem_fd, &eb);
909 gem_sync(gem_fd, obj.handle);
910 }
911
912 igt_stop_helper(&waiter);
913
914 val[1] = pmu_read_single(fd);
915
916 close(fd);
917
918 cleanup_crtc(&data, gem_fd, output);
919 valid_tests++;
920
921 igt_assert(val[1] - val[0] > 0);
922 }
923
924 gem_close(gem_fd, obj.handle);
925
926 igt_require_f(valid_tests,
927 "no valid crtc/connector combinations found\n");
928 }
929
930 static void
multi_client(int gem_fd,const struct intel_execution_engine2 * e)931 multi_client(int gem_fd, const struct intel_execution_engine2 *e)
932 {
933 uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
934 unsigned long slept[2];
935 uint64_t val[2], ts[2], perf_slept[2];
936 igt_spin_t *spin;
937 int fd[2];
938
939 gem_quiescent_gpu(gem_fd);
940
941 fd[0] = open_pmu(config);
942
943 /*
944 * Second PMU client which is initialized after the first one,
945 * and exists before it, should not affect accounting as reported
946 * in the first client.
947 */
948 fd[1] = open_pmu(config);
949
950 spin = spin_sync(gem_fd, 0, e);
951
952 val[0] = val[1] = __pmu_read_single(fd[0], &ts[0]);
953 slept[1] = measured_usleep(batch_duration_ns / 1000);
954 val[1] = __pmu_read_single(fd[1], &ts[1]) - val[1];
955 perf_slept[1] = ts[1] - ts[0];
956 igt_debug("slept=%lu perf=%"PRIu64"\n", slept[1], perf_slept[1]);
957 close(fd[1]);
958
959 slept[0] = measured_usleep(batch_duration_ns / 1000) + slept[1];
960 val[0] = __pmu_read_single(fd[0], &ts[1]) - val[0];
961 perf_slept[0] = ts[1] - ts[0];
962 igt_debug("slept=%lu perf=%"PRIu64"\n", slept[0], perf_slept[0]);
963
964 igt_spin_end(spin);
965 gem_sync(gem_fd, spin->handle);
966 igt_spin_free(gem_fd, spin);
967 close(fd[0]);
968
969 assert_within_epsilon(val[0], perf_slept[0], tolerance);
970 assert_within_epsilon(val[1], perf_slept[1], tolerance);
971 }
972
973 /**
 * Tests that i915 PMU correctly errors out in invalid initialization.
975 * i915 PMU is uncore PMU, thus:
976 * - sampling period is not supported
977 * - pid > 0 is not supported since we can't count per-process (we count
978 * per whole system)
979 * - cpu != 0 is not supported since i915 PMU only allows running on one cpu
980 * and that is normally CPU0.
981 */
/* Each attempt below must fail with EINVAL; see the comment above. */
static void invalid_init(void)
{
	struct perf_event_attr attr;

/* Reset attr to an otherwise-valid i915 busy-counter config. */
#define ATTR_INIT() \
do { \
	memset(&attr, 0, sizeof (attr)); \
	attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
	attr.type = i915_type_id(); \
	igt_assert(attr.type != 0); \
	errno = 0; \
} while(0)

	/* Sampling period is not supported. */
	ATTR_INIT();
	attr.sample_period = 100;
	igt_assert_eq(perf_event_open(&attr, -1, 0, -1, 0), -1);
	igt_assert_eq(errno, EINVAL);

	/* Counting a specific pid (pid = 0, self) is not supported. */
	ATTR_INIT();
	igt_assert_eq(perf_event_open(&attr, 0, 0, -1, 0), -1);
	igt_assert_eq(errno, EINVAL);

	/* Only cpu 0 is supported. */
	ATTR_INIT();
	igt_assert_eq(perf_event_open(&attr, -1, 1, -1, 0), -1);
	igt_assert_eq(errno, EINVAL);
}
1008
/* Opening "other" (non-engine) event index i must succeed iff 'valid'. */
static void init_other(unsigned int i, bool valid)
{
	int fd = perf_i915_open(__I915_PMU_OTHER(i));

	igt_require(!(fd < 0 && errno == ENODEV));

	if (!valid) {
		igt_assert(fd < 0);
		return;
	}

	igt_assert(fd >= 0);
	close(fd);
}
1024
/* As init_other(), but additionally exercise a read of the counter. */
static void read_other(unsigned int i, bool valid)
{
	int fd = perf_i915_open(__I915_PMU_OTHER(i));

	igt_require(!(fd < 0 && errno == ENODEV));

	if (!valid) {
		igt_assert(fd < 0);
		return;
	}

	igt_assert(fd >= 0);

	(void)pmu_read_single(fd);

	close(fd);
}
1042
/* CPU0 can only be hot-unplugged if its sysfs control node is writable. */
static bool cpu0_hotplug_support(void)
{
	return !access("/sys/devices/system/cpu/cpu0/online", W_OK);
}
1047
cpu_hotplug(int gem_fd)1048 static void cpu_hotplug(int gem_fd)
1049 {
1050 igt_spin_t *spin[2];
1051 uint64_t ts[2];
1052 uint64_t val;
1053 int link[2];
1054 int fd, ret;
1055 int cur = 0;
1056 char buf;
1057
1058 igt_require(cpu0_hotplug_support());
1059
1060 fd = open_pmu(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0));
1061
1062 /*
1063 * Create two spinners so test can ensure shorter gaps in engine
1064 * busyness as it is terminating one and re-starting the other.
1065 */
1066 spin[0] = igt_spin_new(gem_fd, .engine = I915_EXEC_DEFAULT);
1067 spin[1] = __igt_spin_new(gem_fd, .engine = I915_EXEC_DEFAULT);
1068
1069 val = __pmu_read_single(fd, &ts[0]);
1070
1071 ret = pipe2(link, O_NONBLOCK);
1072 igt_assert_eq(ret, 0);
1073
1074 /*
1075 * Toggle online status of all the CPUs in a child process and ensure
1076 * this has not affected busyness stats in the parent.
1077 */
1078 igt_fork(child, 1) {
1079 int cpu = 0;
1080
1081 close(link[0]);
1082
1083 for (;;) {
1084 char name[128];
1085 int cpufd;
1086
1087 igt_assert_lt(snprintf(name, sizeof(name),
1088 "/sys/devices/system/cpu/cpu%d/online",
1089 cpu), sizeof(name));
1090 cpufd = open(name, O_WRONLY);
1091 if (cpufd == -1) {
1092 igt_assert(cpu > 0);
1093 /*
1094 * Signal parent that we cycled through all
1095 * CPUs and we are done.
1096 */
1097 igt_assert_eq(write(link[1], "*", 1), 1);
1098 break;
1099 }
1100
1101 /* Offline followed by online a CPU. */
1102
1103 ret = write(cpufd, "0", 2);
1104 if (ret < 0) {
1105 /*
1106 * If we failed to offline a CPU we don't want
1107 * to proceed.
1108 */
1109 igt_warn("Failed to offline cpu%u! (%d)\n",
1110 cpu, errno);
1111 igt_assert_eq(write(link[1], "s", 1), 1);
1112 break;
1113 }
1114
1115 usleep(1e6);
1116
1117 ret = write(cpufd, "1", 2);
1118 if (ret < 0) {
1119 /*
1120 * Failed to bring a CPU back online is fatal
1121 * for the sanity of a test run so stop further
1122 * testing.
1123 */
1124 igt_warn("Failed to online cpu%u! (%d)\n",
1125 cpu, errno);
1126 igt_fatal_error();
1127 }
1128
1129 close(cpufd);
1130 cpu++;
1131 }
1132 }
1133
1134 close(link[1]);
1135
1136 /*
1137 * Very long batches can be declared as GPU hangs so emit shorter ones
1138 * until the CPU core shuffler finishes one loop.
1139 */
1140 for (;;) {
1141 usleep(500e3);
1142 end_spin(gem_fd, spin[cur], 0);
1143
1144 /* Check if the child is signaling completion. */
1145 ret = read(link[0], &buf, 1);
1146 if ( ret == 1 || (ret < 0 && errno != EAGAIN))
1147 break;
1148
1149 igt_spin_free(gem_fd, spin[cur]);
1150 spin[cur] = __igt_spin_new(gem_fd,
1151 .engine = I915_EXEC_DEFAULT);
1152 cur ^= 1;
1153 }
1154
1155 val = __pmu_read_single(fd, &ts[1]) - val;
1156
1157 end_spin(gem_fd, spin[0], FLAG_SYNC);
1158 end_spin(gem_fd, spin[1], FLAG_SYNC);
1159 igt_spin_free(gem_fd, spin[0]);
1160 igt_spin_free(gem_fd, spin[1]);
1161 igt_waitchildren();
1162 close(fd);
1163 close(link[0]);
1164
1165 /* Skip if child signals a problem with offlining a CPU. */
1166 igt_skip_on(buf == 's');
1167
1168 assert_within_epsilon(val, ts[1] - ts[0], tolerance);
1169 }
1170
/*
 * Verify the I915_PMU_INTERRUPTS counter advances under interrupt-generating
 * load: queue 'target' spin batches with output fences, stagger their
 * expirations across test_duration_ms, and assert at least one interrupt per
 * batch was counted above the idle baseline.
 */
static void
test_interrupts(int gem_fd)
{
	const unsigned int test_duration_ms = 1000;
	const int target = 30;
	igt_spin_t *spin[target];
	struct pollfd pfd;
	uint64_t idle, busy;
	int fence_fd;
	int fd;

	gem_quiescent_gpu(gem_fd);

	fd = open_pmu(I915_PMU_INTERRUPTS);

	/* Queue spinning batches. */
	for (int i = 0; i < target; i++) {
		spin[i] = __igt_spin_new(gem_fd,
					 .engine = I915_EXEC_DEFAULT,
					 .flags = IGT_SPIN_FENCE_OUT);
		if (i == 0) {
			fence_fd = spin[i]->out_fence;
		} else {
			/* Merge all fences so one poll covers every batch. */
			int old_fd = fence_fd;

			fence_fd = sync_fence_merge(old_fd,
						    spin[i]->out_fence);
			close(old_fd);
		}

		igt_assert(fence_fd >= 0);
	}

	/* Wait for idle state: poll until the counter stops moving. */
	idle = pmu_read_single(fd);
	do {
		busy = idle;
		usleep(1e3);
		idle = pmu_read_single(fd);
	} while (idle != busy);

	/* Arm batch expiration, spread evenly across the test duration. */
	for (int i = 0; i < target; i++)
		igt_spin_set_timeout(spin[i],
				     (i + 1) * test_duration_ms * 1e6
				     / target);

	/* Wait for last batch to finish. */
	pfd.events = POLLIN;
	pfd.fd = fence_fd;
	igt_assert_eq(poll(&pfd, 1, 2 * test_duration_ms), 1);
	close(fence_fd);

	/* Free batches. */
	for (int i = 0; i < target; i++)
		igt_spin_free(gem_fd, spin[i]);

	/* Check at least as many interrupts have been generated. */
	busy = pmu_read_single(fd) - idle;
	close(fd);

	igt_assert_lte(target, busy);
}
1234
/*
 * Same interrupt-count check as test_interrupts, but processed synchronously:
 * each batch is timed out and waited on individually before freeing it, so
 * interrupts are generated one batch at a time.
 */
static void
test_interrupts_sync(int gem_fd)
{
	const unsigned int test_duration_ms = 1000;
	const int target = 30;
	igt_spin_t *spin[target];
	struct pollfd pfd;
	uint64_t idle, busy;
	int fd;

	gem_quiescent_gpu(gem_fd);

	fd = open_pmu(I915_PMU_INTERRUPTS);

	/* Queue spinning batches (default engine), each with its own fence. */
	for (int i = 0; i < target; i++)
		spin[i] = __igt_spin_new(gem_fd,
					 .flags = IGT_SPIN_FENCE_OUT);

	/* Wait for idle state: poll until the counter stops moving. */
	idle = pmu_read_single(fd);
	do {
		busy = idle;
		usleep(1e3);
		idle = pmu_read_single(fd);
	} while (idle != busy);

	/* Process the batch queue one at a time. */
	pfd.events = POLLIN;
	for (int i = 0; i < target; i++) {
		const unsigned int timeout_ms = test_duration_ms / target;

		pfd.fd = spin[i]->out_fence;
		igt_spin_set_timeout(spin[i], timeout_ms * 1e6);
		igt_assert_eq(poll(&pfd, 1, 2 * timeout_ms), 1);
		igt_spin_free(gem_fd, spin[i]);
	}

	/* Check at least as many interrupts have been generated. */
	busy = pmu_read_single(fd) - idle;
	close(fd);

	igt_assert_lte(target, busy);
}
1279
/*
 * Verify the requested/actual frequency PMU counters: pin the GPU first to
 * its minimum and then to its maximum frequency via sysfs, sample both
 * counters under spin-batch load in each state, and check the requested
 * frequency averages match what was programmed.
 */
static void
test_frequency(int gem_fd)
{
	uint32_t min_freq, max_freq, boost_freq;
	uint64_t val[2], start[2], slept;
	double min[2], max[2];	/* [0] = requested, [1] = actual, in MHz */
	igt_spin_t *spin;
	int fd, sysfs;

	sysfs = igt_sysfs_open(gem_fd);
	igt_require(sysfs >= 0);

	min_freq = igt_sysfs_get_u32(sysfs, "gt_RPn_freq_mhz");
	max_freq = igt_sysfs_get_u32(sysfs, "gt_RP0_freq_mhz");
	boost_freq = igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz");
	igt_info("Frequency: min=%u, max=%u, boost=%u MHz\n",
		 min_freq, max_freq, boost_freq);
	igt_require(min_freq > 0 && max_freq > 0 && boost_freq > 0);
	igt_require(max_freq > min_freq);
	igt_require(boost_freq > min_freq);

	/* Group both events on one fd so they are sampled together. */
	fd = open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
	open_group(I915_PMU_ACTUAL_FREQUENCY, fd);

	/*
	 * Set GPU to min frequency and read PMU counters.
	 */
	igt_require(igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", min_freq));
	igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == min_freq);
	igt_require(igt_sysfs_set_u32(sysfs, "gt_max_freq_mhz", min_freq));
	igt_require(igt_sysfs_get_u32(sysfs, "gt_max_freq_mhz") == min_freq);
	igt_require(igt_sysfs_set_u32(sysfs, "gt_boost_freq_mhz", min_freq));
	igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == min_freq);

	gem_quiescent_gpu(gem_fd); /* Idle to be sure the change takes effect */
	spin = spin_sync_flags(gem_fd, 0, I915_EXEC_DEFAULT);

	slept = pmu_read_multi(fd, 2, start);
	measured_usleep(batch_duration_ns / 1000);
	slept = pmu_read_multi(fd, 2, val) - slept;

	/* Convert accumulated MHz*ns into an average MHz over the sleep. */
	min[0] = 1e9*(val[0] - start[0]) / slept;
	min[1] = 1e9*(val[1] - start[1]) / slept;

	igt_spin_free(gem_fd, spin);
	gem_quiescent_gpu(gem_fd); /* Don't leak busy bo into the next phase */

	usleep(1e6);

	/*
	 * Set GPU to max frequency and read PMU counters.
	 * (Max and boost must be raised before min can be raised to max.)
	 */
	igt_require(igt_sysfs_set_u32(sysfs, "gt_max_freq_mhz", max_freq));
	igt_require(igt_sysfs_get_u32(sysfs, "gt_max_freq_mhz") == max_freq);
	igt_require(igt_sysfs_set_u32(sysfs, "gt_boost_freq_mhz", boost_freq));
	igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == boost_freq);

	igt_require(igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", max_freq));
	igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == max_freq);

	gem_quiescent_gpu(gem_fd);
	spin = spin_sync_flags(gem_fd, 0, I915_EXEC_DEFAULT);

	slept = pmu_read_multi(fd, 2, start);
	measured_usleep(batch_duration_ns / 1000);
	slept = pmu_read_multi(fd, 2, val) - slept;

	max[0] = 1e9*(val[0] - start[0]) / slept;
	max[1] = 1e9*(val[1] - start[1]) / slept;

	igt_spin_free(gem_fd, spin);
	gem_quiescent_gpu(gem_fd);

	/*
	 * Restore min/max.
	 */
	igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", min_freq);
	if (igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") != min_freq)
		igt_warn("Unable to restore min frequency to saved value [%u MHz], now %u MHz\n",
			 min_freq, igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz"));
	close(fd);

	igt_info("Min frequency: requested %.1f, actual %.1f\n",
		 min[0], min[1]);
	igt_info("Max frequency: requested %.1f, actual %.1f\n",
		 max[0], max[1]);

	assert_within_epsilon(min[0], min_freq, tolerance);
	/*
	 * On thermally throttled devices we cannot be sure maximum frequency
	 * can be reached so use larger tolerance downwards.
	 */
	__assert_within_epsilon(max[0], max_freq, tolerance, 0.15f);
}
1374
/*
 * Wait for the RC6 residency counter on @fd to start ticking: sleep one
 * rough RC6 evaluation interval, then sample every 5ms until a sample pair
 * shows more than 1ms of accumulated residency. Gives up after roughly one
 * second and returns false.
 */
static bool wait_for_rc6(int fd)
{
	struct timespec tv = {};
	uint64_t prev, curr;

	/* First wait for roughly an RC6 Evaluation Interval */
	usleep(160 * 1000);

	/* Then poll for RC6 to start ticking */
	curr = pmu_read_single(fd);
	for (;;) {
		prev = curr;
		usleep(5000);
		curr = pmu_read_single(fd);
		if (curr - prev > 1e6)
			return true;
		if (igt_seconds_elapsed(&tv))
			return false;
	}
}
1395
/*
 * Verify RC6 residency accounting: while the GPU is idle the RC6 counter
 * must advance at wall-clock rate, and while forcewake holds the device
 * awake it must not advance at all. With TEST_RUNTIME_PM, additionally
 * put the device into runtime suspend first.
 */
static void
test_rc6(int gem_fd, unsigned int flags)
{
	int64_t duration_ns = 2e9;
	uint64_t idle, busy, prev, ts[2];
	unsigned long slept;
	int fd, fw;

	gem_quiescent_gpu(gem_fd);

	fd = open_pmu(I915_PMU_RC6_RESIDENCY);

	if (flags & TEST_RUNTIME_PM) {
		drmModeRes *res;

		res = drmModeGetResources(gem_fd);
		igt_require(res);

		/* force all connectors off */
		kmstest_set_vt_graphics_mode();
		kmstest_unset_all_crtcs(gem_fd, res);
		drmModeFreeResources(res);

		igt_require(igt_setup_runtime_pm());
		igt_require(igt_wait_for_pm_status(IGT_RUNTIME_PM_STATUS_SUSPENDED));

		/*
		 * Sleep for a bit to see if once woken up estimated RC6 hasn't
		 * drifted too far in advance of real RC6.
		 */
		if (flags & FLAG_LONG) {
			pmu_read_single(fd);
			sleep(5);
			pmu_read_single(fd);
		}
	}

	igt_require(wait_for_rc6(fd));

	/* While idle check full RC6: residency delta == wall time delta. */
	prev = __pmu_read_single(fd, &ts[0]);
	slept = measured_usleep(duration_ns / 1000);
	idle = __pmu_read_single(fd, &ts[1]);
	igt_debug("slept=%lu perf=%"PRIu64"\n", slept, ts[1] - ts[0]);

	assert_within_epsilon(idle - prev, ts[1] - ts[0], tolerance);

	/* Wake up device and check no RC6 accrues while held awake. */
	fw = igt_open_forcewake_handle(gem_fd);
	igt_assert(fw >= 0);
	usleep(1e3); /* wait for the rc6 cycle counter to stop ticking */

	prev = pmu_read_single(fd);
	usleep(duration_ns / 1000);
	busy = pmu_read_single(fd);

	close(fw);
	close(fd);

	if (flags & TEST_RUNTIME_PM)
		igt_restore_runtime_pm();

	assert_within_epsilon(busy - prev, 0.0, tolerance);
}
1460
/*
 * Probe for races between PMU event enabling and engine activity: repeatedly
 * enable the busyness event while a helper process hammers the engine with
 * tiny (interrupt-heavy) batches. Probabilistic - loops for 10 seconds.
 */
static void
test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
{
	uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
	struct igt_helper_process engine_load = { };
	const uint32_t bbend = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 obj = { };
	struct drm_i915_gem_execbuffer2 eb = { };
	int fd;

	igt_require(gem_has_execlists(gem_fd));
	igt_require(gem_context_has_engine(gem_fd, 0, e->flags));

	/* Minimal batch: just a batch-buffer-end, so it retires instantly. */
	obj.handle = gem_create(gem_fd, 4096);
	gem_write(gem_fd, obj.handle, 0, &bbend, sizeof(bbend));

	eb.buffer_count = 1;
	eb.buffers_ptr = to_user_pointer(&obj);
	eb.flags = e->flags;

	/*
	 * This test is probabilistic so run in a few times to increase the
	 * chance of hitting the race.
	 */
	igt_until_timeout(10) {
		/*
		 * Defeat the busy stats delayed disable, we need to guarantee
		 * we are the first PMU user.
		 */
		gem_quiescent_gpu(gem_fd);
		sleep(2);

		/* Apply interrupt-heavy load on the engine. */
		igt_fork_helper(&engine_load) {
			for (;;)
				gem_execbuf(gem_fd, &eb);
		}

		/* Wait a bit to allow engine load to start. */
		usleep(500e3);

		/* Enable the PMU. */
		fd = open_pmu(config);

		/* Stop load and close the PMU. */
		igt_stop_helper(&engine_load);
		close(fd);
	}

	/* Cleanup. */
	gem_close(gem_fd, obj.handle);
	gem_quiescent_gpu(gem_fd);
}
1514
/*
 * Assert that x lies within an asymmetric absolute tolerance band around
 * ref: no more than tol_up above and no more than tol_down below it.
 * Unlike assert_within_epsilon (relative), these take absolute tolerances.
 */
#define __assert_within(x, ref, tol_up, tol_down) \
	igt_assert_f((double)(x) <= ((double)(ref) + (tol_up)) && \
		     (double)(x) >= ((double)(ref) - (tol_down)), \
		     "%f not within +%f/-%f of %f! ('%s' vs '%s')\n", \
		     (double)(x), (double)(tol_up), (double)(tol_down), \
		     (double)(ref), #x, #ref)

/* Symmetric variant of __assert_within. */
#define assert_within(x, ref, tolerance) \
	__assert_within(x, ref, tolerance, tolerance)
1524
1525 static void
accuracy(int gem_fd,const struct intel_execution_engine2 * e,unsigned long target_busy_pct,unsigned long target_iters)1526 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
1527 unsigned long target_busy_pct,
1528 unsigned long target_iters)
1529 {
1530 const unsigned long min_test_us = 1e6;
1531 unsigned long pwm_calibration_us;
1532 unsigned long test_us;
1533 unsigned long cycle_us, busy_us, idle_us;
1534 double busy_r, expected;
1535 uint64_t val[2];
1536 uint64_t ts[2];
1537 int link[2];
1538 int fd;
1539
1540 /* Sampling platforms cannot reach the high accuracy criteria. */
1541 igt_require(gem_has_execlists(gem_fd));
1542
1543 /* Aim for approximately 100 iterations for calibration */
1544 cycle_us = min_test_us / target_iters;
1545 busy_us = cycle_us * target_busy_pct / 100;
1546 idle_us = cycle_us - busy_us;
1547
1548 while (idle_us < 2500 || busy_us < 2500) {
1549 busy_us *= 2;
1550 idle_us *= 2;
1551 }
1552 cycle_us = busy_us + idle_us;
1553 pwm_calibration_us = target_iters * cycle_us / 2;
1554 test_us = target_iters * cycle_us;
1555
1556 igt_info("calibration=%lums, test=%lums, cycle=%lums; ratio=%.2f%% (%luus/%luus)\n",
1557 pwm_calibration_us / 1000, test_us / 1000, cycle_us / 1000,
1558 (double)busy_us / cycle_us * 100.0,
1559 busy_us, idle_us);
1560
1561 assert_within_epsilon((double)busy_us / cycle_us,
1562 (double)target_busy_pct / 100.0,
1563 tolerance);
1564
1565 igt_assert(pipe(link) == 0);
1566
1567 /* Emit PWM pattern on the engine from a child. */
1568 igt_fork(child, 1) {
1569 const unsigned long timeout[] = {
1570 pwm_calibration_us * 1000, test_us * 1000
1571 };
1572 uint64_t total_busy_ns = 0, total_ns = 0;
1573 igt_spin_t *spin;
1574
1575 /* Allocate our spin batch and idle it. */
1576 spin = igt_spin_new(gem_fd, .engine = e->flags);
1577 igt_spin_end(spin);
1578 gem_sync(gem_fd, spin->handle);
1579
1580 /* 1st pass is calibration, second pass is the test. */
1581 for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
1582 unsigned int target_idle_us = idle_us;
1583 struct timespec start = { };
1584 uint64_t busy_ns = 0;
1585 unsigned long pass_ns = 0;
1586 double avg = 0.0, var = 0.0;
1587 unsigned int n = 0;
1588
1589 igt_nsec_elapsed(&start);
1590
1591 do {
1592 unsigned long loop_ns, loop_busy;
1593 struct timespec _ts = { };
1594 double err, tmp;
1595 uint64_t now;
1596
1597 /* PWM idle sleep. */
1598 _ts.tv_nsec = target_idle_us * 1000;
1599 nanosleep(&_ts, NULL);
1600
1601 /* Restart the spinbatch. */
1602 igt_spin_reset(spin);
1603 __submit_spin(gem_fd, spin, e, 0);
1604
1605 /* PWM busy sleep. */
1606 loop_busy = igt_nsec_elapsed(&start);
1607 _ts.tv_nsec = busy_us * 1000;
1608 nanosleep(&_ts, NULL);
1609 igt_spin_end(spin);
1610
1611 /* Time accounting. */
1612 now = igt_nsec_elapsed(&start);
1613 loop_busy = now - loop_busy;
1614 loop_ns = now - pass_ns;
1615 pass_ns = now;
1616
1617 busy_ns += loop_busy;
1618 total_busy_ns += loop_busy;
1619 total_ns += loop_ns;
1620
1621 /* Re-calibrate. */
1622 err = (double)total_busy_ns / total_ns -
1623 (double)target_busy_pct / 100.0;
1624 target_idle_us = (double)target_idle_us *
1625 (1.0 + err);
1626
1627 /* Running average and variance for debug. */
1628 err = 100.0 * total_busy_ns / total_ns;
1629 tmp = avg;
1630 avg += (err - avg) / ++n;
1631 var += (err - avg) * (err - tmp);
1632 } while (pass_ns < timeout[pass]);
1633
1634 pass_ns = igt_nsec_elapsed(&start);
1635 expected = (double)busy_ns / pass_ns;
1636
1637 igt_info("%u: %d cycles, busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f±%.3f%%)\n",
1638 pass, n,
1639 busy_ns / 1000, (pass_ns - busy_ns) / 1000,
1640 100 * expected, target_busy_pct,
1641 avg, sqrt(var / n));
1642
1643 write(link[1], &expected, sizeof(expected));
1644 }
1645
1646 igt_spin_free(gem_fd, spin);
1647 }
1648
1649 fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
1650
1651 /* Let the child run. */
1652 read(link[0], &expected, sizeof(expected));
1653 assert_within(100.0 * expected, target_busy_pct, 5);
1654
1655 /* Collect engine busyness for an interesting part of child runtime. */
1656 val[0] = __pmu_read_single(fd, &ts[0]);
1657 read(link[0], &expected, sizeof(expected));
1658 val[1] = __pmu_read_single(fd, &ts[1]);
1659 close(fd);
1660
1661 close(link[1]);
1662 close(link[0]);
1663
1664 igt_waitchildren();
1665
1666 busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
1667
1668 igt_info("error=%.2f%% (%.2f%% vs %.2f%%)\n",
1669 (busy_r - expected) * 100, 100 * busy_r, 100 * expected);
1670
1671 assert_within(100.0 * busy_r, 100.0 * expected, 2);
1672 }
1673
/* Test entry point: enumerate engines and register all PMU subtests. */
igt_main
{
	const unsigned int num_other_metrics =
				I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
	unsigned int num_engines = 0;
	int fd = -1;
	struct intel_execution_engine2 *e;
	unsigned int i;

	igt_fixture {
		fd = drm_open_driver_master(DRIVER_INTEL);

		igt_require_gem(fd);
		igt_require(i915_type_id() > 0);

		__for_each_physical_engine(fd, e)
			num_engines++;
	}

	/**
	 * Test invalid access via perf API is rejected.
	 */
	igt_subtest("invalid-init")
		invalid_init();

	__for_each_physical_engine(fd, e) {
		const unsigned int pct[] = { 2, 50, 98 };

		/**
		 * Test that a single engine metric can be initialized or it
		 * is correctly rejected.
		 */
		igt_subtest_f("init-busy-%s", e->name)
			init(fd, e, I915_SAMPLE_BUSY);

		igt_subtest_f("init-wait-%s", e->name)
			init(fd, e, I915_SAMPLE_WAIT);

		igt_subtest_f("init-sema-%s", e->name)
			init(fd, e, I915_SAMPLE_SEMA);

		/**
		 * Test that engines show no load when idle.
		 */
		igt_subtest_f("idle-%s", e->name)
			single(fd, e, 0);

		/**
		 * Test that a single engine reports load correctly.
		 */
		igt_subtest_f("busy-%s", e->name)
			single(fd, e, TEST_BUSY);
		igt_subtest_f("busy-idle-%s", e->name)
			single(fd, e, TEST_BUSY | TEST_TRAILING_IDLE);

		/**
		 * Test that when one engine is loaded the others report no
		 * load.
		 */
		igt_subtest_f("busy-check-all-%s", e->name)
			busy_check_all(fd, e, num_engines, TEST_BUSY);
		igt_subtest_f("busy-idle-check-all-%s", e->name)
			busy_check_all(fd, e, num_engines,
				       TEST_BUSY | TEST_TRAILING_IDLE);

		/**
		 * Test that when all except one engine are loaded all
		 * loads are correctly reported.
		 */
		igt_subtest_f("most-busy-check-all-%s", e->name)
			most_busy_check_all(fd, e, num_engines,
					    TEST_BUSY);
		igt_subtest_f("most-busy-idle-check-all-%s", e->name)
			most_busy_check_all(fd, e, num_engines,
					    TEST_BUSY |
					    TEST_TRAILING_IDLE);

		/**
		 * Test that semaphore counters report no activity on
		 * idle or busy engines.
		 */
		igt_subtest_f("idle-no-semaphores-%s", e->name)
			no_sema(fd, e, 0);

		igt_subtest_f("busy-no-semaphores-%s", e->name)
			no_sema(fd, e, TEST_BUSY);

		igt_subtest_f("busy-idle-no-semaphores-%s", e->name)
			no_sema(fd, e, TEST_BUSY | TEST_TRAILING_IDLE);

		/**
		 * Test that semaphore waits are correctly reported.
		 */
		igt_subtest_f("semaphore-wait-%s", e->name)
			sema_wait(fd, e, TEST_BUSY);

		igt_subtest_f("semaphore-wait-idle-%s", e->name)
			sema_wait(fd, e,
				  TEST_BUSY | TEST_TRAILING_IDLE);

		/**
		 * Check that two perf clients do not influence each
		 * others observations.
		 */
		igt_subtest_f("multi-client-%s", e->name)
			multi_client(fd, e);

		/**
		 * Check that reported usage is correct when PMU is
		 * enabled after the batch is running.
		 */
		igt_subtest_f("busy-start-%s", e->name)
			busy_start(fd, e);

		/**
		 * Check that reported usage is correct when PMU is
		 * enabled after two batches are running.
		 */
		igt_subtest_f("busy-double-start-%s", e->name) {
			gem_require_contexts(fd);
			busy_double_start(fd, e);
		}

		/**
		 * Check that the PMU can be safely enabled in face of
		 * interrupt-heavy engine load.
		 */
		igt_subtest_f("enable-race-%s", e->name)
			test_enable_race(fd, e);

		/**
		 * Check engine busyness accuracy is as expected.
		 */
		for (i = 0; i < ARRAY_SIZE(pct); i++) {
			igt_subtest_f("busy-accuracy-%u-%s",
				      pct[i], e->name)
				accuracy(fd, e, pct[i], 10);
		}

		igt_subtest_f("busy-hang-%s", e->name) {
			igt_hang_t hang = igt_allow_hang(fd, 0, 0);

			single(fd, e, TEST_BUSY | FLAG_HANG);

			igt_disallow_hang(fd, hang);
		}

		/**
		 * Test that event waits are correctly reported.
		 */
		if (e->class == I915_ENGINE_CLASS_RENDER)
			igt_subtest_f("event-wait-%s", e->name)
				event_wait(fd, e);
	}

	/**
	 * Test that when all engines are loaded all loads are
	 * correctly reported.
	 */
	igt_subtest("all-busy-check-all")
		all_busy_check_all(fd, num_engines, TEST_BUSY);
	igt_subtest("all-busy-idle-check-all")
		all_busy_check_all(fd, num_engines,
				   TEST_BUSY | TEST_TRAILING_IDLE);

	/**
	 * Test that non-engine counters can be initialized and read. Apart
	 * from the invalid metric which should fail.
	 */
	for (i = 0; i < num_other_metrics + 1; i++) {
		igt_subtest_f("other-init-%u", i)
			init_other(i, i < num_other_metrics);

		igt_subtest_f("other-read-%u", i)
			read_other(i, i < num_other_metrics);
	}

	/**
	 * Test counters are not affected by CPU offline/online events.
	 */
	igt_subtest("cpu-hotplug")
		cpu_hotplug(fd);

	/**
	 * Test GPU frequency.
	 */
	igt_subtest("frequency")
		test_frequency(fd);

	/**
	 * Test interrupt count reporting.
	 */
	igt_subtest("interrupts")
		test_interrupts(fd);

	igt_subtest("interrupts-sync")
		test_interrupts_sync(fd);

	/**
	 * Test RC6 residency reporting.
	 */
	igt_subtest("rc6")
		test_rc6(fd, 0);

	igt_subtest("rc6-runtime-pm")
		test_rc6(fd, TEST_RUNTIME_PM);

	igt_subtest("rc6-runtime-pm-long")
		test_rc6(fd, TEST_RUNTIME_PM | FLAG_LONG);

	/**
	 * Check render nodes are counted.
	 */
	igt_subtest_group {
		int render_fd = -1;

		igt_fixture {
			render_fd = drm_open_driver_render(DRIVER_INTEL);
			igt_require_gem(render_fd);

			gem_quiescent_gpu(fd);
		}

		__for_each_physical_engine(render_fd, e) {
			igt_subtest_f("render-node-busy-%s", e->name)
				single(render_fd, e, TEST_BUSY);
			igt_subtest_f("render-node-busy-idle-%s",
				      e->name)
				single(render_fd, e,
				       TEST_BUSY | TEST_TRAILING_IDLE);
		}

		igt_fixture {
			close(render_fd);
		}
	}
}
1911