1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2020 Intel Corporation
4 */
5
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
8
9 #include "gem/i915_gem_internal.h"
10
11 #include "i915_reg.h"
12 #include "intel_engine_heartbeat.h"
13 #include "intel_engine_pm.h"
14 #include "intel_engine_regs.h"
15 #include "intel_gpu_commands.h"
16 #include "intel_gt_clock_utils.h"
17 #include "intel_gt_pm.h"
18 #include "intel_rc6.h"
19 #include "selftest_engine_heartbeat.h"
20 #include "selftest_rps.h"
21 #include "selftests/igt_flush_test.h"
22 #include "selftests/igt_spinner.h"
23 #include "selftests/librapl.h"
24
/* Try to isolate the impact of cstates from determining frequency response */
26 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
27
/*
 * No-op work handler. The tests in this file install it as rps->work.func
 * for their duration and restore the saved handler afterwards.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
	/* Intentionally empty. */
}
31
/*
 * sort() comparator for u64 elements, ascending.
 *
 * Returns -1, 0 or 1. The result is derived from comparisons rather than
 * a subtraction, so it cannot be truncated when narrowed to int.
 */
static int cmp_u64(const void *A, const void *B)
{
	const u64 lhs = *(const u64 *)A;
	const u64 rhs = *(const u64 *)B;

	return (lhs > rhs) - (lhs < rhs);
}
43
/*
 * sort() comparator for u32 elements, ascending.
 *
 * Returns -1, 0 or 1 according to whether *A is below, equal to or
 * above *B.
 */
static int cmp_u32(const void *A, const void *B)
{
	const u32 lhs = *(const u32 *)A;
	const u32 rhs = *(const u32 *)B;

	return (lhs > rhs) - (lhs < rhs);
}
55
56 static struct i915_vma *
create_spin_counter(struct intel_engine_cs * engine,struct i915_address_space * vm,bool srm,u32 ** cancel,u32 ** counter)57 create_spin_counter(struct intel_engine_cs *engine,
58 struct i915_address_space *vm,
59 bool srm,
60 u32 **cancel,
61 u32 **counter)
62 {
63 enum {
64 COUNT,
65 INC,
66 __NGPR__,
67 };
68 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
69 struct drm_i915_gem_object *obj;
70 struct i915_vma *vma;
71 unsigned long end;
72 u32 *base, *cs;
73 int loop, i;
74 int err;
75
76 obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
77 if (IS_ERR(obj))
78 return ERR_CAST(obj);
79
80 end = obj->base.size / sizeof(u32) - 1;
81
82 vma = i915_vma_instance(obj, vm, NULL);
83 if (IS_ERR(vma)) {
84 err = PTR_ERR(vma);
85 goto err_put;
86 }
87
88 err = i915_vma_pin(vma, 0, 0, PIN_USER);
89 if (err)
90 goto err_unlock;
91
92 i915_vma_lock(vma);
93
94 base = i915_gem_object_pin_map(obj, I915_MAP_WC);
95 if (IS_ERR(base)) {
96 err = PTR_ERR(base);
97 goto err_unpin;
98 }
99 cs = base;
100
101 *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
102 for (i = 0; i < __NGPR__; i++) {
103 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
104 *cs++ = 0;
105 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
106 *cs++ = 0;
107 }
108
109 *cs++ = MI_LOAD_REGISTER_IMM(1);
110 *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
111 *cs++ = 1;
112
113 loop = cs - base;
114
115 /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
116 for (i = 0; i < 1024; i++) {
117 *cs++ = MI_MATH(4);
118 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
119 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
120 *cs++ = MI_MATH_ADD;
121 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
122
123 if (srm) {
124 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
125 *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
126 *cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
127 *cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
128 }
129 }
130
131 *cs++ = MI_BATCH_BUFFER_START_GEN8;
132 *cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
133 *cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
134 GEM_BUG_ON(cs - base > end);
135
136 i915_gem_object_flush_map(obj);
137
138 *cancel = base + loop;
139 *counter = srm ? memset32(base + end, 0, 1) : NULL;
140 return vma;
141
142 err_unpin:
143 i915_vma_unpin(vma);
144 err_unlock:
145 i915_vma_unlock(vma);
146 err_put:
147 i915_gem_object_put(obj);
148 return ERR_PTR(err);
149 }
150
/*
 * Poll the actual frequency (read_cagf()) until it settles.
 *
 * Returns the last value sampled as soon as one of the following holds:
 * the @timeout_ms deadline has passed, the sample equals @freq, or the
 * sample matches every entry of a 64-deep history (initially filled with
 * @freq). The sleep between samples starts at 20us, doubles each round
 * and is capped at @timeout_ms * 20 us.
 */
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	unsigned long deadline;
	u8 samples[64];
	int delay = 20;
	u8 slot = 0;

	memset(samples, freq, sizeof(samples));

	/* The PCU does not change instantly, but drifts towards the goal? */
	deadline = jiffies + msecs_to_jiffies(timeout_ms);
	for (;;) {
		u8 act = read_cagf(rps);

		/* Out of time, or target acquired */
		if (time_after(jiffies, deadline) || act == freq)
			return act;

		/* No change within the last N samples: it has settled */
		if (!memchr_inv(samples, act, sizeof(samples)))
			return act;

		samples[slot] = act;
		slot = (slot + 1) % ARRAY_SIZE(samples);

		usleep_range(delay, 2 * delay);
		delay *= 2;
		if (delay > timeout_ms * 20)
			delay = timeout_ms * 20;
	}
}
187
/*
 * Request @freq under rps->lock and report the frequency actually reached.
 *
 * Retries intel_rps_set() for up to 50ms. Returns 0 if the request never
 * succeeded, otherwise the value wait_for_freq() settles on within 50ms.
 */
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	int ret;

	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	ret = wait_for(!intel_rps_set(rps, freq), 50);
	if (!ret)
		GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	if (ret)
		return 0;

	return wait_for_freq(rps, freq, 50);
}
201
show_pstate_limits(struct intel_rps * rps)202 static void show_pstate_limits(struct intel_rps *rps)
203 {
204 struct drm_i915_private *i915 = rps_to_i915(rps);
205
206 if (IS_BROXTON(i915)) {
207 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
208 i915_mmio_reg_offset(BXT_RP_STATE_CAP),
209 intel_uncore_read(rps_to_uncore(rps),
210 BXT_RP_STATE_CAP));
211 } else if (GRAPHICS_VER(i915) == 9) {
212 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
213 i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
214 intel_uncore_read(rps_to_uncore(rps),
215 GEN9_RP_STATE_LIMITS));
216 }
217 }
218
live_rps_clock_interval(void * arg)219 int live_rps_clock_interval(void *arg)
220 {
221 struct intel_gt *gt = arg;
222 struct intel_rps *rps = >->rps;
223 void (*saved_work)(struct work_struct *wrk);
224 struct intel_engine_cs *engine;
225 enum intel_engine_id id;
226 struct igt_spinner spin;
227 int err = 0;
228
229 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
230 return 0;
231
232 if (igt_spinner_init(&spin, gt))
233 return -ENOMEM;
234
235 intel_gt_pm_wait_for_idle(gt);
236 saved_work = rps->work.func;
237 rps->work.func = dummy_rps_work;
238
239 intel_gt_pm_get(gt);
240 intel_rps_disable(>->rps);
241
242 intel_gt_check_clock_frequency(gt);
243
244 for_each_engine(engine, gt, id) {
245 struct i915_request *rq;
246 u32 cycles;
247 u64 dt;
248
249 if (!intel_engine_can_store_dword(engine))
250 continue;
251
252 st_engine_heartbeat_disable(engine);
253
254 rq = igt_spinner_create_request(&spin,
255 engine->kernel_context,
256 MI_NOOP);
257 if (IS_ERR(rq)) {
258 st_engine_heartbeat_enable(engine);
259 err = PTR_ERR(rq);
260 break;
261 }
262
263 i915_request_add(rq);
264
265 if (!igt_wait_for_spinner(&spin, rq)) {
266 pr_err("%s: RPS spinner did not start\n",
267 engine->name);
268 igt_spinner_end(&spin);
269 st_engine_heartbeat_enable(engine);
270 intel_gt_set_wedged(engine->gt);
271 err = -EIO;
272 break;
273 }
274
275 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
276
277 intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
278
279 /* Set the evaluation interval to infinity! */
280 intel_uncore_write_fw(gt->uncore,
281 GEN6_RP_UP_EI, 0xffffffff);
282 intel_uncore_write_fw(gt->uncore,
283 GEN6_RP_UP_THRESHOLD, 0xffffffff);
284
285 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
286 GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
287
288 if (wait_for(intel_uncore_read_fw(gt->uncore,
289 GEN6_RP_CUR_UP_EI),
290 10)) {
291 /* Just skip the test; assume lack of HW support */
292 pr_notice("%s: rps evaluation interval not ticking\n",
293 engine->name);
294 err = -ENODEV;
295 } else {
296 ktime_t dt_[5];
297 u32 cycles_[5];
298 int i;
299
300 for (i = 0; i < 5; i++) {
301 preempt_disable();
302
303 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
304 dt_[i] = ktime_get();
305
306 udelay(1000);
307
308 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
309 dt_[i] = ktime_sub(ktime_get(), dt_[i]);
310
311 preempt_enable();
312 }
313
314 /* Use the median of both cycle/dt; close enough */
315 sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
316 cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
317 sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
318 dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
319 }
320
321 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
322 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
323
324 igt_spinner_end(&spin);
325 st_engine_heartbeat_enable(engine);
326
327 if (err == 0) {
328 u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
329 u32 expected =
330 intel_gt_ns_to_pm_interval(gt, dt);
331
332 pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
333 engine->name, cycles, time, dt, expected,
334 gt->clock_frequency / 1000);
335
336 if (10 * time < 8 * dt ||
337 8 * time > 10 * dt) {
338 pr_err("%s: rps clock time does not match walltime!\n",
339 engine->name);
340 err = -EINVAL;
341 }
342
343 if (10 * expected < 8 * cycles ||
344 8 * expected > 10 * cycles) {
345 pr_err("%s: walltime does not match rps clock ticks!\n",
346 engine->name);
347 err = -EINVAL;
348 }
349 }
350
351 if (igt_flush_test(gt->i915))
352 err = -EIO;
353
354 break; /* once is enough */
355 }
356
357 intel_rps_enable(>->rps);
358 intel_gt_pm_put(gt);
359
360 igt_spinner_fini(&spin);
361
362 intel_gt_pm_wait_for_idle(gt);
363 rps->work.func = saved_work;
364
365 if (err == -ENODEV) /* skipped, don't report a fail */
366 err = 0;
367
368 return err;
369 }
370
live_rps_control(void * arg)371 int live_rps_control(void *arg)
372 {
373 struct intel_gt *gt = arg;
374 struct intel_rps *rps = >->rps;
375 void (*saved_work)(struct work_struct *wrk);
376 struct intel_engine_cs *engine;
377 enum intel_engine_id id;
378 struct igt_spinner spin;
379 int err = 0;
380
381 /*
382 * Check that the actual frequency matches our requested frequency,
383 * to verify our control mechanism. We have to be careful that the
384 * PCU may throttle the GPU in which case the actual frequency used
385 * will be lowered than requested.
386 */
387
388 if (!intel_rps_is_enabled(rps))
389 return 0;
390
391 if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
392 return 0;
393
394 if (igt_spinner_init(&spin, gt))
395 return -ENOMEM;
396
397 intel_gt_pm_wait_for_idle(gt);
398 saved_work = rps->work.func;
399 rps->work.func = dummy_rps_work;
400
401 intel_gt_pm_get(gt);
402 for_each_engine(engine, gt, id) {
403 struct i915_request *rq;
404 ktime_t min_dt, max_dt;
405 int f, limit;
406 int min, max;
407
408 if (!intel_engine_can_store_dword(engine))
409 continue;
410
411 st_engine_heartbeat_disable(engine);
412
413 rq = igt_spinner_create_request(&spin,
414 engine->kernel_context,
415 MI_NOOP);
416 if (IS_ERR(rq)) {
417 err = PTR_ERR(rq);
418 break;
419 }
420
421 i915_request_add(rq);
422
423 if (!igt_wait_for_spinner(&spin, rq)) {
424 pr_err("%s: RPS spinner did not start\n",
425 engine->name);
426 igt_spinner_end(&spin);
427 st_engine_heartbeat_enable(engine);
428 intel_gt_set_wedged(engine->gt);
429 err = -EIO;
430 break;
431 }
432
433 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
434 pr_err("%s: could not set minimum frequency [%x], only %x!\n",
435 engine->name, rps->min_freq, read_cagf(rps));
436 igt_spinner_end(&spin);
437 st_engine_heartbeat_enable(engine);
438 show_pstate_limits(rps);
439 err = -EINVAL;
440 break;
441 }
442
443 for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
444 if (rps_set_check(rps, f) < f)
445 break;
446 }
447
448 limit = rps_set_check(rps, f);
449
450 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
451 pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
452 engine->name, rps->min_freq, read_cagf(rps));
453 igt_spinner_end(&spin);
454 st_engine_heartbeat_enable(engine);
455 show_pstate_limits(rps);
456 err = -EINVAL;
457 break;
458 }
459
460 max_dt = ktime_get();
461 max = rps_set_check(rps, limit);
462 max_dt = ktime_sub(ktime_get(), max_dt);
463
464 min_dt = ktime_get();
465 min = rps_set_check(rps, rps->min_freq);
466 min_dt = ktime_sub(ktime_get(), min_dt);
467
468 igt_spinner_end(&spin);
469 st_engine_heartbeat_enable(engine);
470
471 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
472 engine->name,
473 rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
474 rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
475 limit, intel_gpu_freq(rps, limit),
476 min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
477
478 if (limit == rps->min_freq) {
479 pr_err("%s: GPU throttled to minimum!\n",
480 engine->name);
481 show_pstate_limits(rps);
482 err = -ENODEV;
483 break;
484 }
485
486 if (igt_flush_test(gt->i915)) {
487 err = -EIO;
488 break;
489 }
490 }
491 intel_gt_pm_put(gt);
492
493 igt_spinner_fini(&spin);
494
495 intel_gt_pm_wait_for_idle(gt);
496 rps->work.func = saved_work;
497
498 return err;
499 }
500
show_pcu_config(struct intel_rps * rps)501 static void show_pcu_config(struct intel_rps *rps)
502 {
503 struct drm_i915_private *i915 = rps_to_i915(rps);
504 unsigned int max_gpu_freq, min_gpu_freq;
505 intel_wakeref_t wakeref;
506 int gpu_freq;
507
508 if (!HAS_LLC(i915))
509 return;
510
511 min_gpu_freq = rps->min_freq;
512 max_gpu_freq = rps->max_freq;
513 if (GRAPHICS_VER(i915) >= 9) {
514 /* Convert GT frequency to 50 HZ units */
515 min_gpu_freq /= GEN9_FREQ_SCALER;
516 max_gpu_freq /= GEN9_FREQ_SCALER;
517 }
518
519 wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
520
521 pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
522 for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
523 int ia_freq = gpu_freq;
524
525 snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
526 &ia_freq, NULL);
527
528 pr_info("%5d %5d %5d\n",
529 gpu_freq * 50,
530 ((ia_freq >> 0) & 0xff) * 100,
531 ((ia_freq >> 8) & 0xff) * 100);
532 }
533
534 intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
535 }
536
__measure_frequency(u32 * cntr,int duration_ms)537 static u64 __measure_frequency(u32 *cntr, int duration_ms)
538 {
539 u64 dc, dt;
540
541 dc = READ_ONCE(*cntr);
542 dt = ktime_get();
543 usleep_range(1000 * duration_ms, 2000 * duration_ms);
544 dc = READ_ONCE(*cntr) - dc;
545 dt = ktime_get() - dt;
546
547 return div64_u64(1000 * 1000 * dc, dt);
548 }
549
measure_frequency_at(struct intel_rps * rps,u32 * cntr,int * freq)550 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
551 {
552 u64 x[5];
553 int i;
554
555 *freq = rps_set_check(rps, *freq);
556 for (i = 0; i < 5; i++)
557 x[i] = __measure_frequency(cntr, 2);
558 *freq = (*freq + read_cagf(rps)) / 2;
559
560 /* A simple triangle filter for better result stability */
561 sort(x, 5, sizeof(*x), cmp_u64, NULL);
562 return div_u64(x[1] + 2 * x[2] + x[3], 4);
563 }
564
__measure_cs_frequency(struct intel_engine_cs * engine,int duration_ms)565 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
566 int duration_ms)
567 {
568 u64 dc, dt;
569
570 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
571 dt = ktime_get();
572 usleep_range(1000 * duration_ms, 2000 * duration_ms);
573 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
574 dt = ktime_get() - dt;
575
576 return div64_u64(1000 * 1000 * dc, dt);
577 }
578
measure_cs_frequency_at(struct intel_rps * rps,struct intel_engine_cs * engine,int * freq)579 static u64 measure_cs_frequency_at(struct intel_rps *rps,
580 struct intel_engine_cs *engine,
581 int *freq)
582 {
583 u64 x[5];
584 int i;
585
586 *freq = rps_set_check(rps, *freq);
587 for (i = 0; i < 5; i++)
588 x[i] = __measure_cs_frequency(engine, 2);
589 *freq = (*freq + read_cagf(rps)) / 2;
590
591 /* A simple triangle filter for better result stability */
592 sort(x, 5, sizeof(*x), cmp_u64, NULL);
593 return div_u64(x[1] + 2 * x[2] + x[3], 4);
594 }
595
/*
 * Returns true when @x lies strictly between (@f_n / @f_d) * @y and
 * (@f_d / @f_n) * @y, evaluated with integer multiplications only.
 */
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	/* Lower bound: x / y > f_n / f_d */
	if (f_d * x <= f_n * y)
		return false;

	/* Upper bound: x / y < f_d / f_n */
	return f_n * x < f_d * y;
}
600
live_rps_frequency_cs(void * arg)601 int live_rps_frequency_cs(void *arg)
602 {
603 void (*saved_work)(struct work_struct *wrk);
604 struct intel_gt *gt = arg;
605 struct intel_rps *rps = >->rps;
606 struct intel_engine_cs *engine;
607 struct pm_qos_request qos;
608 enum intel_engine_id id;
609 int err = 0;
610
611 /*
612 * The premise is that the GPU does change frequency at our behest.
613 * Let's check there is a correspondence between the requested
614 * frequency, the actual frequency, and the observed clock rate.
615 */
616
617 if (!intel_rps_is_enabled(rps))
618 return 0;
619
620 if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
621 return 0;
622
623 if (CPU_LATENCY >= 0)
624 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
625
626 intel_gt_pm_wait_for_idle(gt);
627 saved_work = rps->work.func;
628 rps->work.func = dummy_rps_work;
629
630 for_each_engine(engine, gt, id) {
631 struct i915_request *rq;
632 struct i915_vma *vma;
633 u32 *cancel, *cntr;
634 struct {
635 u64 count;
636 int freq;
637 } min, max;
638
639 st_engine_heartbeat_disable(engine);
640
641 vma = create_spin_counter(engine,
642 engine->kernel_context->vm, false,
643 &cancel, &cntr);
644 if (IS_ERR(vma)) {
645 err = PTR_ERR(vma);
646 st_engine_heartbeat_enable(engine);
647 break;
648 }
649
650 rq = intel_engine_create_kernel_request(engine);
651 if (IS_ERR(rq)) {
652 err = PTR_ERR(rq);
653 goto err_vma;
654 }
655
656 err = i915_vma_move_to_active(vma, rq, 0);
657 if (!err)
658 err = rq->engine->emit_bb_start(rq,
659 i915_vma_offset(vma),
660 PAGE_SIZE, 0);
661 i915_request_add(rq);
662 if (err)
663 goto err_vma;
664
665 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
666 10)) {
667 pr_err("%s: timed loop did not start\n",
668 engine->name);
669 goto err_vma;
670 }
671
672 min.freq = rps->min_freq;
673 min.count = measure_cs_frequency_at(rps, engine, &min.freq);
674
675 max.freq = rps->max_freq;
676 max.count = measure_cs_frequency_at(rps, engine, &max.freq);
677
678 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
679 engine->name,
680 min.count, intel_gpu_freq(rps, min.freq),
681 max.count, intel_gpu_freq(rps, max.freq),
682 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
683 max.freq * min.count));
684
685 if (!scaled_within(max.freq * min.count,
686 min.freq * max.count,
687 2, 3)) {
688 int f;
689
690 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
691 engine->name,
692 max.freq * min.count,
693 min.freq * max.count);
694 show_pcu_config(rps);
695
696 for (f = min.freq + 1; f <= rps->max_freq; f++) {
697 int act = f;
698 u64 count;
699
700 count = measure_cs_frequency_at(rps, engine, &act);
701 if (act < f)
702 break;
703
704 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
705 engine->name,
706 act, intel_gpu_freq(rps, act), count,
707 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
708 act * min.count));
709
710 f = act; /* may skip ahead [pcu granularity] */
711 }
712
713 err = -EINTR; /* ignore error, continue on with test */
714 }
715
716 err_vma:
717 *cancel = MI_BATCH_BUFFER_END;
718 i915_gem_object_flush_map(vma->obj);
719 i915_gem_object_unpin_map(vma->obj);
720 i915_vma_unpin(vma);
721 i915_vma_unlock(vma);
722 i915_vma_put(vma);
723
724 st_engine_heartbeat_enable(engine);
725 if (igt_flush_test(gt->i915))
726 err = -EIO;
727 if (err)
728 break;
729 }
730
731 intel_gt_pm_wait_for_idle(gt);
732 rps->work.func = saved_work;
733
734 if (CPU_LATENCY >= 0)
735 cpu_latency_qos_remove_request(&qos);
736
737 return err;
738 }
739
live_rps_frequency_srm(void * arg)740 int live_rps_frequency_srm(void *arg)
741 {
742 void (*saved_work)(struct work_struct *wrk);
743 struct intel_gt *gt = arg;
744 struct intel_rps *rps = >->rps;
745 struct intel_engine_cs *engine;
746 struct pm_qos_request qos;
747 enum intel_engine_id id;
748 int err = 0;
749
750 /*
751 * The premise is that the GPU does change frequency at our behest.
752 * Let's check there is a correspondence between the requested
753 * frequency, the actual frequency, and the observed clock rate.
754 */
755
756 if (!intel_rps_is_enabled(rps))
757 return 0;
758
759 if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
760 return 0;
761
762 if (CPU_LATENCY >= 0)
763 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
764
765 intel_gt_pm_wait_for_idle(gt);
766 saved_work = rps->work.func;
767 rps->work.func = dummy_rps_work;
768
769 for_each_engine(engine, gt, id) {
770 struct i915_request *rq;
771 struct i915_vma *vma;
772 u32 *cancel, *cntr;
773 struct {
774 u64 count;
775 int freq;
776 } min, max;
777
778 st_engine_heartbeat_disable(engine);
779
780 vma = create_spin_counter(engine,
781 engine->kernel_context->vm, true,
782 &cancel, &cntr);
783 if (IS_ERR(vma)) {
784 err = PTR_ERR(vma);
785 st_engine_heartbeat_enable(engine);
786 break;
787 }
788
789 rq = intel_engine_create_kernel_request(engine);
790 if (IS_ERR(rq)) {
791 err = PTR_ERR(rq);
792 goto err_vma;
793 }
794
795 err = i915_vma_move_to_active(vma, rq, 0);
796 if (!err)
797 err = rq->engine->emit_bb_start(rq,
798 i915_vma_offset(vma),
799 PAGE_SIZE, 0);
800 i915_request_add(rq);
801 if (err)
802 goto err_vma;
803
804 if (wait_for(READ_ONCE(*cntr), 10)) {
805 pr_err("%s: timed loop did not start\n",
806 engine->name);
807 goto err_vma;
808 }
809
810 min.freq = rps->min_freq;
811 min.count = measure_frequency_at(rps, cntr, &min.freq);
812
813 max.freq = rps->max_freq;
814 max.count = measure_frequency_at(rps, cntr, &max.freq);
815
816 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
817 engine->name,
818 min.count, intel_gpu_freq(rps, min.freq),
819 max.count, intel_gpu_freq(rps, max.freq),
820 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
821 max.freq * min.count));
822
823 if (!scaled_within(max.freq * min.count,
824 min.freq * max.count,
825 1, 2)) {
826 int f;
827
828 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
829 engine->name,
830 max.freq * min.count,
831 min.freq * max.count);
832 show_pcu_config(rps);
833
834 for (f = min.freq + 1; f <= rps->max_freq; f++) {
835 int act = f;
836 u64 count;
837
838 count = measure_frequency_at(rps, cntr, &act);
839 if (act < f)
840 break;
841
842 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
843 engine->name,
844 act, intel_gpu_freq(rps, act), count,
845 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
846 act * min.count));
847
848 f = act; /* may skip ahead [pcu granularity] */
849 }
850
851 err = -EINTR; /* ignore error, continue on with test */
852 }
853
854 err_vma:
855 *cancel = MI_BATCH_BUFFER_END;
856 i915_gem_object_flush_map(vma->obj);
857 i915_gem_object_unpin_map(vma->obj);
858 i915_vma_unpin(vma);
859 i915_vma_unlock(vma);
860 i915_vma_put(vma);
861
862 st_engine_heartbeat_enable(engine);
863 if (igt_flush_test(gt->i915))
864 err = -EIO;
865 if (err)
866 break;
867 }
868
869 intel_gt_pm_wait_for_idle(gt);
870 rps->work.func = saved_work;
871
872 if (CPU_LATENCY >= 0)
873 cpu_latency_qos_remove_request(&qos);
874
875 return err;
876 }
877
sleep_for_ei(struct intel_rps * rps,int timeout_us)878 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
879 {
880 /* Flush any previous EI */
881 usleep_range(timeout_us, 2 * timeout_us);
882
883 /* Reset the interrupt status */
884 rps_disable_interrupts(rps);
885 GEM_BUG_ON(rps->pm_iir);
886 rps_enable_interrupts(rps);
887
888 /* And then wait for the timeout, for real this time */
889 usleep_range(2 * timeout_us, 3 * timeout_us);
890 }
891
__rps_up_interrupt(struct intel_rps * rps,struct intel_engine_cs * engine,struct igt_spinner * spin)892 static int __rps_up_interrupt(struct intel_rps *rps,
893 struct intel_engine_cs *engine,
894 struct igt_spinner *spin)
895 {
896 struct intel_uncore *uncore = engine->uncore;
897 struct i915_request *rq;
898 u32 timeout;
899
900 if (!intel_engine_can_store_dword(engine))
901 return 0;
902
903 rps_set_check(rps, rps->min_freq);
904
905 rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
906 if (IS_ERR(rq))
907 return PTR_ERR(rq);
908
909 i915_request_get(rq);
910 i915_request_add(rq);
911
912 if (!igt_wait_for_spinner(spin, rq)) {
913 pr_err("%s: RPS spinner did not start\n",
914 engine->name);
915 i915_request_put(rq);
916 intel_gt_set_wedged(engine->gt);
917 return -EIO;
918 }
919
920 if (!intel_rps_is_active(rps)) {
921 pr_err("%s: RPS not enabled on starting spinner\n",
922 engine->name);
923 igt_spinner_end(spin);
924 i915_request_put(rq);
925 return -EINVAL;
926 }
927
928 if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
929 pr_err("%s: RPS did not register UP interrupt\n",
930 engine->name);
931 i915_request_put(rq);
932 return -EINVAL;
933 }
934
935 if (rps->last_freq != rps->min_freq) {
936 pr_err("%s: RPS did not program min frequency\n",
937 engine->name);
938 i915_request_put(rq);
939 return -EINVAL;
940 }
941
942 timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
943 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
944 timeout = DIV_ROUND_UP(timeout, 1000);
945
946 sleep_for_ei(rps, timeout);
947 GEM_BUG_ON(i915_request_completed(rq));
948
949 igt_spinner_end(spin);
950 i915_request_put(rq);
951
952 if (rps->cur_freq != rps->min_freq) {
953 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
954 engine->name, intel_rps_read_actual_frequency(rps));
955 return -EINVAL;
956 }
957
958 if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
959 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
960 engine->name, rps->pm_iir,
961 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
962 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
963 intel_uncore_read(uncore, GEN6_RP_UP_EI));
964 return -EINVAL;
965 }
966
967 return 0;
968 }
969
__rps_down_interrupt(struct intel_rps * rps,struct intel_engine_cs * engine)970 static int __rps_down_interrupt(struct intel_rps *rps,
971 struct intel_engine_cs *engine)
972 {
973 struct intel_uncore *uncore = engine->uncore;
974 u32 timeout;
975
976 rps_set_check(rps, rps->max_freq);
977
978 if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
979 pr_err("%s: RPS did not register DOWN interrupt\n",
980 engine->name);
981 return -EINVAL;
982 }
983
984 if (rps->last_freq != rps->max_freq) {
985 pr_err("%s: RPS did not program max frequency\n",
986 engine->name);
987 return -EINVAL;
988 }
989
990 timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
991 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
992 timeout = DIV_ROUND_UP(timeout, 1000);
993
994 sleep_for_ei(rps, timeout);
995
996 if (rps->cur_freq != rps->max_freq) {
997 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
998 engine->name,
999 intel_rps_read_actual_frequency(rps));
1000 return -EINVAL;
1001 }
1002
1003 if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1004 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1005 engine->name, rps->pm_iir,
1006 intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1007 intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1008 intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1009 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1010 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1011 intel_uncore_read(uncore, GEN6_RP_UP_EI));
1012 return -EINVAL;
1013 }
1014
1015 return 0;
1016 }
1017
live_rps_interrupt(void * arg)1018 int live_rps_interrupt(void *arg)
1019 {
1020 struct intel_gt *gt = arg;
1021 struct intel_rps *rps = >->rps;
1022 void (*saved_work)(struct work_struct *wrk);
1023 struct intel_engine_cs *engine;
1024 enum intel_engine_id id;
1025 struct igt_spinner spin;
1026 u32 pm_events;
1027 int err = 0;
1028
1029 /*
1030 * First, let's check whether or not we are receiving interrupts.
1031 */
1032
1033 if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
1034 return 0;
1035
1036 intel_gt_pm_get(gt);
1037 pm_events = rps->pm_events;
1038 intel_gt_pm_put(gt);
1039 if (!pm_events) {
1040 pr_err("No RPS PM events registered, but RPS is enabled?\n");
1041 return -ENODEV;
1042 }
1043
1044 if (igt_spinner_init(&spin, gt))
1045 return -ENOMEM;
1046
1047 intel_gt_pm_wait_for_idle(gt);
1048 saved_work = rps->work.func;
1049 rps->work.func = dummy_rps_work;
1050
1051 for_each_engine(engine, gt, id) {
1052 /* Keep the engine busy with a spinner; expect an UP! */
1053 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1054 intel_gt_pm_wait_for_idle(engine->gt);
1055 GEM_BUG_ON(intel_rps_is_active(rps));
1056
1057 st_engine_heartbeat_disable(engine);
1058
1059 err = __rps_up_interrupt(rps, engine, &spin);
1060
1061 st_engine_heartbeat_enable(engine);
1062 if (err)
1063 goto out;
1064
1065 intel_gt_pm_wait_for_idle(engine->gt);
1066 }
1067
1068 /* Keep the engine awake but idle and check for DOWN */
1069 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1070 st_engine_heartbeat_disable(engine);
1071 intel_rc6_disable(>->rc6);
1072
1073 err = __rps_down_interrupt(rps, engine);
1074
1075 intel_rc6_enable(>->rc6);
1076 st_engine_heartbeat_enable(engine);
1077 if (err)
1078 goto out;
1079 }
1080 }
1081
1082 out:
1083 if (igt_flush_test(gt->i915))
1084 err = -EIO;
1085
1086 igt_spinner_fini(&spin);
1087
1088 intel_gt_pm_wait_for_idle(gt);
1089 rps->work.func = saved_work;
1090
1091 return err;
1092 }
1093
__measure_power(int duration_ms)1094 static u64 __measure_power(int duration_ms)
1095 {
1096 u64 dE, dt;
1097
1098 dE = librapl_energy_uJ();
1099 dt = ktime_get();
1100 usleep_range(1000 * duration_ms, 2000 * duration_ms);
1101 dE = librapl_energy_uJ() - dE;
1102 dt = ktime_get() - dt;
1103
1104 return div64_u64(1000 * 1000 * dE, dt);
1105 }
1106
measure_power(struct intel_rps * rps,int * freq)1107 static u64 measure_power(struct intel_rps *rps, int *freq)
1108 {
1109 u64 x[5];
1110 int i;
1111
1112 for (i = 0; i < 5; i++)
1113 x[i] = __measure_power(5);
1114
1115 *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;
1116
1117 /* A simple triangle filter for better result stability */
1118 sort(x, 5, sizeof(*x), cmp_u64, NULL);
1119 return div_u64(x[1] + 2 * x[2] + x[3], 4);
1120 }
1121
measure_power_at(struct intel_rps * rps,int * freq)1122 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1123 {
1124 *freq = rps_set_check(rps, *freq);
1125 return measure_power(rps, freq);
1126 }
1127
live_rps_power(void * arg)1128 int live_rps_power(void *arg)
1129 {
1130 struct intel_gt *gt = arg;
1131 struct intel_rps *rps = >->rps;
1132 void (*saved_work)(struct work_struct *wrk);
1133 struct intel_engine_cs *engine;
1134 enum intel_engine_id id;
1135 struct igt_spinner spin;
1136 int err = 0;
1137
1138 /*
1139 * Our fundamental assumption is that running at lower frequency
1140 * actually saves power. Let's see if our RAPL measurement support
1141 * that theory.
1142 */
1143
1144 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1145 return 0;
1146
1147 if (!librapl_supported(gt->i915))
1148 return 0;
1149
1150 if (igt_spinner_init(&spin, gt))
1151 return -ENOMEM;
1152
1153 intel_gt_pm_wait_for_idle(gt);
1154 saved_work = rps->work.func;
1155 rps->work.func = dummy_rps_work;
1156
1157 for_each_engine(engine, gt, id) {
1158 struct i915_request *rq;
1159 struct {
1160 u64 power;
1161 int freq;
1162 } min, max;
1163
1164 if (!intel_engine_can_store_dword(engine))
1165 continue;
1166
1167 st_engine_heartbeat_disable(engine);
1168
1169 rq = igt_spinner_create_request(&spin,
1170 engine->kernel_context,
1171 MI_NOOP);
1172 if (IS_ERR(rq)) {
1173 st_engine_heartbeat_enable(engine);
1174 err = PTR_ERR(rq);
1175 break;
1176 }
1177
1178 i915_request_add(rq);
1179
1180 if (!igt_wait_for_spinner(&spin, rq)) {
1181 pr_err("%s: RPS spinner did not start\n",
1182 engine->name);
1183 igt_spinner_end(&spin);
1184 st_engine_heartbeat_enable(engine);
1185 intel_gt_set_wedged(engine->gt);
1186 err = -EIO;
1187 break;
1188 }
1189
1190 max.freq = rps->max_freq;
1191 max.power = measure_power_at(rps, &max.freq);
1192
1193 min.freq = rps->min_freq;
1194 min.power = measure_power_at(rps, &min.freq);
1195
1196 igt_spinner_end(&spin);
1197 st_engine_heartbeat_enable(engine);
1198
1199 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1200 engine->name,
1201 min.power, intel_gpu_freq(rps, min.freq),
1202 max.power, intel_gpu_freq(rps, max.freq));
1203
1204 if (10 * min.freq >= 9 * max.freq) {
1205 pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1206 min.freq, intel_gpu_freq(rps, min.freq),
1207 max.freq, intel_gpu_freq(rps, max.freq));
1208 continue;
1209 }
1210
1211 if (11 * min.power > 10 * max.power) {
1212 pr_err("%s: did not conserve power when setting lower frequency!\n",
1213 engine->name);
1214 err = -EINVAL;
1215 break;
1216 }
1217
1218 if (igt_flush_test(gt->i915)) {
1219 err = -EIO;
1220 break;
1221 }
1222 }
1223
1224 igt_spinner_fini(&spin);
1225
1226 intel_gt_pm_wait_for_idle(gt);
1227 rps->work.func = saved_work;
1228
1229 return err;
1230 }
1231
live_rps_dynamic(void * arg)1232 int live_rps_dynamic(void *arg)
1233 {
1234 struct intel_gt *gt = arg;
1235 struct intel_rps *rps = >->rps;
1236 struct intel_engine_cs *engine;
1237 enum intel_engine_id id;
1238 struct igt_spinner spin;
1239 int err = 0;
1240
1241 /*
1242 * We've looked at the bascs, and have established that we
1243 * can change the clock frequency and that the HW will generate
1244 * interrupts based on load. Now we check how we integrate those
1245 * moving parts into dynamic reclocking based on load.
1246 */
1247
1248 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1249 return 0;
1250
1251 if (igt_spinner_init(&spin, gt))
1252 return -ENOMEM;
1253
1254 if (intel_rps_has_interrupts(rps))
1255 pr_info("RPS has interrupt support\n");
1256 if (intel_rps_uses_timer(rps))
1257 pr_info("RPS has timer support\n");
1258
1259 for_each_engine(engine, gt, id) {
1260 struct i915_request *rq;
1261 struct {
1262 ktime_t dt;
1263 u8 freq;
1264 } min, max;
1265
1266 if (!intel_engine_can_store_dword(engine))
1267 continue;
1268
1269 intel_gt_pm_wait_for_idle(gt);
1270 GEM_BUG_ON(intel_rps_is_active(rps));
1271 rps->cur_freq = rps->min_freq;
1272
1273 intel_engine_pm_get(engine);
1274 intel_rc6_disable(>->rc6);
1275 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1276
1277 rq = igt_spinner_create_request(&spin,
1278 engine->kernel_context,
1279 MI_NOOP);
1280 if (IS_ERR(rq)) {
1281 err = PTR_ERR(rq);
1282 goto err;
1283 }
1284
1285 i915_request_add(rq);
1286
1287 max.dt = ktime_get();
1288 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1289 max.dt = ktime_sub(ktime_get(), max.dt);
1290
1291 igt_spinner_end(&spin);
1292
1293 min.dt = ktime_get();
1294 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1295 min.dt = ktime_sub(ktime_get(), min.dt);
1296
1297 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1298 engine->name,
1299 max.freq, intel_gpu_freq(rps, max.freq),
1300 ktime_to_ns(max.dt),
1301 min.freq, intel_gpu_freq(rps, min.freq),
1302 ktime_to_ns(min.dt));
1303 if (min.freq >= max.freq) {
1304 pr_err("%s: dynamic reclocking of spinner failed\n!",
1305 engine->name);
1306 err = -EINVAL;
1307 }
1308
1309 err:
1310 intel_rc6_enable(>->rc6);
1311 intel_engine_pm_put(engine);
1312
1313 if (igt_flush_test(gt->i915))
1314 err = -EIO;
1315 if (err)
1316 break;
1317 }
1318
1319 igt_spinner_fini(&spin);
1320
1321 return err;
1322 }
1323