// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	switch (kvm->arch.pmuver) {
	case ID_AA64DFR0_PMUVER_8_0:
		return GENMASK(9, 0);
	case ID_AA64DFR0_PMUVER_8_1:
	case ID_AA64DFR0_PMUVER_8_4:
	case ID_AA64DFR0_PMUVER_8_5:
	case ID_AA64DFR0_PMUVER_8_7:
		return GENMASK(15, 0);
	default:	/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
		return 0;
	}
}

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

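	/*
	 * pmc->idx is also the index of this pmc within the pmu->pmc[]
	 * array, so stepping the pointer back by idx lands on pmc[0] and
	 * lets container_of() recover the enclosing structures.
	 */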
	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}

static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;
	else
		return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 eventsel, reg;

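	/*
	 * The CHAIN event is always programmed on the odd (high) counter of
	 * a pair, so inspect that counter's event type regardless of which
	 * half of the pair was passed in.
	 */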
	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;

	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of the counter
	 * register plus whatever the perf event has counted so far.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

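	/*
	 * A chained pair reads back as a single 64-bit value; the odd
	 * counter exposes the top half. Everything else, apart from the
	 * cycle counter, is truncated to 32 bits.
	 */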
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);
	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
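	/*
	 * Part of the current count lives in the perf event, so adjust the
	 * saved register by the delta between the requested value and what
	 * the guest currently reads back.
	 */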
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
	u64 counter, reg, val;

	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
	} else {
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

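	/*
	 * An overflow only becomes visible as an interrupt when the PMU is
	 * globally enabled (PMCR_EL0.E) and the counter is both enabled and
	 * has its overflow interrupt enabled.
	 */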
	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}

static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU overflow bit for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When the perf interrupt is an NMI, we cannot safely notify the vcpu
 * corresponding to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;
	struct kvm_pmu *pmu;

	pmu = container_of(work, struct kvm_pmu, overflow_work);
	vcpu = kvm_pmc_to_vcpu(pmu->pmc);

	kvm_vcpu_kick(vcpu);
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

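	/*
	 * Latch the overflow in PMOVSSET and, if that makes the interrupt
	 * line pending, tell the vcpu. The kick is deferred to irq_work
	 * when we are called in NMI context.
	 */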
	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

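	/*
	 * PMSWINC only exists for the event counters; the cycle counter
	 * cannot count SW_INCR, hence the loop stops at ARMV8_PMU_CYCLE_IDX.
	 */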
	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
		u64 type, reg;

		if (!(val & BIT(i)))
			continue;

		/* PMSWINC only applies to ... SW_INC! */
		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* increment this SW_INC counter */
		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
		reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

		if (reg) /* no overflow on the low part */
			continue;

		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* increment the high counter */
			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
			reg = lower_32_bits(reg);
			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
			if (!reg) /* mark overflow on the high counter */
				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
		} else {
			/* mark overflow on low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
		}
	}
	kvm_vcpu_pmu_restore_guest(vcpu);
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

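	/*
	 * PMCR_EL0.C resets only the cycle counter; PMCR_EL0.P resets the
	 * event counters but leaves the cycle counter untouched.
	 */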
	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The selected counter index
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, counter, reg, data;

	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(vcpu, pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/* Software increment event doesn't need to be backed by a perf event */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
		return;

	/*
	 * If we have a filter in place and the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

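	/*
	 * Build a pinned, kernel-owned counting event attached to current,
	 * mirroring the guest's EL0/EL1 filtering choices and excluding
	 * anything counted by the host or at EL2.
	 */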
	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc)) {
		/*
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter.
		 */
		attr.sample_period = (-counter) & GENMASK(63, 0);
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow, pmc);
	}

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The selected counter index
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd register.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
	bool new_state, old_state;

	old_state = kvm_pmu_pmc_is_chained(pmc);
	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

	if (old_state == new_state)
		return;

	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
	kvm_pmu_stop_counter(vcpu, canonical_pmc);
	if (new_state) {
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
		return;
	}
	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The selected counter index
 *
 * When the guest OS accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
 * event with the given hardware event number. Here we call perf_event API to
 * emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	u64 reg, mask;

	mask = ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
	    !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
		static_branch_enable(&kvm_arm_pmu_available);
}

static int kvm_pmu_probe_pmuver(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu;
	int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return ID_AA64DFR0_PMUVER_IMP_DEF;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver)
			pmuver = pmu->pmuver;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmuver;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

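	/*
	 * Mask out any common event that the userspace filter disallows,
	 * walking the filter bitmap a byte at a time: the low 32 events sit
	 * at 'base', their extended counterparts at 0x4000 + base.
	 */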
	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and use an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	if (!vcpu->kvm->arch.pmuver)
		vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();

	if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
		return -ENODEV;

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(vcpu->kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&vcpu->kvm->lock);

		if (!vcpu->kvm->arch.pmu_filter) {
			vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!vcpu->kvm->arch.pmu_filter) {
				mutex_unlock(&vcpu->kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&vcpu->kvm->lock);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}