// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

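/*
 * Bit set in perf_event_attr.config1 to request a chained (64-bit) event from
 * the host PMU driver. This is assumed to match the config1 bit that the
 * ARMv8 arm_pmu driver interprets as "chain the underlying hardware counters".
 */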
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	switch (kvm->arch.pmuver) {
	case ID_AA64DFR0_PMUVER_8_0:
		return GENMASK(9, 0);
	case ID_AA64DFR0_PMUVER_8_1:
	case ID_AA64DFR0_PMUVER_8_4:
	case ID_AA64DFR0_PMUVER_8_5:
		return GENMASK(15, 0);
	default:	/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
		return 0;
	}
}

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

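/*
 * Walk back from an individual counter to its owning vcpu: pmc->idx is the
 * counter's position in the kvm_pmu::pmc[] array, so subtracting it yields
 * pmc[0], from which container_of() recovers the pmu, the vcpu arch state and
 * finally the vcpu itself.
 */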
static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is the high (odd) counter of a pair
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}

static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;
	else
		return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 eventsel, reg;

	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;

	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is the value of the counter register plus
	 * whatever the perf event has counted since it was programmed.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);
	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
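	/*
	 * The guest-visible value is the saved register plus whatever the
	 * backing perf event has counted, so adjust the saved part by the
	 * difference rather than overwriting it with val directly.
	 */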
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
	u64 counter, reg, val;

	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
	} else {
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

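/*
 * PMCR_EL0.N holds the number of implemented event counters; the cycle
 * counter (ARMV8_PMU_CYCLE_IDX) is always present, so it is always part of
 * the valid mask even when no event counters are implemented.
 */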
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

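/*
 * A counter contributes to the overflow interrupt line only if the PMU is
 * globally enabled (PMCR_EL0.E), the counter is enabled, its overflow bit is
 * set and its interrupt is enabled; the masking below intersects all of these.
 */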
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
		reg &= kvm_pmu_valid_counter_mask(vcpu);
	}

	return reg;
}

static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU bit of the device irq bitmap for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When the perf interrupt is an NMI, we cannot safely notify the vcpu
 * corresponding to the event. This is why we need a callback to do it once
 * outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;
	struct kvm_pmu *pmu;

	pmu = container_of(work, struct kvm_pmu, overflow_work);
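	/*
	 * Any counter identifies the vcpu: kvm_pmc_to_vcpu() only needs to
	 * reach pmc[0], so passing the first counter of the array is enough.
	 */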
	vcpu = kvm_pmc_to_vcpu(pmu->pmc);

	kvm_vcpu_kick(vcpu);
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
		u64 type, reg;

		if (!(val & BIT(i)))
			continue;

		/* PMSWINC only applies to ... SW_INC! */
		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* increment this even SW_INC counter */
		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
		reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

		if (reg) /* no overflow on the low part */
			continue;

		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* increment the high counter */
			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
			reg = lower_32_bits(reg);
			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
			if (!reg) /* mark overflow on the high counter */
				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
		} else {
			/* mark overflow on low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
		}
	}
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	int i;

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
	} else {
		kvm_pmu_disable_counter_mask(vcpu, mask);
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

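	/*
	 * PMCR_EL0.P resets all event counters to zero but, per the
	 * architecture, leaves the cycle counter untouched (that one is
	 * reset by PMCR_EL0.C above), hence it is dropped from the mask.
	 */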
	if (val & ARMV8_PMU_PMCR_P) {
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, counter, reg, data;

	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(vcpu, pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/* Software increment event doesn't need to be backed by a perf event */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
		return;

	/*
	 * If we have a filter in place and the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

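	/*
	 * perf treats attr.sample_period as the number of events left before
	 * an overflow is reported, so program it with the distance between
	 * the counter's current value and the point at which it wraps.
	 */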
	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc)) {
		/*
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter.
		 */
		attr.sample_period = (-counter) & GENMASK(63, 0);
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow, pmc);
	}

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd register.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
	bool new_state, old_state;

	old_state = kvm_pmu_pmc_is_chained(pmc);
	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

	if (old_state == new_state)
		return;

	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
	kvm_pmu_stop_counter(vcpu, canonical_pmc);
	if (new_state) {
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
		return;
	}
	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When the guest OS accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
 * event with the given hardware event number. Here we call the perf_event API
 * to emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	u64 reg, mask;

	mask = ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

static int kvm_pmu_probe_pmuver(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu;
	int pmuver = 0xf;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return 0xf;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver)
			pmuver = pmu->pmuver;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmuver;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

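	/*
	 * Each PMCEIDx_EL0 register describes 64 events: bits [31:0] cover the
	 * base event range and bits [63:32] the extended 0x4000-based range,
	 * so the filter bitmap is consulted at both offsets.
	 */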
	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	if (!vcpu->kvm->arch.pmuver)
		vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();

	if (vcpu->kvm->arch.pmuver == 0xf)
		return -ENODEV;

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(vcpu->kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&vcpu->kvm->lock);

		if (!vcpu->kvm->arch.pmu_filter) {
			vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
			if (!vcpu->kvm->arch.pmu_filter) {
				mutex_unlock(&vcpu->kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&vcpu->kvm->lock);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}