/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2012,2013 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * Derived from arch/arm/include/asm/kvm_host.h:
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#ifndef __ARM64_KVM_HOST_H__
#define __ARM64_KVM_HOST_H__

#include <linux/arm-smccc.h>
#include <linux/bitmap.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <linux/kvm_types.h>
#include <linux/maple_tree.h>
#include <linux/percpu.h>
#include <linux/psci.h>
#include <asm/arch_gicv3.h>
#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/daifflags.h>
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>

#define __KVM_HAVE_ARCH_INTC_INITIALIZED

#define KVM_HALT_POLL_NS_DEFAULT 500000

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
#include <kvm/arm_pmu.h>

#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS

#define KVM_VCPU_MAX_FEATURES 7
#define KVM_VCPU_VALID_FEATURES	(BIT(KVM_VCPU_MAX_FEATURES) - 1)

#define KVM_REQ_SLEEP \
	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
#define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
#define KVM_REQ_RELOAD_GICv4	KVM_ARCH_REQ(4)
#define KVM_REQ_RELOAD_PMU	KVM_ARCH_REQ(5)
#define KVM_REQ_SUSPEND		KVM_ARCH_REQ(6)
#define KVM_REQ_RESYNC_PMU_EL0	KVM_ARCH_REQ(7)

#define KVM_DIRTY_LOG_MANUAL_CAPS	(KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
					 KVM_DIRTY_LOG_INITIALLY_SET)

#define KVM_HAVE_MMU_RWLOCK

/*
 * Mode of operation configurable with kvm-arm.mode early param.
 * See Documentation/admin-guide/kernel-parameters.txt for more information.
 */
enum kvm_mode {
	KVM_MODE_DEFAULT,
	KVM_MODE_PROTECTED,
	KVM_MODE_NV,
	KVM_MODE_NONE,
};
#ifdef CONFIG_KVM
enum kvm_mode kvm_get_mode(void);
#else
static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
#endif

DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);

extern unsigned int __ro_after_init kvm_sve_max_vl;
extern unsigned int __ro_after_init kvm_host_sve_max_vl;
int __init kvm_arm_init_sve(void);

u32 __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);

/* Head holds the head page of the list and its order. */
struct kvm_hyp_memcache {
	phys_addr_t head;
	unsigned long nr_pages;
	unsigned long flags;
};

static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
				     phys_addr_t *p,
				     phys_addr_t (*to_pa)(void *virt),
				     unsigned long order)
{
	*p = mc->head;
	mc->head = (to_pa(p) & PAGE_MASK) |
		   FIELD_PREP(~PAGE_MASK, order);
	mc->nr_pages++;
}

static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
				     void *(*to_va)(phys_addr_t phys),
				     unsigned long *order)
{
	phys_addr_t *p = to_va(mc->head & PAGE_MASK);

	if (!mc->nr_pages)
		return NULL;

	*order = FIELD_GET(~PAGE_MASK, mc->head);

	mc->head = *p;
	mc->nr_pages--;

	return p;
}

static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc,
				       unsigned long min_pages,
				       void *(*alloc_fn)(void *arg, unsigned long order),
				       phys_addr_t (*to_pa)(void *virt),
				       void *arg,
				       unsigned long order)
{
	while (mc->nr_pages < min_pages) {
		phys_addr_t *p = alloc_fn(arg, order);

		if (!p)
			return -ENOMEM;
		push_hyp_memcache(mc, p, to_pa, order);
	}

	return 0;
}

static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc,
				       void (*free_fn)(void *virt, void *arg, unsigned long order),
				       void *(*to_va)(phys_addr_t phys),
				       void *arg)
{
	unsigned long order;
	void *p;

	while (mc->nr_pages) {
		p = pop_hyp_memcache(mc, to_va, &order);
		free_fn(p, arg, order);
	}
}

#define HYP_MEMCACHE_ACCOUNT_KMEMCG	BIT(1)
#define HYP_MEMCACHE_ACCOUNT_STAGE2	BIT(2)

void free_hyp_memcache(struct kvm_hyp_memcache *mc);
int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, unsigned long order);

static inline void init_hyp_memcache(struct kvm_hyp_memcache *mc)
{
	memset(mc, 0, sizeof(*mc));
}

static inline void init_hyp_stage2_memcache(struct kvm_hyp_memcache *mc)
{
	init_hyp_memcache(mc);
	mc->flags = HYP_MEMCACHE_ACCOUNT_KMEMCG | HYP_MEMCACHE_ACCOUNT_STAGE2;
}
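
/*
 * A minimal usage sketch (illustrative only, not part of the kernel API):
 * the low bits of 'head' carry the order of the head page and the high bits
 * its physical address, with each free page storing the link to the next
 * entry.  The example_*() helpers below are hypothetical stand-ins for the
 * real pKVM host/hyp address converters and allocators.
 */
#if 0
static void *example_alloc_page(void *arg, unsigned long order)
{
	return (void *)__get_free_pages(GFP_KERNEL_ACCOUNT, order);
}

static void example_free_page(void *virt, void *arg, unsigned long order)
{
	free_pages((unsigned long)virt, order);
}

static phys_addr_t example_to_pa(void *virt)
{
	return virt_to_phys(virt);
}

static void *example_to_va(phys_addr_t phys)
{
	return phys_to_virt(phys);
}

static int example_fill_and_drain(struct kvm_hyp_memcache *mc)
{
	int ret;

	init_hyp_memcache(mc);

	/* Stock the cache with at least four order-0 pages. */
	ret = __topup_hyp_memcache(mc, 4, example_alloc_page,
				   example_to_pa, NULL, 0);
	if (ret)
		return ret;

	/* ... hand 'mc' over to the hypervisor, then reclaim what is left ... */
	__free_hyp_memcache(mc, example_free_page, example_to_va, NULL);
	return 0;
}
#endif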

struct kvm_vmid {
	atomic64_t id;
};

struct kvm_s2_mmu {
	struct kvm_vmid vmid;

	/*
	 * stage2 entry level table
	 *
	 * Two kvm_s2_mmu structures in the same VM can point to the same
	 * pgd here.  This happens when running a guest using a
	 * translation regime that isn't affected by its own stage-2
	 * translation, such as a non-VHE hypervisor running at vEL2, or
	 * for vEL1/EL0 with vHCR_EL2.VM == 0.  In that case, we use the
	 * canonical stage-2 page tables.
	 */
	phys_addr_t pgd_phys;
	struct kvm_pgtable *pgt;

	/* The last vcpu id that ran on each physical CPU */
	int __percpu *last_vcpu_ran;

#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0
	/*
	 * Memory cache used to split
	 * KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It
	 * is used to allocate stage2 page tables while splitting huge
	 * pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
	 * influences both the capacity of the split page cache, and
	 * how often KVM reschedules. Be wary of raising CHUNK_SIZE
	 * too high.
	 *
	 * Protected by kvm->slots_lock.
	 */
	struct kvm_mmu_memory_cache split_page_cache;
	uint64_t split_page_chunk_size;

	struct kvm_arch *arch;
};

struct kvm_arch_memory_slot {
};

/**
 * struct kvm_smccc_features: Descriptor of the hypercall services exposed to the guests
 *
 * @std_bmap: Bitmap of standard secure service calls
 * @std_hyp_bmap: Bitmap of standard hypervisor service calls
 * @vendor_hyp_bmap: Bitmap of vendor specific hypervisor service calls
 */
struct kvm_smccc_features {
	unsigned long std_bmap;
	unsigned long std_hyp_bmap;
	unsigned long vendor_hyp_bmap;
};

struct kvm_pinned_page {
	struct page *page;
	u64 ipa;
	u8 order;
	u16 pins;
};

typedef unsigned int pkvm_handle_t;

struct kvm_protected_vm {
	pkvm_handle_t handle;
	struct kvm_hyp_memcache stage2_teardown_mc;
	struct maple_tree pinned_pages;
	gpa_t pvmfw_load_addr;
	bool enabled;
};

struct kvm_arch {
	struct kvm_s2_mmu mmu;

	/* VTCR_EL2 value for this VM */
	u64 vtcr;

	/* Interrupt controller */
	struct vgic_dist vgic;

	/* Timers */
	struct arch_timer_vm_data timer_data;

	/* Mandated version of PSCI */
	u32 psci_version;

	/* Protects VM-scoped configuration data */
	struct mutex config_lock;

	/*
	 * If we encounter a data abort without valid instruction syndrome
	 * information, report this to user space.  User space can (and
	 * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
	 * supported.
	 */
#define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER	0
	/* Memory Tagging Extension enabled for the guest */
#define KVM_ARCH_FLAG_MTE_ENABLED			1
	/* At least one vCPU has run in the VM */
#define KVM_ARCH_FLAG_HAS_RAN_ONCE			2
	/* The vCPU feature set for the VM is configured */
#define KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED		3
	/* PSCI SYSTEM_SUSPEND enabled for the guest */
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED		4
	/* VM counter offset */
#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET			5
	/* Timer PPIs made immutable */
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE		6
	/* SMCCC filter initialized for the VM */
#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED		7
	/* Initial ID reg values loaded */
#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED		8
	/* Guest has bought into the MMIO guard extension */
#define KVM_ARCH_FLAG_MMIO_GUARD			9
	unsigned long flags;

	/* VM-wide vCPU feature set */
	DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);

	/*
	 * VM-wide PMU filter, implemented as a bitmap and big enough for
	 * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
	 */
	unsigned long *pmu_filter;
	struct arm_pmu *arm_pmu;

	cpumask_var_t supported_cpus;

	/* Hypercall features firmware registers' descriptor */
	struct kvm_smccc_features smccc_feat;
	struct maple_tree smccc_filter;

	/*
	 * Emulated CPU ID registers per VM.
	 * (Op0, Op1, CRn, CRm, Op2) of an ID register to be saved here
	 * is (3, 0, 0, crm, op2), where 1 <= crm < 8 and 0 <= op2 < 8.
	 *
	 * These emulated idregs are VM-wide, but accessed from the context of a vCPU.
	 * Atomic access to multiple idregs is guarded by kvm_arch.config_lock.
	 */
#define IDREG_IDX(id)		(((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
#define IDREG(kvm, id)		((kvm)->arch.id_regs[IDREG_IDX(id)])
#define KVM_ARM_ID_REG_NUM	(IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
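	/*
	 * Worked example (derived from the macros above): ID_AA64PFR0_EL1 is
	 * encoded as (3, 0, 0, 4, 0), so IDREG_IDX() yields ((4 - 1) << 3) | 0
	 * = 24 and IDREG(kvm, SYS_ID_AA64PFR0_EL1) reads id_regs[24].  The
	 * array therefore covers the CRm = 1..7, Op2 = 0..7 space, i.e.
	 * KVM_ARM_ID_REG_NUM = 56 entries.
	 */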
	u64 id_regs[KVM_ARM_ID_REG_NUM];

	/*
	 * For an untrusted host VM, 'pkvm.handle' is used to look up
	 * the associated pKVM instance in the hypervisor.
	 */
	struct kvm_protected_vm pkvm;
};

struct kvm_vcpu_fault_info {
	u64 esr_el2;	/* Hyp Syndrome Register */
	u64 far_el2;	/* Hyp Fault Address Register */
	u64 hpfar_el2;	/* Hyp IPA Fault Address Register */
	u64 disr_el1;	/* Deferred [SError] Status Register */
};

enum vcpu_sysreg {
	__INVALID_SYSREG__,	/* 0 is reserved as an invalid value */
	MPIDR_EL1,	/* MultiProcessor Affinity Register */
	CLIDR_EL1,	/* Cache Level ID Register */
	CSSELR_EL1,	/* Cache Size Selection Register */
	SCTLR_EL1,	/* System Control Register */
	ACTLR_EL1,	/* Auxiliary Control Register */
	CPACR_EL1,	/* Coprocessor Access Control */
	ZCR_EL1,	/* SVE Control */
	TTBR0_EL1,	/* Translation Table Base Register 0 */
	TTBR1_EL1,	/* Translation Table Base Register 1 */
	TCR_EL1,	/* Translation Control Register */
	TCR2_EL1,	/* Extended Translation Control Register */
	ESR_EL1,	/* Exception Syndrome Register */
	AFSR0_EL1,	/* Auxiliary Fault Status Register 0 */
	AFSR1_EL1,	/* Auxiliary Fault Status Register 1 */
	FAR_EL1,	/* Fault Address Register */
	MAIR_EL1,	/* Memory Attribute Indirection Register */
	VBAR_EL1,	/* Vector Base Address Register */
	CONTEXTIDR_EL1,	/* Context ID Register */
	TPIDR_EL0,	/* Thread ID, User R/W */
	TPIDRRO_EL0,	/* Thread ID, User R/O */
	TPIDR_EL1,	/* Thread ID, Privileged */
	AMAIR_EL1,	/* Aux Memory Attribute Indirection Register */
	CNTKCTL_EL1,	/* Timer Control Register (EL1) */
	PAR_EL1,	/* Physical Address Register */
	MDSCR_EL1,	/* Monitor Debug System Control Register */
	MDCCINT_EL1,	/* Monitor Debug Comms Channel Interrupt Enable Reg */
	OSLSR_EL1,	/* OS Lock Status Register */
	DISR_EL1,	/* Deferred Interrupt Status Register */

	/* Performance Monitors Registers */
	PMCR_EL0,	/* Control Register */
	PMSELR_EL0,	/* Event Counter Selection Register */
	PMEVCNTR0_EL0,	/* Event Counter Register (0-30) */
	PMEVCNTR30_EL0 = PMEVCNTR0_EL0 + 30,
	PMCCNTR_EL0,	/* Cycle Counter Register */
	PMEVTYPER0_EL0,	/* Event Type Register (0-30) */
	PMEVTYPER30_EL0 = PMEVTYPER0_EL0 + 30,
	PMCCFILTR_EL0,	/* Cycle Count Filter Register */
	PMCNTENSET_EL0,	/* Count Enable Set Register */
	PMINTENSET_EL1,	/* Interrupt Enable Set Register */
	PMOVSSET_EL0,	/* Overflow Flag Status Set Register */
	PMUSERENR_EL0,	/* User Enable Register */

	/* Pointer Authentication Registers in a strict increasing order. */
	APIAKEYLO_EL1,
	APIAKEYHI_EL1,
	APIBKEYLO_EL1,
	APIBKEYHI_EL1,
	APDAKEYLO_EL1,
	APDAKEYHI_EL1,
	APDBKEYLO_EL1,
	APDBKEYHI_EL1,
	APGAKEYLO_EL1,
	APGAKEYHI_EL1,

	ELR_EL1,
	SP_EL1,
	SPSR_EL1,

	CNTVOFF_EL2,
	CNTV_CVAL_EL0,
	CNTV_CTL_EL0,
	CNTP_CVAL_EL0,
	CNTP_CTL_EL0,

	/* Memory Tagging Extension registers */
	RGSR_EL1,	/* Random Allocation Tag Seed Register */
	GCR_EL1,	/* Tag Control Register */
	TFSR_EL1,	/* Tag Fault Status Register (EL1) */
	TFSRE0_EL1,	/* Tag Fault Status Register (EL0) */

	/* Permission Indirection Extension registers */
	PIR_EL1,	/* Permission Indirection Register 1 (EL1) */
	PIRE0_EL1,	/* Permission Indirection Register 0 (EL1) */

	/* 32bit specific registers. */
	DACR32_EL2,	/* Domain Access Control Register */
	IFSR32_EL2,	/* Instruction Fault Status Register */
	FPEXC32_EL2,	/* Floating-Point Exception Control Register */
	DBGVCR32_EL2,	/* Debug Vector Catch Register */

	/* EL2 registers */
	VPIDR_EL2,	/* Virtualization Processor ID Register */
	VMPIDR_EL2,	/* Virtualization Multiprocessor ID Register */
	SCTLR_EL2,	/* System Control Register (EL2) */
	ACTLR_EL2,	/* Auxiliary Control Register (EL2) */
	HCR_EL2,	/* Hypervisor Configuration Register */
	MDCR_EL2,	/* Monitor Debug Configuration Register (EL2) */
	CPTR_EL2,	/* Architectural Feature Trap Register (EL2) */
	HSTR_EL2,	/* Hypervisor System Trap Register */
	HACR_EL2,	/* Hypervisor Auxiliary Control Register */
	HCRX_EL2,	/* Extended Hypervisor Configuration Register */
	TTBR0_EL2,	/* Translation Table Base Register 0 (EL2) */
	TTBR1_EL2,	/* Translation Table Base Register 1 (EL2) */
	TCR_EL2,	/* Translation Control Register (EL2) */
	VTTBR_EL2,	/* Virtualization Translation Table Base Register */
	VTCR_EL2,	/* Virtualization Translation Control Register */
	SPSR_EL2,	/* EL2 saved program status register */
	ELR_EL2,	/* EL2 exception link register */
	AFSR0_EL2,	/* Auxiliary Fault Status Register 0 (EL2) */
	AFSR1_EL2,	/* Auxiliary Fault Status Register 1 (EL2) */
	ESR_EL2,	/* Exception Syndrome Register (EL2) */
	FAR_EL2,	/* Fault Address Register (EL2) */
	HPFAR_EL2,	/* Hypervisor IPA Fault Address Register */
	MAIR_EL2,	/* Memory Attribute Indirection Register (EL2) */
	AMAIR_EL2,	/* Auxiliary Memory Attribute Indirection Register (EL2) */
	VBAR_EL2,	/* Vector Base Address Register (EL2) */
	RVBAR_EL2,	/* Reset Vector Base Address Register */
	CONTEXTIDR_EL2,	/* Context ID Register (EL2) */
	TPIDR_EL2,	/* EL2 Software Thread ID Register */
	CNTHCTL_EL2,	/* Counter-timer Hypervisor Control register */
	SP_EL2,		/* EL2 Stack Pointer */
	HFGRTR_EL2,
	HFGWTR_EL2,
	HFGITR_EL2,
	HDFGRTR_EL2,
	HDFGWTR_EL2,
	HAFGRTR_EL2,
	CNTHP_CTL_EL2,
	CNTHP_CVAL_EL2,
	CNTHV_CTL_EL2,
	CNTHV_CVAL_EL2,

	NR_SYS_REGS	/* Nothing after this line! */
};

struct kvm_cpu_context {
	struct user_pt_regs regs;	/* sp = sp_el0 */

	u64	spsr_abt;
	u64	spsr_und;
	u64	spsr_irq;
	u64	spsr_fiq;

	struct user_fpsimd_state fp_regs;

	u64 sys_regs[NR_SYS_REGS];

	struct kvm_vcpu *__hyp_running_vcpu;
};

struct kvm_host_data {
	struct kvm_cpu_context host_ctxt;
};

struct kvm_host_psci_config {
	/* PSCI version used by host. */
	u32 version;
	u32 smccc_version;

	/* Function IDs used by host if version is v0.1. */
	struct psci_0_1_function_ids function_ids_0_1;

	bool psci_0_1_cpu_suspend_implemented;
	bool psci_0_1_cpu_on_implemented;
	bool psci_0_1_cpu_off_implemented;
	bool psci_0_1_migrate_implemented;
};

extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config);
#define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config)

extern s64 kvm_nvhe_sym(hyp_physvirt_offset);
#define hyp_physvirt_offset CHOOSE_NVHE_SYM(hyp_physvirt_offset)

extern u64 kvm_nvhe_sym(hyp_cpu_logical_map)[NR_CPUS];
#define hyp_cpu_logical_map CHOOSE_NVHE_SYM(hyp_cpu_logical_map)

struct kvm_iommu_driver {
	int (*init_driver)(void);
	void (*remove_driver)(void);
	pkvm_handle_t (*get_iommu_id)(struct device *dev);
	ANDROID_KABI_RESERVE(1);
	ANDROID_KABI_RESERVE(2);
	ANDROID_KABI_RESERVE(3);
	ANDROID_KABI_RESERVE(4);
	ANDROID_KABI_RESERVE(5);
	ANDROID_KABI_RESERVE(6);
	ANDROID_KABI_RESERVE(7);
	ANDROID_KABI_RESERVE(8);
};

struct vcpu_reset_state {
	unsigned long	pc;
	unsigned long	r0;
	bool		be;
	bool		reset;
};

struct kvm_hyp_req {
#define KVM_HYP_LAST_REQ	0
#define KVM_HYP_REQ_TYPE_MEM	1
#define KVM_HYP_REQ_TYPE_MAP	2
	u8 type;
	union {
		struct {
#define REQ_MEM_DEST_HYP_ALLOC		1
#define REQ_MEM_DEST_VCPU_MEMCACHE	2
#define REQ_MEM_DEST_HYP_IOMMU		3
			u8	dest;
			int	nr_pages;
			int	sz_alloc;	/* Size of the page. */
		} mem;
		struct {
			unsigned long	guest_ipa;
			size_t		size;
		} map;
	};
};

#define KVM_HYP_REQ_MAX	(PAGE_SIZE / sizeof(struct kvm_hyp_req))
/*
 * De-serialize a request from an SMCCC return.
 * See hyp-main.c for the serialization side.
 */
/* Register a2. */
#define SMCCC_REQ_TYPE_MASK	GENMASK_ULL(7, 0)
#define SMCCC_REQ_DEST_MASK	GENMASK_ULL(15, 8)
/* Register a3. */
#define SMCCC_REQ_NR_PAGES_MASK	GENMASK_ULL(31, 0)
#define SMCCC_REQ_SZ_ALLOC_MASK	GENMASK_ULL(63, 32)

static inline void hyp_reqs_smccc_decode(struct arm_smccc_res *res,
					 struct kvm_hyp_req *req)
{
	req->type = FIELD_GET(SMCCC_REQ_TYPE_MASK, res->a2);
	req->mem.dest = FIELD_GET(SMCCC_REQ_DEST_MASK, res->a2);
	req->mem.nr_pages = FIELD_GET(SMCCC_REQ_NR_PAGES_MASK, res->a3);
	req->mem.sz_alloc = FIELD_GET(SMCCC_REQ_SZ_ALLOC_MASK, res->a3);
}
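
/*
 * Illustrative sketch only (the example_* names are hypothetical): a
 * host-side caller could decode a single request carried in the SMCCC return
 * registers with hyp_reqs_smccc_decode() and, assuming KVM_HYP_LAST_REQ
 * terminates the shared page, walk a PAGE_SIZE-bound request array like this.
 */
#if 0
static void example_consume_hyp_reqs(struct arm_smccc_res *res,
				     struct kvm_hyp_req *reqs)
{
	struct kvm_hyp_req decoded;
	int i;

	/* A request carried directly in the SMCCC return registers... */
	hyp_reqs_smccc_decode(res, &decoded);
	if (decoded.type == KVM_HYP_REQ_TYPE_MEM)
		example_handle_one_req(&decoded);	/* hypothetical handler */

	/* ...or requests queued in the shared page, until KVM_HYP_LAST_REQ. */
	for (i = 0; i < KVM_HYP_REQ_MAX && reqs[i].type != KVM_HYP_LAST_REQ; i++)
		example_handle_one_req(&reqs[i]);
}
#endif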

struct kvm_vcpu_arch {
	struct kvm_cpu_context ctxt;

	/*
	 * Guest floating point state
	 *
	 * The architecture has two main floating point extensions,
	 * the original FPSIMD and SVE.  These have overlapping
	 * register views, with the FPSIMD V registers occupying the
	 * low 128 bits of the SVE Z registers.  When the core
	 * floating point code saves the register state of a task it
	 * records which view it saved in fp_type.
	 */
	void *sve_state;
	enum fp_type fp_type;
	unsigned int sve_max_vl;
	u64 svcr;

	/* Stage 2 paging state used by the hardware on next switch */
	struct kvm_s2_mmu *hw_mmu;

	/* Values of trap registers for the guest. */
	u64 hcr_el2;
	u64 mdcr_el2;
	u64 cptr_el2;

	/* Values of trap registers for the host before guest entry. */
	u64 mdcr_el2_host;

	/* Exception Information */
	struct kvm_vcpu_fault_info fault;

	/* Ownership of the FP regs */
	enum {
		FP_STATE_FREE,
		FP_STATE_HOST_OWNED,
		FP_STATE_GUEST_OWNED,
	} fp_state;

	/* Configuration flags, set once and for all before the vcpu can run */
	u8 cflags;

	/* Input flags to the hypervisor code, potentially cleared after use */
	u8 iflags;

	/* State flags for kernel bookkeeping, unused by the hypervisor code */
	u8 sflags;

	/*
	 * Don't run the guest (internal implementation need).
	 *
	 * Contrary to the flags above, this is set/cleared outside of
	 * a vcpu context, and thus cannot be mixed with the flags
	 * themselves (or the flag accesses would need to be made atomic).
	 */
	bool pause;

	/*
	 * We maintain more than a single set of debug registers to support
	 * debugging the guest from the host and to maintain separate host and
	 * guest state during world switches.  vcpu_debug_state are the debug
	 * registers of the vcpu as the guest sees them.  host_debug_state are
	 * the host registers which are saved and restored during
	 * world switches.  external_debug_state contains the debug
	 * values we want to use while debugging the guest.  This is set via
	 * the KVM_SET_GUEST_DEBUG ioctl.
	 *
	 * debug_ptr points to the set of debug registers that should be loaded
	 * onto the hardware when running the guest.
	 */
	struct kvm_guest_debug_arch *debug_ptr;
	struct kvm_guest_debug_arch vcpu_debug_state;
	struct kvm_guest_debug_arch external_debug_state;

	struct user_fpsimd_state *host_fpsimd_state;	/* hyp VA */

	struct {
		/* {Break,watch}point registers */
		struct kvm_guest_debug_arch regs;
		/* Statistical profiling extension */
		u64 pmscr_el1;
		/* Self-hosted trace */
		u64 trfcr_el1;
	} host_debug_state;

	/* VGIC state */
	struct vgic_cpu vgic_cpu;
	struct arch_timer_cpu timer_cpu;
	struct kvm_pmu pmu;

	/*
	 * Guest registers we preserve during guest debugging.
	 *
	 * These shadow registers are updated by the kvm_handle_sys_reg
	 * trap handler if the guest accesses or updates them while we
	 * are using guest debug.
	 */
	struct {
		u32	mdscr_el1;
		bool	pstate_ss;
	} guest_debug_preserved;

	/* vcpu power state */
	struct kvm_mp_state mp_state;
	spinlock_t mp_state_lock;

	union {
		/* Cache some mmu pages needed inside spinlock regions */
		struct kvm_mmu_memory_cache mmu_page_cache;
		/* Pages to be donated to pkvm/EL2 if it runs out */
		struct kvm_hyp_memcache stage2_mc;
	};

	/* feature flags */
	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);

	/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
	u64 vsesr_el2;

	/* Additional reset state */
	struct vcpu_reset_state	reset_state;

	/* Guest PV state */
	struct {
		u64 last_steal;
		gpa_t base;
	} steal;

	/* Per-vcpu CCSIDR override or NULL */
	u32 *ccsidr;

	/* PAGE_SIZE bound list of requests from the hypervisor to the host. */
	struct kvm_hyp_req *hyp_reqs;
};

/*
 * Each 'flag' is composed of a comma-separated triplet:
 *
 * - the flag-set it belongs to in the vcpu->arch structure
 * - the value for that flag
 * - the mask for that flag
 *
 * __vcpu_single_flag() builds such a triplet for a single-bit flag.
 * unpack_vcpu_flag() extracts the flag value from the triplet for
 * direct use outside of the flag accessors.
 */
#define __vcpu_single_flag(_set, _f)	_set, (_f), (_f)

#define __unpack_flag(_set, _f, _m)	_f
#define unpack_vcpu_flag(...)		__unpack_flag(__VA_ARGS__)

#define __build_check_flag(v, flagset, f, m)			\
	do {							\
		typeof(v->arch.flagset) *_fset;			\
								\
		/* Check that the flags fit in the mask */	\
		BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m)));	\
		/* Check that the flags fit in the type */	\
		BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m));	\
	} while (0)

#define __vcpu_get_flag(v, flagset, f, m)			\
	({							\
		__build_check_flag(v, flagset, f, m);		\
								\
		READ_ONCE(v->arch.flagset) & (m);		\
	})

/*
 * Note that the set/clear accessors must be preempt-safe in order to
 * avoid nesting them with load/put which also manipulate flags...
 */
#ifdef __KVM_NVHE_HYPERVISOR__
/* the nVHE hypervisor is always non-preemptible */
#define __vcpu_flags_preempt_disable()
#define __vcpu_flags_preempt_enable()
#else
#define __vcpu_flags_preempt_disable()	preempt_disable()
#define __vcpu_flags_preempt_enable()	preempt_enable()
#endif

#define __vcpu_set_flag(v, flagset, f, m)			\
	do {							\
		typeof(v->arch.flagset) *fset;			\
								\
		__build_check_flag(v, flagset, f, m);		\
								\
		fset = &v->arch.flagset;			\
		__vcpu_flags_preempt_disable();			\
		if (HWEIGHT(m) > 1)				\
			*fset &= ~(m);				\
		*fset |= (f);					\
		__vcpu_flags_preempt_enable();			\
	} while (0)

#define __vcpu_clear_flag(v, flagset, f, m)			\
	do {							\
		typeof(v->arch.flagset) *fset;			\
								\
		__build_check_flag(v, flagset, f, m);		\
								\
		fset = &v->arch.flagset;			\
		__vcpu_flags_preempt_disable();			\
		*fset &= ~(m);					\
		__vcpu_flags_preempt_enable();			\
	} while (0)

#define __vcpu_copy_flag(vt, vs, flagset, f, m)			\
	do {							\
		typeof(vs->arch.flagset) tmp, val;		\
								\
		__build_check_flag(vs, flagset, f, m);		\
								\
		val = READ_ONCE(vs->arch.flagset);		\
		val &= (m);					\
		tmp = READ_ONCE(vt->arch.flagset);		\
		tmp &= ~(m);					\
		tmp |= val;					\
		WRITE_ONCE(vt->arch.flagset, tmp);		\
	} while (0)


#define vcpu_get_flag(v, ...)	__vcpu_get_flag((v), __VA_ARGS__)
#define vcpu_set_flag(v, ...)	__vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_clear_flag(v, ...)	__vcpu_clear_flag((v), __VA_ARGS__)
#define vcpu_copy_flag(vt, vs, ...) __vcpu_copy_flag((vt), (vs), __VA_ARGS__)

/* SVE exposed to guest */
#define GUEST_HAS_SVE		__vcpu_single_flag(cflags, BIT(0))
/* SVE config completed */
#define VCPU_SVE_FINALIZED	__vcpu_single_flag(cflags, BIT(1))
/* PTRAUTH exposed to guest */
#define GUEST_HAS_PTRAUTH	__vcpu_single_flag(cflags, BIT(2))
/* KVM_ARM_VCPU_INIT completed */
#define VCPU_INITIALIZED	__vcpu_single_flag(cflags, BIT(3))

/* Exception pending */
#define PENDING_EXCEPTION	__vcpu_single_flag(iflags, BIT(0))
/*
 * PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't
 * be set together with an exception...
 */
#define INCREMENT_PC		__vcpu_single_flag(iflags, BIT(1))
/* Target EL/MODE (not a single flag, but let's abuse the macro) */
#define EXCEPT_MASK		__vcpu_single_flag(iflags, GENMASK(3, 1))
/* Cover both PENDING_EXCEPTION and EXCEPT_MASK for global operations */
#define PC_UPDATE_REQ		__vcpu_single_flag(iflags, GENMASK(3, 0))

/* Helpers to encode exceptions with minimum fuss */
#define __EXCEPT_MASK_VAL	unpack_vcpu_flag(EXCEPT_MASK)
#define __EXCEPT_SHIFT		__builtin_ctzl(__EXCEPT_MASK_VAL)
#define __vcpu_except_flags(_f)	iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL

/*
 * When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following
 * values:
 *
 * For AArch32 EL1:
 */
#define EXCEPT_AA32_UND		__vcpu_except_flags(0)
#define EXCEPT_AA32_IABT	__vcpu_except_flags(1)
#define EXCEPT_AA32_DABT	__vcpu_except_flags(2)
/* For AArch64: */
#define EXCEPT_AA64_EL1_SYNC	__vcpu_except_flags(0)
#define EXCEPT_AA64_EL1_IRQ	__vcpu_except_flags(1)
#define EXCEPT_AA64_EL1_FIQ	__vcpu_except_flags(2)
#define EXCEPT_AA64_EL1_SERR	__vcpu_except_flags(3)
/* For AArch64 with NV: */
#define EXCEPT_AA64_EL2_SYNC	__vcpu_except_flags(4)
#define EXCEPT_AA64_EL2_IRQ	__vcpu_except_flags(5)
#define EXCEPT_AA64_EL2_FIQ	__vcpu_except_flags(6)
#define EXCEPT_AA64_EL2_SERR	__vcpu_except_flags(7)
/* Guest debug is live */
#define DEBUG_DIRTY		__vcpu_single_flag(iflags, BIT(4))
/* Save SPE context if active */
#define DEBUG_STATE_SAVE_SPE	__vcpu_single_flag(iflags, BIT(5))
/* Save TRBE context if active */
#define DEBUG_STATE_SAVE_TRBE	__vcpu_single_flag(iflags, BIT(6))
/* vcpu running in HYP context (VHE-only) */
#define VCPU_HYP_CONTEXT	__vcpu_single_flag(iflags, BIT(7))
/* pKVM host vcpu state is dirty, needs resync (nVHE-only) */
#define PKVM_HOST_STATE_DIRTY	__vcpu_single_flag(iflags, BIT(7))

/* SVE enabled for host EL0 */
#define HOST_SVE_ENABLED	__vcpu_single_flag(sflags, BIT(0))
/* SME enabled for EL0 */
#define HOST_SME_ENABLED	__vcpu_single_flag(sflags, BIT(1))
/* Physical CPU not in supported_cpus */
#define ON_UNSUPPORTED_CPU	__vcpu_single_flag(sflags, BIT(2))
/* WFIT instruction trapped */
#define IN_WFIT			__vcpu_single_flag(sflags, BIT(3))
/* vcpu system registers loaded on physical CPU */
#define SYSREGS_ON_CPU		__vcpu_single_flag(sflags, BIT(4))
/* Software step state is Active-pending */
#define DBG_SS_ACTIVE_PENDING	__vcpu_single_flag(sflags, BIT(5))
/* PMUSERENR for the guest EL0 is on physical CPU */
#define PMUSERENR_ON_CPU	__vcpu_single_flag(sflags, BIT(6))
/* WFI instruction trapped */
#define IN_WFI			__vcpu_single_flag(sflags, BIT(7))
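
/*
 * Worked example (illustrative only): IN_WFIT expands to the triplet
 * "sflags, BIT(3), BIT(3)", so vcpu_set_flag(vcpu, IN_WFIT) becomes
 * __vcpu_set_flag(vcpu, sflags, BIT(3), BIT(3)) and ORs BIT(3) into
 * vcpu->arch.sflags with preemption disabled, while
 * vcpu_get_flag(vcpu, IN_WFIT) returns READ_ONCE(vcpu->arch.sflags) & BIT(3).
 */
#if 0
static void example_track_wfit(struct kvm_vcpu *vcpu, bool trapped)
{
	if (trapped)
		vcpu_set_flag(vcpu, IN_WFIT);
	else
		vcpu_clear_flag(vcpu, IN_WFIT);
}
#endif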

/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) +	\
			     sve_ffr_offset((vcpu)->arch.sve_max_vl))

#define _vcpu_sve_state_size(sve_max_vl) ({				\
	size_t __size_ret;						\
	unsigned int __vq;						\
									\
	if (WARN_ON(!sve_vl_valid(sve_max_vl))) {			\
		__size_ret = 0;						\
	} else {							\
		__vq = sve_vq_from_vl(sve_max_vl);			\
		__size_ret = SVE_SIG_REGS_SIZE(__vq);			\
	}								\
									\
	__size_ret;							\
})

#define vcpu_sve_max_vq(vcpu)	sve_vq_from_vl((vcpu)->arch.sve_max_vl)

#define vcpu_sve_state_size(vcpu) _vcpu_sve_state_size((vcpu)->arch.sve_max_vl)

#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
				 KVM_GUESTDBG_USE_SW_BP | \
				 KVM_GUESTDBG_USE_HW | \
				 KVM_GUESTDBG_SINGLESTEP)

#define vcpu_has_sve(vcpu) (system_supports_sve() &&			\
			    vcpu_get_flag(vcpu, GUEST_HAS_SVE))

#ifdef CONFIG_ARM64_PTR_AUTH
#define vcpu_has_ptrauth(vcpu)						\
	((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) ||		\
	  cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) &&		\
	 vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH))
#else
#define vcpu_has_ptrauth(vcpu)		false
#endif

#define vcpu_on_unsupported_cpu(vcpu)					\
	vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU)

#define vcpu_set_on_unsupported_cpu(vcpu)				\
	vcpu_set_flag(vcpu, ON_UNSUPPORTED_CPU)

#define vcpu_clear_on_unsupported_cpu(vcpu)				\
	vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU)

#define vcpu_gp_regs(v)		(&(v)->arch.ctxt.regs)

/*
 * Only use __vcpu_sys_reg/ctxt_sys_reg if you know you want the
 * memory backed version of a register, and not the one most recently
 * accessed by a running VCPU.  For example, for userspace access or
 * for system registers that are never context switched, but only
 * emulated.
 */
#define __ctxt_sys_reg(c,r)	(&(c)->sys_regs[(r)])

#define ctxt_sys_reg(c,r)	(*__ctxt_sys_reg(c,r))

#define __vcpu_sys_reg(v,r)	(ctxt_sys_reg(&(v)->arch.ctxt, (r)))

static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
{
	/*
	 * *** VHE ONLY ***
	 *
	 * System registers listed in the switch are not saved on every
	 * exit from the guest but are only saved on vcpu_put.
	 *
	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
	 * should never be listed below, because the guest cannot modify its
	 * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
	 * thread when emulating cross-VCPU communication.
	 */
	if (!has_vhe())
		return false;

	switch (reg) {
	case SCTLR_EL1:		*val = read_sysreg_s(SYS_SCTLR_EL12);	break;
	case CPACR_EL1:		*val = read_sysreg_s(SYS_CPACR_EL12);	break;
	case TTBR0_EL1:		*val = read_sysreg_s(SYS_TTBR0_EL12);	break;
	case TTBR1_EL1:		*val = read_sysreg_s(SYS_TTBR1_EL12);	break;
	case TCR_EL1:		*val = read_sysreg_s(SYS_TCR_EL12);	break;
	case ESR_EL1:		*val = read_sysreg_s(SYS_ESR_EL12);	break;
	case AFSR0_EL1:		*val = read_sysreg_s(SYS_AFSR0_EL12);	break;
	case AFSR1_EL1:		*val = read_sysreg_s(SYS_AFSR1_EL12);	break;
	case FAR_EL1:		*val = read_sysreg_s(SYS_FAR_EL12);	break;
	case MAIR_EL1:		*val = read_sysreg_s(SYS_MAIR_EL12);	break;
	case VBAR_EL1:		*val = read_sysreg_s(SYS_VBAR_EL12);	break;
	case CONTEXTIDR_EL1:	*val = read_sysreg_s(SYS_CONTEXTIDR_EL12); break;
	case TPIDR_EL0:		*val = read_sysreg_s(SYS_TPIDR_EL0);	break;
	case TPIDRRO_EL0:	*val = read_sysreg_s(SYS_TPIDRRO_EL0);	break;
	case TPIDR_EL1:		*val = read_sysreg_s(SYS_TPIDR_EL1);	break;
	case AMAIR_EL1:		*val = read_sysreg_s(SYS_AMAIR_EL12);	break;
	case CNTKCTL_EL1:	*val = read_sysreg_s(SYS_CNTKCTL_EL12);	break;
	case ELR_EL1:		*val = read_sysreg_s(SYS_ELR_EL12);	break;
	case PAR_EL1:		*val = read_sysreg_par();		break;
	case DACR32_EL2:	*val = read_sysreg_s(SYS_DACR32_EL2);	break;
	case IFSR32_EL2:	*val = read_sysreg_s(SYS_IFSR32_EL2);	break;
	case DBGVCR32_EL2:	*val = read_sysreg_s(SYS_DBGVCR32_EL2);	break;
	default:		return false;
	}

	return true;
}

static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
{
	/*
	 * *** VHE ONLY ***
	 *
	 * System registers listed in the switch are not restored on every
	 * entry to the guest but are only restored on vcpu_load.
	 *
	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
	 * should never be listed below, because the MPIDR should only be set
	 * once, before running the VCPU, and never changed later.
	 */
	if (!has_vhe())
		return false;

	switch (reg) {
	case SCTLR_EL1:		write_sysreg_s(val, SYS_SCTLR_EL12);	break;
	case CPACR_EL1:		write_sysreg_s(val, SYS_CPACR_EL12);	break;
	case TTBR0_EL1:		write_sysreg_s(val, SYS_TTBR0_EL12);	break;
	case TTBR1_EL1:		write_sysreg_s(val, SYS_TTBR1_EL12);	break;
	case TCR_EL1:		write_sysreg_s(val, SYS_TCR_EL12);	break;
	case ESR_EL1:		write_sysreg_s(val, SYS_ESR_EL12);	break;
	case AFSR0_EL1:		write_sysreg_s(val, SYS_AFSR0_EL12);	break;
	case AFSR1_EL1:		write_sysreg_s(val, SYS_AFSR1_EL12);	break;
	case FAR_EL1:		write_sysreg_s(val, SYS_FAR_EL12);	break;
	case MAIR_EL1:		write_sysreg_s(val, SYS_MAIR_EL12);	break;
	case VBAR_EL1:		write_sysreg_s(val, SYS_VBAR_EL12);	break;
	case CONTEXTIDR_EL1:	write_sysreg_s(val, SYS_CONTEXTIDR_EL12); break;
	case TPIDR_EL0:		write_sysreg_s(val, SYS_TPIDR_EL0);	break;
	case TPIDRRO_EL0:	write_sysreg_s(val, SYS_TPIDRRO_EL0);	break;
	case TPIDR_EL1:		write_sysreg_s(val, SYS_TPIDR_EL1);	break;
	case AMAIR_EL1:		write_sysreg_s(val, SYS_AMAIR_EL12);	break;
	case CNTKCTL_EL1:	write_sysreg_s(val, SYS_CNTKCTL_EL12);	break;
	case ELR_EL1:		write_sysreg_s(val, SYS_ELR_EL12);	break;
	case PAR_EL1:		write_sysreg_s(val, SYS_PAR_EL1);	break;
	case DACR32_EL2:	write_sysreg_s(val, SYS_DACR32_EL2);	break;
	case IFSR32_EL2:	write_sysreg_s(val, SYS_IFSR32_EL2);	break;
	case DBGVCR32_EL2:	write_sysreg_s(val, SYS_DBGVCR32_EL2);	break;
	default:		return false;
	}

	return true;
}

#define vcpu_read_sys_reg(__vcpu, reg)					\
	({								\
		u64 __val = 0x8badf00d8badf00d;				\
									\
		/* SYSREGS_ON_CPU is only used in VHE */		\
		((!is_nvhe_hyp_code() &&				\
		  vcpu_get_flag(__vcpu, SYSREGS_ON_CPU) &&		\
		  __vcpu_read_sys_reg_from_cpu(reg, &__val))) ?		\
		 __val							\
		 :							\
		 ctxt_sys_reg(&__vcpu->arch.ctxt, reg);			\
	})

#define vcpu_write_sys_reg(__vcpu, __val, reg)				\
	do {								\
		/* SYSREGS_ON_CPU is only used in VHE */		\
		if (is_nvhe_hyp_code() ||				\
		    !vcpu_get_flag(__vcpu, SYSREGS_ON_CPU) ||		\
		    !__vcpu_write_sys_reg_to_cpu(__val, reg))		\
			ctxt_sys_reg(&__vcpu->arch.ctxt, reg) = __val;	\
	} while (0)
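
/*
 * Illustrative sketch only: on VHE, a loaded vcpu (SYSREGS_ON_CPU set) keeps
 * the EL1 registers listed above live in hardware, so vcpu_read_sys_reg()
 * may read the CPU directly, whereas __vcpu_sys_reg() always returns the
 * memory-backed copy (e.g. what userspace sees via KVM_GET_ONE_REG).  The
 * function below is hypothetical and only demonstrates the accessor choice.
 */
#if 0
static u64 example_read_guest_sctlr(struct kvm_vcpu *vcpu)
{
	/* Correct in all contexts: hardware copy if loaded, else memory. */
	u64 live = vcpu_read_sys_reg(vcpu, SCTLR_EL1);

	/* Memory-backed copy only; may be stale while the vcpu is loaded. */
	u64 backed = __vcpu_sys_reg(vcpu, SCTLR_EL1);

	(void)backed;
	return live;
}
#endif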

struct kvm_vm_stat {
	struct kvm_vm_stat_generic generic;
	atomic64_t protected_hyp_mem;
	atomic64_t protected_shared_mem;
	atomic64_t protected_pgtable_mem;
};

struct kvm_vcpu_stat {
	struct kvm_vcpu_stat_generic generic;
	u64 hvc_exit_stat;
	u64 wfe_exit_stat;
	u64 wfi_exit_stat;
	u64 mmio_exit_user;
	u64 mmio_exit_kernel;
	u64 signal_exits;
	u64 exits;
};

unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);

unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);

int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
			      struct kvm_vcpu_events *events);

int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
			      struct kvm_vcpu_events *events);

#define KVM_ARCH_WANT_MMU_NOTIFIER

void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);

#define vcpu_has_run_once(vcpu)	!!rcu_access_pointer((vcpu)->pid)

#ifndef __KVM_NVHE_HYPERVISOR__
#define kvm_call_hyp_nvhe_smccc(f, ...)					\
	({								\
		struct arm_smccc_res res;				\
									\
		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(f),		\
				  ##__VA_ARGS__, &res);			\
		WARN_ON(res.a0 != SMCCC_RET_SUCCESS);			\
									\
		res;							\
	})

#define kvm_call_hyp_nvhe(f, ...)					\
	({								\
		struct arm_smccc_res res;				\
									\
		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(f),		\
				  ##__VA_ARGS__, &res);			\
		WARN_ON(res.a0 != SMCCC_RET_SUCCESS);			\
									\
		res.a1;							\
	})

/*
 * The couple of isb() below are there to guarantee the same behaviour
 * on VHE as on !VHE, where the eret to EL1 acts as a context
 * synchronization event.
 */
#define kvm_call_hyp(f, ...)						\
	do {								\
		if (has_vhe()) {					\
			f(__VA_ARGS__);					\
			isb();						\
		} else {						\
			kvm_call_hyp_nvhe(f, ##__VA_ARGS__);		\
		}							\
	} while(0)

#define kvm_call_hyp_ret(f, ...)					\
	({								\
		typeof(f(__VA_ARGS__)) ret;				\
									\
		if (has_vhe()) {					\
			ret = f(__VA_ARGS__);				\
			isb();						\
		} else {						\
			ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__);	\
		}							\
									\
		ret;							\
	})
#else /* __KVM_NVHE_HYPERVISOR__ */
#define kvm_call_hyp(f, ...) f(__VA_ARGS__)
#define kvm_call_hyp_ret(f, ...) f(__VA_ARGS__)
#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
#endif /* __KVM_NVHE_HYPERVISOR__ */
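
/*
 * Illustrative sketch only: with VHE the target function is called directly
 * (followed by an isb()), while with nVHE the same symbol is turned into a
 * hypercall ID via KVM_HOST_SMCCC_FUNC() and issued as an HVC.  The
 * __example_hyp_op symbol is hypothetical; __kvm_get_mdcr_el2 is a real hyp
 * entry point used this way by the debug code.
 */
#if 0
void __example_hyp_op(unsigned long arg);

static void example_invoke_hyp(unsigned long arg)
{
	u64 mdcr;

	kvm_call_hyp(__example_hyp_op, arg);		/* no return value */
	mdcr = kvm_call_hyp_ret(__kvm_get_mdcr_el2);	/* with return value */
	(void)mdcr;
}
#endif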

int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);

int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);

void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);

int __init kvm_sys_reg_table_init(void);
int __init populate_nv_trap_config(void);

bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);

/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);

int kvm_handle_mmio_return(struct kvm_vcpu *vcpu);
int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa);

/*
 * Returns true if a Performance Monitoring Interrupt (PMI), a.k.a. perf event,
 * arrived in guest context.  For arm64, any event that arrives while a vCPU is
 * loaded is considered to be "in guest".
 */
static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu)
{
	return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu;
}

long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
void kvm_update_stolen_time(struct kvm_vcpu *vcpu);

bool kvm_arm_pvtime_supported(void);
int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
			    struct kvm_device_attr *attr);
int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
			    struct kvm_device_attr *attr);
int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
			    struct kvm_device_attr *attr);

extern unsigned int __ro_after_init kvm_arm_vmid_bits;
int __init kvm_arm_vmid_alloc_init(void);
void __init kvm_arm_vmid_alloc_free(void);
void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
void kvm_arm_vmid_clear_active(void);

static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
{
	vcpu_arch->steal.base = INVALID_GPA;
}

static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
{
	return (vcpu_arch->steal.base != INVALID_GPA);
}

void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);

struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);

DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);

static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
	/* The host's MPIDR is immutable, so let's set it up at boot time */
	ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
}

static inline bool kvm_system_needs_idmapped_vectors(void)
{
	return cpus_have_const_cap(ARM64_SPECTRE_V3A);
}

static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}

void kvm_arm_init_debug(void);
void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);

#define __vcpu_save_guest_debug_regs(vcpu)				\
	do {								\
		u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1);		\
									\
		(vcpu)->arch.guest_debug_preserved.mdscr_el1 = val;	\
	} while(0)

#define __vcpu_restore_guest_debug_regs(vcpu)				\
	do {								\
		u64 val = (vcpu)->arch.guest_debug_preserved.mdscr_el1;	\
									\
		vcpu_write_sys_reg(vcpu, val, MDSCR_EL1);		\
	} while (0)

#define kvm_vcpu_os_lock_enabled(vcpu)		\
	(!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK))

#define kvm_vcpu_needs_debug_regs(vcpu)		\
	((vcpu)->guest_debug || kvm_vcpu_os_lock_enabled(vcpu))

int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
			       struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
			       struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
			       struct kvm_device_attr *attr);

int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
			       struct kvm_arm_copy_mte_tags *copy_tags);
int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
				    struct kvm_arm_counter_offset *offset);

/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);

static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
{
	return (!has_vhe() && attr->exclude_host);
}

/* Flags for host debug state */
void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);

#ifdef CONFIG_KVM
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);
bool kvm_set_pmuserenr(u64 val);
#else
static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u32 clr) {}
static inline bool kvm_set_pmuserenr(u64 val)
{
	return false;
}
#endif

void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu);

int __init kvm_set_ipa_limit(void);

#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);

#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS

#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE

#define kvm_vm_is_protected(kvm)	((kvm)->arch.pkvm.enabled)

int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);

#define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED)

#define kvm_has_mte(kvm)					\
	(system_supports_mte() &&				\
	 test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags))

#define kvm_supports_32bit_el0()				\
	(system_supports_32bit_el0() &&				\
	 !static_branch_unlikely(&arm64_mismatched_32bit_el0))

#define kvm_vm_has_ran_once(kvm)				\
	(test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags))

int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
extern phys_addr_t hyp_mem_base;
extern phys_addr_t hyp_mem_size;
void __init kvm_hyp_reserve(void);
#else
static inline void kvm_hyp_reserve(void) { }
#endif

void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);

int kvm_iommu_init_driver(void);
void kvm_iommu_remove_driver(void);

int pkvm_iommu_suspend(struct device *dev);
int pkvm_iommu_resume(struct device *dev);

struct kvm_iommu_ops;

int kvm_iommu_init_hyp(struct kvm_iommu_ops *hyp_ops,
		       struct kvm_hyp_memcache *atomic_mc,
		       unsigned long init_arg);

int kvm_iommu_register_driver(struct kvm_iommu_driver *kern_ops);

/* Allocator interface IDs. */
#define HYP_ALLOC_MGT_HEAP_ID		0
#define HYP_ALLOC_MGT_IOMMU_ID		1

unsigned long __pkvm_reclaim_hyp_alloc_mgt(unsigned long nr_pages);
int __pkvm_topup_hyp_alloc_mgt(unsigned long id, unsigned long nr_pages,
			       unsigned long sz_alloc);

#endif /* __ARM64_KVM_HOST_H__ */