/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2012,2013 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * Derived from arch/arm/include/asm/kvm_host.h:
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#ifndef __ARM64_KVM_HOST_H__
#define __ARM64_KVM_HOST_H__

#include <linux/arm-smccc.h>
#include <linux/bitmap.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <linux/kvm_types.h>
#include <linux/maple_tree.h>
#include <linux/percpu.h>
#include <linux/psci.h>
#include <asm/arch_gicv3.h>
#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/daifflags.h>
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>

#define __KVM_HAVE_ARCH_INTC_INITIALIZED

#define KVM_HALT_POLL_NS_DEFAULT 500000

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
#include <kvm/arm_pmu.h>

#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS

#define KVM_VCPU_MAX_FEATURES 7
#define KVM_VCPU_VALID_FEATURES	(BIT(KVM_VCPU_MAX_FEATURES) - 1)

#define KVM_REQ_SLEEP \
	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
#define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
#define KVM_REQ_RELOAD_GICv4	KVM_ARCH_REQ(4)
#define KVM_REQ_RELOAD_PMU	KVM_ARCH_REQ(5)
#define KVM_REQ_SUSPEND		KVM_ARCH_REQ(6)
#define KVM_REQ_RESYNC_PMU_EL0	KVM_ARCH_REQ(7)

#define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
				     KVM_DIRTY_LOG_INITIALLY_SET)

#define KVM_HAVE_MMU_RWLOCK

/*
 * Mode of operation configurable with kvm-arm.mode early param.
 * See Documentation/admin-guide/kernel-parameters.txt for more information.
 */
enum kvm_mode {
	KVM_MODE_DEFAULT,
	KVM_MODE_PROTECTED,
	KVM_MODE_NV,
	KVM_MODE_NONE,
};
#ifdef CONFIG_KVM
enum kvm_mode kvm_get_mode(void);
#else
static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; }
#endif

DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);

extern unsigned int __ro_after_init kvm_sve_max_vl;
extern unsigned int __ro_after_init kvm_host_sve_max_vl;
int __init kvm_arm_init_sve(void);

u32 __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);

/* Head holds the page head and its order. */
struct kvm_hyp_memcache {
	phys_addr_t head;
	unsigned long nr_pages;
	unsigned long flags;
};
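
/*
 * Layout sketch (derived from push/pop below, not a separate API): the
 * cached pages are threaded through themselves as a LIFO, with 'head'
 * packing the page-aligned PA of the top page and that page's order in
 * the low PAGE_SHIFT bits. With 4K pages, an order-2 entry whose page
 * sits at PA 0x80000000 is stored as head == 0x80000002.
 */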

static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
				     phys_addr_t *p,
				     phys_addr_t (*to_pa)(void *virt),
				     unsigned long order)
{
	*p = mc->head;
	mc->head = (to_pa(p) & PAGE_MASK) |
		   FIELD_PREP(~PAGE_MASK, order);
	mc->nr_pages++;
}

static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
				     void *(*to_va)(phys_addr_t phys),
				     unsigned long *order)
{
	phys_addr_t *p = to_va(mc->head & PAGE_MASK);

	if (!mc->nr_pages)
		return NULL;

	*order = FIELD_GET(~PAGE_MASK, mc->head);

	mc->head = *p;
	mc->nr_pages--;

	return p;
}

static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc,
				       unsigned long min_pages,
				       void *(*alloc_fn)(void *arg, unsigned long order),
				       phys_addr_t (*to_pa)(void *virt),
				       void *arg,
				       unsigned long order)
{
	while (mc->nr_pages < min_pages) {
		phys_addr_t *p = alloc_fn(arg, order);

		if (!p)
			return -ENOMEM;
		push_hyp_memcache(mc, p, to_pa, order);
	}

	return 0;
}

static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc,
				       void (*free_fn)(void *virt, void *arg, unsigned long order),
				       void *(*to_va)(phys_addr_t phys),
				       void *arg)
{
	unsigned long order;
	void *p;

	while (mc->nr_pages) {
		p = pop_hyp_memcache(mc, to_va, &order);
		free_fn(p, arg, order);
	}
}

#define HYP_MEMCACHE_ACCOUNT_KMEMCG BIT(1)
#define HYP_MEMCACHE_ACCOUNT_STAGE2 BIT(2)

void free_hyp_memcache(struct kvm_hyp_memcache *mc);
int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, unsigned long order);

static inline void init_hyp_memcache(struct kvm_hyp_memcache *mc)
{
	memset(mc, 0, sizeof(*mc));
}

static inline void init_hyp_stage2_memcache(struct kvm_hyp_memcache *mc)
{
	init_hyp_memcache(mc);
	mc->flags = HYP_MEMCACHE_ACCOUNT_KMEMCG | HYP_MEMCACHE_ACCOUNT_STAGE2;
}
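
/*
 * Usage sketch, assuming a caller that supplies its own allocator and
 * VA<->PA conversion helpers (my_alloc, my_free, my_virt_to_phys and
 * my_phys_to_virt below are hypothetical stand-ins, not kernel APIs):
 *
 *	struct kvm_hyp_memcache mc;
 *
 *	init_hyp_memcache(&mc);
 *	if (__topup_hyp_memcache(&mc, 4, my_alloc, my_virt_to_phys,
 *				 NULL, 0))
 *		return -ENOMEM;
 *	...
 *	__free_hyp_memcache(&mc, my_free, my_phys_to_virt, NULL);
 *
 * Host code normally goes through the topup_hyp_memcache() and
 * free_hyp_memcache() wrappers declared above instead.
 */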

struct kvm_vmid {
	atomic64_t id;
};

struct kvm_s2_mmu {
	struct kvm_vmid vmid;

	/*
	 * stage2 entry level table
	 *
	 * Two kvm_s2_mmu structures in the same VM can point to the same
	 * pgd here.  This happens when running a guest using a
	 * translation regime that isn't affected by its own stage-2
	 * translation, such as a non-VHE hypervisor running at vEL2, or
	 * for vEL1/EL0 with vHCR_EL2.VM == 0.  In that case, we use the
	 * canonical stage-2 page tables.
	 */
	phys_addr_t	pgd_phys;
	struct kvm_pgtable *pgt;

	/* The last vcpu id that ran on each physical CPU */
	int __percpu *last_vcpu_ran;

#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0
	/*
	 * Memory cache used to split
	 * KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It
	 * is used to allocate stage2 page tables while splitting huge
	 * pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
	 * influences both the capacity of the split page cache, and
	 * how often KVM reschedules. Be wary of raising CHUNK_SIZE
	 * too high.
	 *
	 * Protected by kvm->slots_lock.
	 */
	struct kvm_mmu_memory_cache split_page_cache;
	uint64_t split_page_chunk_size;

	struct kvm_arch *arch;
};
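
/*
 * Configuration sketch: userspace selects the eager-split chunk size
 * via KVM_ENABLE_CAP(KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE) on the VM fd,
 * passing a valid block size, or 0 to disable, in args[0]. A plausible
 * caller (values illustrative) would do:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE,
 *		.args[0] = 2UL << 20,
 *	};
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */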

struct kvm_arch_memory_slot {
};

/**
 * struct kvm_smccc_features - Descriptor of the hypercall services exposed to the guests
 *
 * @std_bmap: Bitmap of standard secure service calls
 * @std_hyp_bmap: Bitmap of standard hypervisor service calls
 * @vendor_hyp_bmap: Bitmap of vendor-specific hypervisor service calls
 */
struct kvm_smccc_features {
	unsigned long std_bmap;
	unsigned long std_hyp_bmap;
	unsigned long vendor_hyp_bmap;
};

struct kvm_pinned_page {
	struct page		*page;
	u64			ipa;
	u8			order;
	u16			pins;
};

typedef unsigned int pkvm_handle_t;

struct kvm_protected_vm {
	pkvm_handle_t handle;
	struct kvm_hyp_memcache stage2_teardown_mc;
	struct maple_tree pinned_pages;
	gpa_t pvmfw_load_addr;
	bool enabled;
};

struct kvm_arch {
	struct kvm_s2_mmu mmu;

	/* VTCR_EL2 value for this VM */
	u64    vtcr;

	/* Interrupt controller */
	struct vgic_dist	vgic;

	/* Timers */
	struct arch_timer_vm_data timer_data;

	/* Mandated version of PSCI */
	u32 psci_version;

	/* Protects VM-scoped configuration data */
	struct mutex config_lock;

	/*
	 * If we encounter a data abort without valid instruction syndrome
	 * information, report this to user space.  User space can (and
	 * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
	 * supported.
	 */
#define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER	0
	/* Memory Tagging Extension enabled for the guest */
#define KVM_ARCH_FLAG_MTE_ENABLED			1
	/* At least one vCPU has run in the VM */
#define KVM_ARCH_FLAG_HAS_RAN_ONCE			2
	/* The vCPU feature set for the VM is configured */
#define KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED		3
	/* PSCI SYSTEM_SUSPEND enabled for the guest */
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED		4
	/* VM counter offset */
#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET			5
	/* Timer PPIs made immutable */
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE		6
	/* SMCCC filter initialized for the VM */
#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED		7
	/* Initial ID reg values loaded */
#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED		8
	/* Guest has bought into the MMIO guard extension */
#define KVM_ARCH_FLAG_MMIO_GUARD			9
	unsigned long flags;

	/* VM-wide vCPU feature set */
	DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);

	/*
	 * VM-wide PMU filter, implemented as a bitmap and big enough for
	 * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
	 */
	unsigned long *pmu_filter;
	struct arm_pmu *arm_pmu;

	cpumask_var_t supported_cpus;

	/* Hypercall features firmware registers' descriptor */
	struct kvm_smccc_features smccc_feat;
	struct maple_tree smccc_filter;

	/*
	 * Emulated CPU ID registers, per VM. The (Op0, Op1, CRn, CRm, Op2)
	 * encoding of the ID registers saved here is (3, 0, 0, crm, op2),
	 * where 1 <= crm < 8 and 0 <= op2 < 8.
	 *
	 * These emulated idregs are VM-wide, but accessed from the context
	 * of a vCPU. Atomic access to multiple idregs is guarded by
	 * kvm_arch.config_lock.
	 */
#define IDREG_IDX(id)		(((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
#define IDREG(kvm, id)		((kvm)->arch.id_regs[IDREG_IDX(id)])
#define KVM_ARM_ID_REG_NUM	(IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
	u64 id_regs[KVM_ARM_ID_REG_NUM];

	/*
	 * For an untrusted host VM, 'pkvm.handle' is used to lookup
	 * the associated pKVM instance in the hypervisor.
	 */
	struct kvm_protected_vm pkvm;
};

struct kvm_vcpu_fault_info {
	u64 esr_el2;		/* Hyp Syndrome Register */
	u64 far_el2;		/* Hyp Fault Address Register */
	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
	u64 disr_el1;		/* Deferred [SError] Status Register */
};

enum vcpu_sysreg {
	__INVALID_SYSREG__,   /* 0 is reserved as an invalid value */
	MPIDR_EL1,	/* MultiProcessor Affinity Register */
	CLIDR_EL1,	/* Cache Level ID Register */
	CSSELR_EL1,	/* Cache Size Selection Register */
	SCTLR_EL1,	/* System Control Register */
	ACTLR_EL1,	/* Auxiliary Control Register */
	CPACR_EL1,	/* Coprocessor Access Control */
	ZCR_EL1,	/* SVE Control */
	TTBR0_EL1,	/* Translation Table Base Register 0 */
	TTBR1_EL1,	/* Translation Table Base Register 1 */
	TCR_EL1,	/* Translation Control Register */
	TCR2_EL1,	/* Extended Translation Control Register */
	ESR_EL1,	/* Exception Syndrome Register */
	AFSR0_EL1,	/* Auxiliary Fault Status Register 0 */
	AFSR1_EL1,	/* Auxiliary Fault Status Register 1 */
	FAR_EL1,	/* Fault Address Register */
	MAIR_EL1,	/* Memory Attribute Indirection Register */
	VBAR_EL1,	/* Vector Base Address Register */
	CONTEXTIDR_EL1,	/* Context ID Register */
	TPIDR_EL0,	/* Thread ID, User R/W */
	TPIDRRO_EL0,	/* Thread ID, User R/O */
	TPIDR_EL1,	/* Thread ID, Privileged */
	AMAIR_EL1,	/* Aux Memory Attribute Indirection Register */
	CNTKCTL_EL1,	/* Timer Control Register (EL1) */
	PAR_EL1,	/* Physical Address Register */
	MDSCR_EL1,	/* Monitor Debug System Control Register */
	MDCCINT_EL1,	/* Monitor Debug Comms Channel Interrupt Enable Reg */
	OSLSR_EL1,	/* OS Lock Status Register */
	DISR_EL1,	/* Deferred Interrupt Status Register */

	/* Performance Monitors Registers */
	PMCR_EL0,	/* Control Register */
	PMSELR_EL0,	/* Event Counter Selection Register */
	PMEVCNTR0_EL0,	/* Event Counter Register (0-30) */
	PMEVCNTR30_EL0 = PMEVCNTR0_EL0 + 30,
	PMCCNTR_EL0,	/* Cycle Counter Register */
	PMEVTYPER0_EL0,	/* Event Type Register (0-30) */
	PMEVTYPER30_EL0 = PMEVTYPER0_EL0 + 30,
	PMCCFILTR_EL0,	/* Cycle Count Filter Register */
	PMCNTENSET_EL0,	/* Count Enable Set Register */
	PMINTENSET_EL1,	/* Interrupt Enable Set Register */
	PMOVSSET_EL0,	/* Overflow Flag Status Set Register */
	PMUSERENR_EL0,	/* User Enable Register */

	/* Pointer Authentication Registers in a strict increasing order. */
	APIAKEYLO_EL1,
	APIAKEYHI_EL1,
	APIBKEYLO_EL1,
	APIBKEYHI_EL1,
	APDAKEYLO_EL1,
	APDAKEYHI_EL1,
	APDBKEYLO_EL1,
	APDBKEYHI_EL1,
	APGAKEYLO_EL1,
	APGAKEYHI_EL1,

	ELR_EL1,
	SP_EL1,
	SPSR_EL1,

	CNTVOFF_EL2,
	CNTV_CVAL_EL0,
	CNTV_CTL_EL0,
	CNTP_CVAL_EL0,
	CNTP_CTL_EL0,

	/* Memory Tagging Extension registers */
	RGSR_EL1,	/* Random Allocation Tag Seed Register */
	GCR_EL1,	/* Tag Control Register */
	TFSR_EL1,	/* Tag Fault Status Register (EL1) */
	TFSRE0_EL1,	/* Tag Fault Status Register (EL0) */

	/* Permission Indirection Extension registers */
	PIR_EL1,       /* Permission Indirection Register 1 (EL1) */
	PIRE0_EL1,     /* Permission Indirection Register 0 (EL1) */

	/* 32bit specific registers. */
	DACR32_EL2,	/* Domain Access Control Register */
	IFSR32_EL2,	/* Instruction Fault Status Register */
	FPEXC32_EL2,	/* Floating-Point Exception Control Register */
	DBGVCR32_EL2,	/* Debug Vector Catch Register */

	/* EL2 registers */
	VPIDR_EL2,	/* Virtualization Processor ID Register */
	VMPIDR_EL2,	/* Virtualization Multiprocessor ID Register */
	SCTLR_EL2,	/* System Control Register (EL2) */
	ACTLR_EL2,	/* Auxiliary Control Register (EL2) */
	HCR_EL2,	/* Hypervisor Configuration Register */
	MDCR_EL2,	/* Monitor Debug Configuration Register (EL2) */
	CPTR_EL2,	/* Architectural Feature Trap Register (EL2) */
	HSTR_EL2,	/* Hypervisor System Trap Register */
	HACR_EL2,	/* Hypervisor Auxiliary Control Register */
	HCRX_EL2,	/* Extended Hypervisor Configuration Register */
	TTBR0_EL2,	/* Translation Table Base Register 0 (EL2) */
	TTBR1_EL2,	/* Translation Table Base Register 1 (EL2) */
	TCR_EL2,	/* Translation Control Register (EL2) */
	VTTBR_EL2,	/* Virtualization Translation Table Base Register */
	VTCR_EL2,	/* Virtualization Translation Control Register */
	SPSR_EL2,	/* EL2 saved program status register */
	ELR_EL2,	/* EL2 exception link register */
	AFSR0_EL2,	/* Auxiliary Fault Status Register 0 (EL2) */
	AFSR1_EL2,	/* Auxiliary Fault Status Register 1 (EL2) */
	ESR_EL2,	/* Exception Syndrome Register (EL2) */
	FAR_EL2,	/* Fault Address Register (EL2) */
	HPFAR_EL2,	/* Hypervisor IPA Fault Address Register */
	MAIR_EL2,	/* Memory Attribute Indirection Register (EL2) */
	AMAIR_EL2,	/* Auxiliary Memory Attribute Indirection Register (EL2) */
	VBAR_EL2,	/* Vector Base Address Register (EL2) */
	RVBAR_EL2,	/* Reset Vector Base Address Register */
	CONTEXTIDR_EL2,	/* Context ID Register (EL2) */
	TPIDR_EL2,	/* EL2 Software Thread ID Register */
	CNTHCTL_EL2,	/* Counter-timer Hypervisor Control register */
	SP_EL2,		/* EL2 Stack Pointer */
	HFGRTR_EL2,
	HFGWTR_EL2,
	HFGITR_EL2,
	HDFGRTR_EL2,
	HDFGWTR_EL2,
	HAFGRTR_EL2,
	CNTHP_CTL_EL2,
	CNTHP_CVAL_EL2,
	CNTHV_CTL_EL2,
	CNTHV_CVAL_EL2,

	NR_SYS_REGS	/* Nothing after this line! */
};

struct kvm_cpu_context {
	struct user_pt_regs regs;	/* sp = sp_el0 */

	u64	spsr_abt;
	u64	spsr_und;
	u64	spsr_irq;
	u64	spsr_fiq;

	struct user_fpsimd_state fp_regs;

	u64 sys_regs[NR_SYS_REGS];

	struct kvm_vcpu *__hyp_running_vcpu;
};

struct kvm_host_data {
	struct kvm_cpu_context host_ctxt;
};

struct kvm_host_psci_config {
	/* PSCI version used by host. */
	u32 version;
	u32 smccc_version;

	/* Function IDs used by host if version is v0.1. */
	struct psci_0_1_function_ids function_ids_0_1;

	bool psci_0_1_cpu_suspend_implemented;
	bool psci_0_1_cpu_on_implemented;
	bool psci_0_1_cpu_off_implemented;
	bool psci_0_1_migrate_implemented;
};

extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config);
#define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config)

extern s64 kvm_nvhe_sym(hyp_physvirt_offset);
#define hyp_physvirt_offset CHOOSE_NVHE_SYM(hyp_physvirt_offset)

extern u64 kvm_nvhe_sym(hyp_cpu_logical_map)[NR_CPUS];
#define hyp_cpu_logical_map CHOOSE_NVHE_SYM(hyp_cpu_logical_map)

struct kvm_iommu_driver {
	int (*init_driver)(void);
	void (*remove_driver)(void);
	pkvm_handle_t (*get_iommu_id)(struct device *dev);
	ANDROID_KABI_RESERVE(1);
	ANDROID_KABI_RESERVE(2);
	ANDROID_KABI_RESERVE(3);
	ANDROID_KABI_RESERVE(4);
	ANDROID_KABI_RESERVE(5);
	ANDROID_KABI_RESERVE(6);
	ANDROID_KABI_RESERVE(7);
	ANDROID_KABI_RESERVE(8);
};

struct vcpu_reset_state {
	unsigned long	pc;
	unsigned long	r0;
	bool		be;
	bool		reset;
};

struct kvm_hyp_req {
#define KVM_HYP_LAST_REQ	0
#define KVM_HYP_REQ_TYPE_MEM	1
#define KVM_HYP_REQ_TYPE_MAP	2
	u8 type;
	union {
		struct {
#define REQ_MEM_DEST_HYP_ALLOC		1
#define REQ_MEM_DEST_VCPU_MEMCACHE	2
#define REQ_MEM_DEST_HYP_IOMMU		3
			u8	dest;
			int	nr_pages;
			int	sz_alloc; /* Size of the page. */
		} mem;
		struct {
			unsigned long	guest_ipa;
			size_t		size;
		} map;
	};
};

#define KVM_HYP_REQ_MAX (PAGE_SIZE / sizeof(struct kvm_hyp_req))
/*
 * De-serialize request from SMCCC return.
 * See hyp-main.c for serialization.
 */
/* Register a2. */
#define SMCCC_REQ_TYPE_MASK		GENMASK_ULL(7, 0)
#define SMCCC_REQ_DEST_MASK		GENMASK_ULL(15, 8)
/* Register a3. */
#define SMCCC_REQ_NR_PAGES_MASK		GENMASK_ULL(31, 0)
#define SMCCC_REQ_SZ_ALLOC_MASK		GENMASK_ULL(63, 32)

static inline void hyp_reqs_smccc_decode(struct arm_smccc_res *res,
					 struct kvm_hyp_req *req)
{
	req->type = FIELD_GET(SMCCC_REQ_TYPE_MASK, res->a2);
	req->mem.dest = FIELD_GET(SMCCC_REQ_DEST_MASK, res->a2);
	req->mem.nr_pages = FIELD_GET(SMCCC_REQ_NR_PAGES_MASK, res->a3);
	req->mem.sz_alloc = FIELD_GET(SMCCC_REQ_SZ_ALLOC_MASK, res->a3);
}
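
/*
 * Flow sketch: when a hypercall returns with a "needs resources"
 * status, the hypervisor piggy-backs a request on the SMCCC result
 * registers, which the host decodes and services before retrying.
 * The surrounding logic below is a hypothetical caller's, and
 * do_topup() is a made-up helper for illustration:
 *
 *	struct arm_smccc_res res;
 *	struct kvm_hyp_req req;
 *
 *	res = kvm_call_hyp_nvhe_smccc(...);
 *	hyp_reqs_smccc_decode(&res, &req);
 *	if (req.type == KVM_HYP_REQ_TYPE_MEM &&
 *	    req.mem.dest == REQ_MEM_DEST_VCPU_MEMCACHE)
 *		do_topup(vcpu, req.mem.nr_pages);
 */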

struct kvm_vcpu_arch {
	struct kvm_cpu_context ctxt;

	/*
	 * Guest floating point state
	 *
	 * The architecture has two main floating point extensions,
	 * the original FPSIMD and SVE.  These have overlapping
	 * register views, with the FPSIMD V registers occupying the
	 * low 128 bits of the SVE Z registers.  When the core
	 * floating point code saves the register state of a task it
	 * records which view it saved in fp_type.
	 */
	void *sve_state;
	enum fp_type fp_type;
	unsigned int sve_max_vl;
	u64 svcr;

	/* Stage 2 paging state used by the hardware on next switch */
	struct kvm_s2_mmu *hw_mmu;

	/* Values of trap registers for the guest. */
	u64 hcr_el2;
	u64 mdcr_el2;
	u64 cptr_el2;

	/* Values of trap registers for the host before guest entry. */
	u64 mdcr_el2_host;

	/* Exception Information */
	struct kvm_vcpu_fault_info fault;

	/* Ownership of the FP regs */
	enum {
		FP_STATE_FREE,
		FP_STATE_HOST_OWNED,
		FP_STATE_GUEST_OWNED,
	} fp_state;

	/* Configuration flags, set once and for all before the vcpu can run */
	u8 cflags;

	/* Input flags to the hypervisor code, potentially cleared after use */
	u8 iflags;

	/* State flags for kernel bookkeeping, unused by the hypervisor code */
	u8 sflags;

	/*
	 * Don't run the guest (internal implementation need).
	 *
	 * Contrary to the flags above, this is set/cleared outside of
	 * a vcpu context, and thus cannot be mixed with the flags
	 * themselves (or the flag accesses need to be made atomic).
	 */
	bool pause;

	/*
	 * We maintain more than a single set of debug registers to support
	 * debugging the guest from the host and to maintain separate host and
	 * guest state during world switches. vcpu_debug_state are the debug
	 * registers of the vcpu as the guest sees them.  host_debug_state are
	 * the host registers which are saved and restored during
	 * world switches. external_debug_state contains the debug
	 * values we want to debug the guest. This is set via the
	 * KVM_SET_GUEST_DEBUG ioctl.
	 *
	 * debug_ptr points to the set of debug registers that should be loaded
	 * onto the hardware when running the guest.
	 */
	struct kvm_guest_debug_arch *debug_ptr;
	struct kvm_guest_debug_arch vcpu_debug_state;
	struct kvm_guest_debug_arch external_debug_state;

	struct user_fpsimd_state *host_fpsimd_state;	/* hyp VA */

	struct {
		/* {Break,watch}point registers */
		struct kvm_guest_debug_arch regs;
		/* Statistical profiling extension */
		u64 pmscr_el1;
		/* Self-hosted trace */
		u64 trfcr_el1;
	} host_debug_state;

	/* VGIC state */
	struct vgic_cpu vgic_cpu;
	struct arch_timer_cpu timer_cpu;
	struct kvm_pmu pmu;

	/*
	 * Guest registers we preserve during guest debugging.
	 *
	 * These shadow registers are updated by the kvm_handle_sys_reg
	 * trap handler if the guest accesses or updates them while we
	 * are using guest debug.
	 */
	struct {
		u32	mdscr_el1;
		bool	pstate_ss;
	} guest_debug_preserved;

	/* vcpu power state */
	struct kvm_mp_state mp_state;
	spinlock_t mp_state_lock;

	union {
		/* Cache some mmu pages needed inside spinlock regions */
		struct kvm_mmu_memory_cache mmu_page_cache;
		/* Pages to be donated to pkvm/EL2 if it runs out */
		struct kvm_hyp_memcache stage2_mc;
	};

	/* feature flags */
	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);

	/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
	u64 vsesr_el2;

	/* Additional reset state */
	struct vcpu_reset_state	reset_state;

	/* Guest PV state */
	struct {
		u64 last_steal;
		gpa_t base;
	} steal;

	/* Per-vcpu CCSIDR override or NULL */
	u32 *ccsidr;

	/* PAGE_SIZE bound list of requests from the hypervisor to the host. */
	struct kvm_hyp_req *hyp_reqs;
};

/*
 * Each 'flag' is composed of a comma-separated triplet:
 *
 * - the flag-set it belongs to in the vcpu->arch structure
 * - the value for that flag
 * - the mask for that flag
 *
 * __vcpu_single_flag() builds such a triplet for a single-bit flag.
 * unpack_vcpu_flag() extracts the flag value from the triplet for
 * direct use outside of the flag accessors.
 */
#define __vcpu_single_flag(_set, _f)	_set, (_f), (_f)

#define __unpack_flag(_set, _f, _m)	_f
#define unpack_vcpu_flag(...)		__unpack_flag(__VA_ARGS__)

#define __build_check_flag(v, flagset, f, m)			\
	do {							\
		typeof(v->arch.flagset) *_fset;			\
								\
		/* Check that the flags fit in the mask */	\
		BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m)));	\
		/* Check that the flags fit in the type */	\
		BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m));	\
	} while (0)

#define __vcpu_get_flag(v, flagset, f, m)			\
	({							\
		__build_check_flag(v, flagset, f, m);		\
								\
		READ_ONCE(v->arch.flagset) & (m);		\
	})

/*
 * Note that the set/clear accessors must be preempt-safe in order to
 * avoid nesting them with load/put which also manipulate flags...
 */
#ifdef __KVM_NVHE_HYPERVISOR__
/* the nVHE hypervisor is always non-preemptible */
#define __vcpu_flags_preempt_disable()
#define __vcpu_flags_preempt_enable()
#else
#define __vcpu_flags_preempt_disable()	preempt_disable()
#define __vcpu_flags_preempt_enable()	preempt_enable()
#endif

#define __vcpu_set_flag(v, flagset, f, m)			\
	do {							\
		typeof(v->arch.flagset) *fset;			\
								\
		__build_check_flag(v, flagset, f, m);		\
								\
		fset = &v->arch.flagset;			\
		__vcpu_flags_preempt_disable();			\
		if (HWEIGHT(m) > 1)				\
			*fset &= ~(m);				\
		*fset |= (f);					\
		__vcpu_flags_preempt_enable();			\
	} while (0)

#define __vcpu_clear_flag(v, flagset, f, m)			\
	do {							\
		typeof(v->arch.flagset) *fset;			\
								\
		__build_check_flag(v, flagset, f, m);		\
								\
		fset = &v->arch.flagset;			\
		__vcpu_flags_preempt_disable();			\
		*fset &= ~(m);					\
		__vcpu_flags_preempt_enable();			\
	} while (0)

#define __vcpu_copy_flag(vt, vs, flagset, f, m)			\
	do {							\
		typeof(vs->arch.flagset) tmp, val;		\
								\
		__build_check_flag(vs, flagset, f, m);		\
								\
		val = READ_ONCE(vs->arch.flagset);		\
		val &= (m);					\
		tmp = READ_ONCE(vt->arch.flagset);		\
		tmp &= ~(m);					\
		tmp |= val;					\
		WRITE_ONCE(vt->arch.flagset, tmp);		\
	} while (0)


#define vcpu_get_flag(v, ...)	__vcpu_get_flag((v), __VA_ARGS__)
#define vcpu_set_flag(v, ...)	__vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_clear_flag(v, ...)	__vcpu_clear_flag((v), __VA_ARGS__)
#define vcpu_copy_flag(vt, vs, ...) __vcpu_copy_flag((vt), (vs), __VA_ARGS__)
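
/*
 * Usage sketch: callers name a flag by its triplet macro (defined
 * below) and the accessors expand it into (flagset, value, mask).
 * For the single-bit IN_WFIT flag, for instance:
 *
 *	vcpu_set_flag(vcpu, IN_WFIT);
 *	if (vcpu_get_flag(vcpu, IN_WFIT))
 *		...
 *	vcpu_clear_flag(vcpu, IN_WFIT);
 *
 * operates on vcpu->arch.sflags with value == mask == BIT(3), and the
 * BUILD_BUG_ON()s in __build_check_flag() verify at compile time that
 * the value fits the mask and the mask fits the 8-bit flagset.
 */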

/* SVE exposed to guest */
#define GUEST_HAS_SVE		__vcpu_single_flag(cflags, BIT(0))
/* SVE config completed */
#define VCPU_SVE_FINALIZED	__vcpu_single_flag(cflags, BIT(1))
/* PTRAUTH exposed to guest */
#define GUEST_HAS_PTRAUTH	__vcpu_single_flag(cflags, BIT(2))
/* KVM_ARM_VCPU_INIT completed */
#define VCPU_INITIALIZED	__vcpu_single_flag(cflags, BIT(3))

/* Exception pending */
#define PENDING_EXCEPTION	__vcpu_single_flag(iflags, BIT(0))
/*
 * PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't
 * be set together with an exception...
 */
#define INCREMENT_PC		__vcpu_single_flag(iflags, BIT(1))
/* Target EL/MODE (not a single flag, but let's abuse the macro) */
#define EXCEPT_MASK		__vcpu_single_flag(iflags, GENMASK(3, 1))
/* Cover both PENDING_EXCEPTION and EXCEPT_MASK for global operations */
#define PC_UPDATE_REQ		__vcpu_single_flag(iflags, GENMASK(3, 0))

/* Helpers to encode exceptions with minimum fuss */
#define __EXCEPT_MASK_VAL	unpack_vcpu_flag(EXCEPT_MASK)
#define __EXCEPT_SHIFT		__builtin_ctzl(__EXCEPT_MASK_VAL)
#define __vcpu_except_flags(_f)	iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL

/*
 * When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following
 * values:
 *
 * For AArch32 EL1:
 */
#define EXCEPT_AA32_UND		__vcpu_except_flags(0)
#define EXCEPT_AA32_IABT	__vcpu_except_flags(1)
#define EXCEPT_AA32_DABT	__vcpu_except_flags(2)
/* For AArch64: */
#define EXCEPT_AA64_EL1_SYNC	__vcpu_except_flags(0)
#define EXCEPT_AA64_EL1_IRQ	__vcpu_except_flags(1)
#define EXCEPT_AA64_EL1_FIQ	__vcpu_except_flags(2)
#define EXCEPT_AA64_EL1_SERR	__vcpu_except_flags(3)
/* For AArch64 with NV: */
#define EXCEPT_AA64_EL2_SYNC	__vcpu_except_flags(4)
#define EXCEPT_AA64_EL2_IRQ	__vcpu_except_flags(5)
#define EXCEPT_AA64_EL2_FIQ	__vcpu_except_flags(6)
#define EXCEPT_AA64_EL2_SERR	__vcpu_except_flags(7)
/* Guest debug is live */
#define DEBUG_DIRTY		__vcpu_single_flag(iflags, BIT(4))
/* Save SPE context if active */
#define DEBUG_STATE_SAVE_SPE	__vcpu_single_flag(iflags, BIT(5))
/* Save TRBE context if active */
#define DEBUG_STATE_SAVE_TRBE	__vcpu_single_flag(iflags, BIT(6))
/* vcpu running in HYP context (VHE-only) */
#define VCPU_HYP_CONTEXT	__vcpu_single_flag(iflags, BIT(7))
/* pKVM host vcpu state is dirty, needs resync (nVHE-only) */
#define PKVM_HOST_STATE_DIRTY	__vcpu_single_flag(iflags, BIT(7))

/* SVE enabled for host EL0 */
#define HOST_SVE_ENABLED	__vcpu_single_flag(sflags, BIT(0))
/* SME enabled for EL0 */
#define HOST_SME_ENABLED	__vcpu_single_flag(sflags, BIT(1))
/* Physical CPU not in supported_cpus */
#define ON_UNSUPPORTED_CPU	__vcpu_single_flag(sflags, BIT(2))
/* WFIT instruction trapped */
#define IN_WFIT			__vcpu_single_flag(sflags, BIT(3))
/* vcpu system registers loaded on physical CPU */
#define SYSREGS_ON_CPU		__vcpu_single_flag(sflags, BIT(4))
/* Software step state is Active-pending */
#define DBG_SS_ACTIVE_PENDING	__vcpu_single_flag(sflags, BIT(5))
/* PMUSERENR for the guest EL0 is on physical CPU */
#define PMUSERENR_ON_CPU	__vcpu_single_flag(sflags, BIT(6))
/* WFI instruction trapped */
#define IN_WFI			__vcpu_single_flag(sflags, BIT(7))


/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) +	\
			     sve_ffr_offset((vcpu)->arch.sve_max_vl))

#define _vcpu_sve_state_size(sve_max_vl) ({				\
	size_t __size_ret;						\
	unsigned int __vq;						\
									\
	if (WARN_ON(!sve_vl_valid(sve_max_vl))) {			\
		__size_ret = 0;						\
	} else {							\
		__vq = sve_vq_from_vl(sve_max_vl);			\
		__size_ret = SVE_SIG_REGS_SIZE(__vq);			\
	}								\
									\
	__size_ret;							\
})

#define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl)

#define vcpu_sve_state_size(vcpu) _vcpu_sve_state_size((vcpu)->arch.sve_max_vl)

#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
				 KVM_GUESTDBG_USE_SW_BP | \
				 KVM_GUESTDBG_USE_HW | \
				 KVM_GUESTDBG_SINGLESTEP)

#define vcpu_has_sve(vcpu) (system_supports_sve() &&			\
			    vcpu_get_flag(vcpu, GUEST_HAS_SVE))

#ifdef CONFIG_ARM64_PTR_AUTH
#define vcpu_has_ptrauth(vcpu)						\
	((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) ||		\
	  cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) &&		\
	  vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH))
#else
#define vcpu_has_ptrauth(vcpu)		false
#endif

#define vcpu_on_unsupported_cpu(vcpu)					\
	vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU)

#define vcpu_set_on_unsupported_cpu(vcpu)				\
	vcpu_set_flag(vcpu, ON_UNSUPPORTED_CPU)

#define vcpu_clear_on_unsupported_cpu(vcpu)				\
	vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU)

#define vcpu_gp_regs(v)		(&(v)->arch.ctxt.regs)

/*
 * Only use __vcpu_sys_reg/ctxt_sys_reg if you know you want the
 * memory backed version of a register, and not the one most recently
 * accessed by a running VCPU.  For example, for userspace access or
 * for system registers that are never context switched, but only
 * emulated.
 */
#define __ctxt_sys_reg(c,r)	(&(c)->sys_regs[(r)])

#define ctxt_sys_reg(c,r)	(*__ctxt_sys_reg(c,r))

#define __vcpu_sys_reg(v,r)	(ctxt_sys_reg(&(v)->arch.ctxt, (r)))
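
/*
 * Contrast sketch for the note above: on VHE, while a vcpu's sysregs
 * are loaded (SYSREGS_ON_CPU), the two accessors can legitimately
 * disagree, since the hardware copy is the live one:
 *
 *	u64 in_mem = __vcpu_sys_reg(vcpu, SCTLR_EL1);
 *	u64 live = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
 *
 * vcpu_read_sys_reg()/vcpu_write_sys_reg(), defined below, pick the
 * right source automatically; the raw accessors above are for
 * emulation-only registers and userspace access.
 */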

static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
{
	/*
	 * *** VHE ONLY ***
	 *
	 * System registers listed in the switch are not saved on every
	 * exit from the guest but are only saved on vcpu_put.
	 *
	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
	 * should never be listed below, because the guest cannot modify its
	 * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
	 * thread when emulating cross-VCPU communication.
	 */
	if (!has_vhe())
		return false;

	switch (reg) {
	case SCTLR_EL1:		*val = read_sysreg_s(SYS_SCTLR_EL12);	break;
	case CPACR_EL1:		*val = read_sysreg_s(SYS_CPACR_EL12);	break;
	case TTBR0_EL1:		*val = read_sysreg_s(SYS_TTBR0_EL12);	break;
	case TTBR1_EL1:		*val = read_sysreg_s(SYS_TTBR1_EL12);	break;
	case TCR_EL1:		*val = read_sysreg_s(SYS_TCR_EL12);	break;
	case ESR_EL1:		*val = read_sysreg_s(SYS_ESR_EL12);	break;
	case AFSR0_EL1:		*val = read_sysreg_s(SYS_AFSR0_EL12);	break;
	case AFSR1_EL1:		*val = read_sysreg_s(SYS_AFSR1_EL12);	break;
	case FAR_EL1:		*val = read_sysreg_s(SYS_FAR_EL12);	break;
	case MAIR_EL1:		*val = read_sysreg_s(SYS_MAIR_EL12);	break;
	case VBAR_EL1:		*val = read_sysreg_s(SYS_VBAR_EL12);	break;
	case CONTEXTIDR_EL1:	*val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
	case TPIDR_EL0:		*val = read_sysreg_s(SYS_TPIDR_EL0);	break;
	case TPIDRRO_EL0:	*val = read_sysreg_s(SYS_TPIDRRO_EL0);	break;
	case TPIDR_EL1:		*val = read_sysreg_s(SYS_TPIDR_EL1);	break;
	case AMAIR_EL1:		*val = read_sysreg_s(SYS_AMAIR_EL12);	break;
	case CNTKCTL_EL1:	*val = read_sysreg_s(SYS_CNTKCTL_EL12);	break;
	case ELR_EL1:		*val = read_sysreg_s(SYS_ELR_EL12);	break;
	case PAR_EL1:		*val = read_sysreg_par();		break;
	case DACR32_EL2:	*val = read_sysreg_s(SYS_DACR32_EL2);	break;
	case IFSR32_EL2:	*val = read_sysreg_s(SYS_IFSR32_EL2);	break;
	case DBGVCR32_EL2:	*val = read_sysreg_s(SYS_DBGVCR32_EL2);	break;
	default:		return false;
	}

	return true;
}

static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
{
	/*
	 * *** VHE ONLY ***
	 *
	 * System registers listed in the switch are not restored on every
	 * entry to the guest but are only restored on vcpu_load.
	 *
	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
	 * should never be listed below, because the MPIDR should only be set
	 * once, before running the VCPU, and never changed later.
	 */
	if (!has_vhe())
		return false;

	switch (reg) {
	case SCTLR_EL1:		write_sysreg_s(val, SYS_SCTLR_EL12);	break;
	case CPACR_EL1:		write_sysreg_s(val, SYS_CPACR_EL12);	break;
	case TTBR0_EL1:		write_sysreg_s(val, SYS_TTBR0_EL12);	break;
	case TTBR1_EL1:		write_sysreg_s(val, SYS_TTBR1_EL12);	break;
	case TCR_EL1:		write_sysreg_s(val, SYS_TCR_EL12);	break;
	case ESR_EL1:		write_sysreg_s(val, SYS_ESR_EL12);	break;
	case AFSR0_EL1:		write_sysreg_s(val, SYS_AFSR0_EL12);	break;
	case AFSR1_EL1:		write_sysreg_s(val, SYS_AFSR1_EL12);	break;
	case FAR_EL1:		write_sysreg_s(val, SYS_FAR_EL12);	break;
	case MAIR_EL1:		write_sysreg_s(val, SYS_MAIR_EL12);	break;
	case VBAR_EL1:		write_sysreg_s(val, SYS_VBAR_EL12);	break;
	case CONTEXTIDR_EL1:	write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
	case TPIDR_EL0:		write_sysreg_s(val, SYS_TPIDR_EL0);	break;
	case TPIDRRO_EL0:	write_sysreg_s(val, SYS_TPIDRRO_EL0);	break;
	case TPIDR_EL1:		write_sysreg_s(val, SYS_TPIDR_EL1);	break;
	case AMAIR_EL1:		write_sysreg_s(val, SYS_AMAIR_EL12);	break;
	case CNTKCTL_EL1:	write_sysreg_s(val, SYS_CNTKCTL_EL12);	break;
	case ELR_EL1:		write_sysreg_s(val, SYS_ELR_EL12);	break;
	case PAR_EL1:		write_sysreg_s(val, SYS_PAR_EL1);	break;
	case DACR32_EL2:	write_sysreg_s(val, SYS_DACR32_EL2);	break;
	case IFSR32_EL2:	write_sysreg_s(val, SYS_IFSR32_EL2);	break;
	case DBGVCR32_EL2:	write_sysreg_s(val, SYS_DBGVCR32_EL2);	break;
	default:		return false;
	}

	return true;
}

#define vcpu_read_sys_reg(__vcpu, reg)					\
	({								\
		u64 __val = 0x8badf00d8badf00d;				\
									\
		/* SYSREGS_ON_CPU is only used in VHE */		\
		((!is_nvhe_hyp_code() &&				\
		  vcpu_get_flag(__vcpu, SYSREGS_ON_CPU) &&		\
		  __vcpu_read_sys_reg_from_cpu(reg, &__val))) ?		\
		 __val							\
		 :							\
		 ctxt_sys_reg(&__vcpu->arch.ctxt, reg);			\
	 })

#define vcpu_write_sys_reg(__vcpu, __val, reg)				\
	do {								\
		/* SYSREGS_ON_CPU is only used in VHE */		\
		if (is_nvhe_hyp_code() ||				\
		    !vcpu_get_flag(__vcpu, SYSREGS_ON_CPU) ||		\
		    !__vcpu_write_sys_reg_to_cpu(__val, reg))		\
			ctxt_sys_reg(&__vcpu->arch.ctxt, reg) = __val;	\
	} while (0)

struct kvm_vm_stat {
	struct kvm_vm_stat_generic generic;
	atomic64_t protected_hyp_mem;
	atomic64_t protected_shared_mem;
	atomic64_t protected_pgtable_mem;
};

struct kvm_vcpu_stat {
	struct kvm_vcpu_stat_generic generic;
	u64 hvc_exit_stat;
	u64 wfe_exit_stat;
	u64 wfi_exit_stat;
	u64 mmio_exit_user;
	u64 mmio_exit_kernel;
	u64 signal_exits;
	u64 exits;
};

unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);

unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);

int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
			      struct kvm_vcpu_events *events);

int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
			      struct kvm_vcpu_events *events);

#define KVM_ARCH_WANT_MMU_NOTIFIER

void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);

#define vcpu_has_run_once(vcpu)	!!rcu_access_pointer((vcpu)->pid)

#ifndef __KVM_NVHE_HYPERVISOR__
#define kvm_call_hyp_nvhe_smccc(f, ...)					\
	({								\
		struct arm_smccc_res res;				\
									\
		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(f),		\
				  ##__VA_ARGS__, &res);			\
		WARN_ON(res.a0 != SMCCC_RET_SUCCESS);			\
									\
		res;							\
	})

#define kvm_call_hyp_nvhe(f, ...)					\
	({								\
		struct arm_smccc_res res;				\
									\
		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(f),		\
				  ##__VA_ARGS__, &res);			\
		WARN_ON(res.a0 != SMCCC_RET_SUCCESS);			\
									\
		res.a1;							\
	})

/*
 * The couple of isb() below are there to guarantee the same behaviour
 * on VHE as on !VHE, where the eret to EL1 acts as a context
 * synchronization event.
 */
#define kvm_call_hyp(f, ...)						\
	do {								\
		if (has_vhe()) {					\
			f(__VA_ARGS__);					\
			isb();						\
		} else {						\
			kvm_call_hyp_nvhe(f, ##__VA_ARGS__);		\
		}							\
	} while (0)

#define kvm_call_hyp_ret(f, ...)					\
	({								\
		typeof(f(__VA_ARGS__)) ret;				\
									\
		if (has_vhe()) {					\
			ret = f(__VA_ARGS__);				\
			isb();						\
		} else {						\
			ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__);	\
		}							\
									\
		ret;							\
	})
#else /* __KVM_NVHE_HYPERVISOR__ */
#define kvm_call_hyp(f, ...) f(__VA_ARGS__)
#define kvm_call_hyp_ret(f, ...) f(__VA_ARGS__)
#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
#endif /* __KVM_NVHE_HYPERVISOR__ */
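
/*
 * Usage sketch: host code names the hyp function symbolically and the
 * macros above pick the transport: a direct call plus isb() on VHE, or
 * an HVC into the nVHE hypervisor otherwise. Typical callers look like:
 *
 *	kvm_call_hyp(__kvm_flush_vm_context);
 *	ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
 */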

int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);

int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);

void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);

int __init kvm_sys_reg_table_init(void);
int __init populate_nv_trap_config(void);

bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);

/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);

int kvm_handle_mmio_return(struct kvm_vcpu *vcpu);
int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa);

/*
 * Returns true if a Performance Monitoring Interrupt (PMI), a.k.a. perf event,
 * arrived in guest context.  For arm64, any event that arrives while a vCPU is
 * loaded is considered to be "in guest".
 */
static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu)
{
	return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu;
}

long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
void kvm_update_stolen_time(struct kvm_vcpu *vcpu);

bool kvm_arm_pvtime_supported(void);
int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
			    struct kvm_device_attr *attr);
int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
			    struct kvm_device_attr *attr);
int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
			    struct kvm_device_attr *attr);

extern unsigned int __ro_after_init kvm_arm_vmid_bits;
int __init kvm_arm_vmid_alloc_init(void);
void __init kvm_arm_vmid_alloc_free(void);
void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
void kvm_arm_vmid_clear_active(void);

static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
{
	vcpu_arch->steal.base = INVALID_GPA;
}

static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
{
	return (vcpu_arch->steal.base != INVALID_GPA);
}

void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);

struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);

DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);

static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
	/* The host's MPIDR is immutable, so let's set it up at boot time */
	ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
}

static inline bool kvm_system_needs_idmapped_vectors(void)
{
	return cpus_have_const_cap(ARM64_SPECTRE_V3A);
}

static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}

void kvm_arm_init_debug(void);
void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);

#define __vcpu_save_guest_debug_regs(vcpu)				\
	do {								\
		u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1);		\
									\
		(vcpu)->arch.guest_debug_preserved.mdscr_el1 = val;	\
	} while (0)

#define __vcpu_restore_guest_debug_regs(vcpu)				\
	do {								\
		u64 val = (vcpu)->arch.guest_debug_preserved.mdscr_el1;	\
									\
		vcpu_write_sys_reg(vcpu, val, MDSCR_EL1);		\
	} while (0)

#define kvm_vcpu_os_lock_enabled(vcpu)		\
	(!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK))

#define kvm_vcpu_needs_debug_regs(vcpu)		\
	((vcpu)->guest_debug || kvm_vcpu_os_lock_enabled(vcpu))

int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
			       struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
			       struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
			       struct kvm_device_attr *attr);

int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
			       struct kvm_arm_copy_mte_tags *copy_tags);
int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
				    struct kvm_arm_counter_offset *offset);

/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);

static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
{
	return (!has_vhe() && attr->exclude_host);
}

/* Flags for host debug state */
void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);

#ifdef CONFIG_KVM
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);
bool kvm_set_pmuserenr(u64 val);
#else
static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u32 clr) {}
static inline bool kvm_set_pmuserenr(u64 val)
{
	return false;
}
#endif

void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu);

int __init kvm_set_ipa_limit(void);

#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);

#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS

#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE

#define kvm_vm_is_protected(kvm)	((kvm)->arch.pkvm.enabled)

int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);

#define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED)

#define kvm_has_mte(kvm)					\
	(system_supports_mte() &&				\
	 test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags))

#define kvm_supports_32bit_el0()				\
	(system_supports_32bit_el0() &&				\
	 !static_branch_unlikely(&arm64_mismatched_32bit_el0))

#define kvm_vm_has_ran_once(kvm)					\
	(test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags))

int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
extern phys_addr_t hyp_mem_base;
extern phys_addr_t hyp_mem_size;
void __init kvm_hyp_reserve(void);
#else
static inline void kvm_hyp_reserve(void) { }
#endif

void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);

int kvm_iommu_init_driver(void);
void kvm_iommu_remove_driver(void);

int pkvm_iommu_suspend(struct device *dev);
int pkvm_iommu_resume(struct device *dev);

struct kvm_iommu_ops;

int kvm_iommu_init_hyp(struct kvm_iommu_ops *hyp_ops,
		       struct kvm_hyp_memcache *atomic_mc,
		       unsigned long init_arg);

int kvm_iommu_register_driver(struct kvm_iommu_driver *kern_ops);

/* Allocator interface IDs. */
#define HYP_ALLOC_MGT_HEAP_ID		0
#define HYP_ALLOC_MGT_IOMMU_ID		1

unsigned long __pkvm_reclaim_hyp_alloc_mgt(unsigned long nr_pages);
int __pkvm_topup_hyp_alloc_mgt(unsigned long id, unsigned long nr_pages,
			       unsigned long sz_alloc);

#endif /* __ARM64_KVM_HOST_H__ */