• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2020 - Google LLC
4  * Author: Quentin Perret <qperret@google.com>
5  * Author: Fuad Tabba <tabba@google.com>
6  */
7 #ifndef __ARM64_KVM_PKVM_H__
8 #define __ARM64_KVM_PKVM_H__
9 
10 #include <linux/arm_ffa.h>
11 #include <linux/memblock.h>
12 #include <linux/scatterlist.h>
13 #include <asm/kvm_pgtable.h>
14 #include <asm/sysreg.h>
15 
16 /*
17  * Stores the sve state for the host in protected mode.
18  */
19 struct kvm_host_sve_state {
20 	u64 zcr_el1;
21 
22 	/*
23 	 * Ordering is important since __sve_save_state/__sve_restore_state
24 	 * relies on it.
25 	 */
26 	u32 fpsr;
27 	u32 fpcr;
28 
29 	/* Must be SVE_VQ_BYTES (128 bit) aligned. */
30 	char sve_regs[];
31 };
32 
/* Upper bound on the number of VMs that may exist at once under pKVM. */
#define KVM_MAX_PVMS			255

/* NOTE(review): presumably the capacity of the hyp_memory[] array - confirm. */
#define HYP_MEMBLOCK_REGIONS		128

/* Sentinel value meaning "no valid pvmfw load address". */
#define PVMFW_INVALID_LOAD_ADDR		(-1)
38 
39 int pkvm_vm_ioctl_enable_cap(struct kvm *kvm,struct kvm_enable_cap *cap);
40 int pkvm_init_host_vm(struct kvm *kvm, unsigned long type);
41 int pkvm_create_hyp_vm(struct kvm *kvm);
42 void pkvm_destroy_hyp_vm(struct kvm *kvm);
43 void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa);
44 
45 /*
46  * Definitions for features to be allowed or restricted for guest virtual
47  * machines, depending on the mode KVM is running in and on the type of guest
48  * that is running.
49  *
50  * The ALLOW masks represent a bitmask of feature fields that are allowed
51  * without any restrictions as long as they are supported by the system.
52  *
53  * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
54  * features that are restricted to support at most the specified feature.
55  *
 * If a feature field is not present in either, then it is not supported.
57  *
58  * The approach taken for protected VMs is to allow features that are:
59  * - Needed by common Linux distributions (e.g., floating point)
60  * - Trivial to support, e.g., supporting the feature does not introduce or
61  * require tracking of additional state in KVM
 * - Impossible to trap or otherwise prevent the guest from using anyway
63  */
64 
/*
 * ID_AA64PFR0_EL1 fields allowed without restriction for protected VMs:
 * - FP, AdvSIMD: Floating-point and Advanced SIMD
 * - GIC: GICv3(+) system register interface
 * - DIT: Data Independent Timing
 * - CSV2, CSV3: Spectre/Meltdown Mitigation
 */
#define PVM_ID_AA64PFR0_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3))
80 
/*
 * ID_AA64PFR0_EL1 *unsigned* fields capped for protected VMs:
 * - EL0..EL3: AArch64 only (no support for AArch32 guests); AArch32 adds
 *   complexity in trap handling, emulation, condition codes, etc...
 * - RAS: v1, which is supported by KVM
 */
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED \
	(FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), \
		    ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	 FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), \
		    ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	 FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), \
		    ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	 FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), \
		    ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	 FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), \
		    ID_AA64PFR0_EL1_RAS_IMP))
96 
/*
 * ID_AA64PFR1_EL1 fields allowed without restriction for protected VMs:
 * - BT: Branch Target Identification
 * - SSBS: Speculative Store Bypassing
 */
#define PVM_ID_AA64PFR1_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS))
106 
/*
 * ID_AA64MMFR0_EL1 fields allowed without restriction for protected VMs:
 * - BIGEND: Mixed-endian
 * - SNSMEM: Distinction between Secure and Non-secure Memory
 * - BIGENDEL0: Mixed-endian at EL0 only
 * - EXS: Non-context synchronizing exception entry and exit
 */
#define PVM_ID_AA64MMFR0_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS))
120 
/*
 * ID_AA64MMFR0_EL1 *unsigned* fields capped for protected VMs:
 * - PARANGE: 40-bit IPA
 * - ASIDBITS: 16-bit ASID
 */
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED \
	(FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), \
		    ID_AA64MMFR0_EL1_PARANGE_40) | \
	 FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), \
		    ID_AA64MMFR0_EL1_ASIDBITS_16))
130 
/*
 * ID_AA64MMFR1_EL1 fields allowed without restriction for protected VMs:
 * - HAFDBS: Hardware translation table updates to Access flag and Dirty state
 * - VMIDBits: Number of VMID bits from CPU
 * - HPDS: Hierarchical Permission Disables
 * - PAN: Privileged Access Never
 * - SpecSEI: SError interrupt exceptions from speculative reads
 * - ETS: Enhanced Translation Synchronization
 */
#define PVM_ID_AA64MMFR1_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS))
148 
/*
 * ID_AA64MMFR2_EL1 fields allowed without restriction for protected VMs:
 * - CnP: Common not Private translations
 * - UAO: User Access Override
 * - IESB: IESB bit in the SCTLR_ELx registers
 * - AT: Unaligned single-copy atomicity and atomic functions
 * - IDS: ESR_ELx.EC value on an exception by read access to feature ID space
 * - TTL: TTL field in address operations
 * - BBM: Break-before-make sequences when changing translation block size
 * - E0PD: E0PDx mechanism
 */
#define PVM_ID_AA64MMFR2_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD))
170 
/*
 * Scalable Vectors are not offered to protected VMs, so no field of
 * ID_AA64ZFR0_EL1 is allowed: supporting them would need additional work in
 * KVM, e.g., context-switching and trapping at EL2.
 */
#define PVM_ID_AA64ZFR0_ALLOW	(0ULL)
177 
/*
 * Debug (including breakpoints and watchpoints) is not offered to protected
 * VMs.
 *
 * The Arm architecture mandates at least the Armv8 debug architecture, which
 * would include at least 2 hardware breakpoints and watchpoints. Exposing
 * that to protected guests adds considerable state and complexity, so the
 * debug-related fields are left at the reserved value of 0.
 */
#define PVM_ID_AA64DFR0_ALLOW	(0ULL)
#define PVM_ID_AA64DFR1_ALLOW	(0ULL)
189 
/*
 * Implementation defined features are not supported for protected VMs.
 */
#define PVM_ID_AA64AFR0_ALLOW	(0ULL)
#define PVM_ID_AA64AFR1_ALLOW	(0ULL)
195 
/*
 * ID_AA64ISAR0_EL1 fields allowed for protected VMs. No restrictions are
 * placed on instructions implemented in AArch64.
 */
#define PVM_ID_AA64ISAR0_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR))
215 
/* ID_AA64ISAR1_EL1 fields allowed for protected VMs. */
#define PVM_ID_AA64ISAR1_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM))
232 
/* ID_AA64ISAR2_EL1 fields allowed for protected VMs. */
#define PVM_ID_AA64ISAR2_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3))
237 
238 /*
239  * Returns the maximum number of breakpoints supported for protected VMs.
240  */
pkvm_get_max_brps(void)241 static inline int pkvm_get_max_brps(void)
242 {
243 	int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs),
244 			    PVM_ID_AA64DFR0_ALLOW);
245 
246 	/*
247 	 * If breakpoints are supported, the maximum number is 1 + the field.
248 	 * Otherwise, return 0, which is not compliant with the architecture,
249 	 * but is reserved and is used here to indicate no debug support.
250 	 */
251 	return num ? num + 1 : 0;
252 }
253 
254 /*
255  * Returns the maximum number of watchpoints supported for protected VMs.
256  */
pkvm_get_max_wrps(void)257 static inline int pkvm_get_max_wrps(void)
258 {
259 	int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs),
260 			    PVM_ID_AA64DFR0_ALLOW);
261 
262 	return num ? num + 1 : 0;
263 }
264 
/* Kind tag stored in struct pkvm_moveable_reg. */
enum pkvm_moveable_reg_type {
	PKVM_MREG_MEMORY = 0,
	PKVM_MREG_PROTECTED_RANGE = 1,
};
269 
270 struct pkvm_moveable_reg {
271 	phys_addr_t start;
272 	u64 size;
273 	enum pkvm_moveable_reg_type type;
274 };
275 
276 #define PKVM_NR_MOVEABLE_REGS 512
277 extern struct pkvm_moveable_reg kvm_nvhe_sym(pkvm_moveable_regs)[];
278 extern unsigned int kvm_nvhe_sym(pkvm_moveable_regs_nr);
279 
280 extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
281 extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
282 
283 extern phys_addr_t kvm_nvhe_sym(pvmfw_base);
284 extern phys_addr_t kvm_nvhe_sym(pvmfw_size);
285 
286 static inline unsigned long
hyp_vmemmap_memblock_size(struct memblock_region * reg,size_t vmemmap_entry_size)287 hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
288 {
289 	unsigned long nr_pages = reg->size >> PAGE_SHIFT;
290 	unsigned long start, end;
291 
292 	start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
293 	end = start + nr_pages * vmemmap_entry_size;
294 	start = ALIGN_DOWN(start, PAGE_SIZE);
295 	end = ALIGN(end, PAGE_SIZE);
296 
297 	return end - start;
298 }
299 
hyp_vmemmap_pages(size_t vmemmap_entry_size)300 static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
301 {
302 	unsigned long res = 0, i;
303 
304 	for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
305 		res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
306 						 vmemmap_entry_size);
307 	}
308 
309 	return res >> PAGE_SHIFT;
310 }
311 
hyp_vm_table_pages(void)312 static inline unsigned long hyp_vm_table_pages(void)
313 {
314 	return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT;
315 }
316 
__hyp_pgtable_max_pages(unsigned long nr_pages)317 static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
318 {
319 	unsigned long total = 0, i;
320 
321 	/* Provision the worst case scenario */
322 	for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
323 		nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
324 		total += nr_pages;
325 	}
326 
327 	return total;
328 }
329 
__hyp_pgtable_moveable_regs_pages(void)330 static inline unsigned long __hyp_pgtable_moveable_regs_pages(void)
331 {
332 	unsigned long res = 0, i;
333 
334 	/* Cover all of moveable regions with page-granularity */
335 	for (i = 0; i < kvm_nvhe_sym(pkvm_moveable_regs_nr); i++) {
336 		struct pkvm_moveable_reg *reg = &kvm_nvhe_sym(pkvm_moveable_regs)[i];
337 		res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
338 	}
339 
340 	return res;
341 }
342 
343 #define __PKVM_PRIVATE_SZ SZ_1G
344 
hyp_s1_pgtable_pages(void)345 static inline unsigned long hyp_s1_pgtable_pages(void)
346 {
347 	unsigned long res;
348 
349 	res = __hyp_pgtable_moveable_regs_pages();
350 
351 	res += __hyp_pgtable_max_pages(__PKVM_PRIVATE_SZ >> PAGE_SHIFT);
352 
353 	return res;
354 }
355 
host_s2_pgtable_pages(void)356 static inline unsigned long host_s2_pgtable_pages(void)
357 {
358 	unsigned long res;
359 
360 	/*
361 	 * Include an extra 16 pages to safely upper-bound the worst case of
362 	 * concatenated pgds.
363 	 */
364 	res = __hyp_pgtable_moveable_regs_pages() + 16;
365 
366 	/* Allow 1 GiB for non-moveable regions */
367 	res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
368 
369 	return res;
370 }
371 
/* Pages per hypervisor FFA mailbox. */
#define KVM_FFA_MBOX_NR_PAGES		1

/*
 * Maximum number of constituents allowed in a descriptor. This number is
 * arbitrary, see the comment on SG_MAX_SEGMENTS in hyp_ffa_proxy_pages().
 */
#define KVM_FFA_MAX_NR_CONSTITUENTS	4096
379 
hyp_ffa_proxy_pages(void)380 static inline unsigned long hyp_ffa_proxy_pages(void)
381 {
382 	size_t desc_max;
383 
384 	/*
385 	 * SG_MAX_SEGMENTS is supposed to bound the number of elements in an
386 	 * sglist, which should match the number of consituents in the
387 	 * corresponding FFA descriptor. As such, the EL2 buffer needs to be
388 	 * large enough to hold a descriptor with SG_MAX_SEGMENTS consituents
389 	 * at least. But the kernel's DMA code doesn't enforce the limit, and
390 	 * it is sometimes abused, so let's allow larger descriptors and hope
391 	 * for the best.
392 	 */
393 	BUILD_BUG_ON(KVM_FFA_MAX_NR_CONSTITUENTS < SG_MAX_SEGMENTS);
394 
395 	/*
396 	 * The hypervisor FFA proxy needs enough memory to buffer a fragmented
397 	 * descriptor returned from EL3 in response to a RETRIEVE_REQ call.
398 	 */
399 	desc_max = sizeof(struct ffa_mem_region) +
400 		   sizeof(struct ffa_mem_region_attributes) +
401 		   sizeof(struct ffa_composite_mem_region) +
402 		   KVM_FFA_MAX_NR_CONSTITUENTS * sizeof(struct ffa_mem_region_addr_range);
403 
404 	/* Plus a page each for the hypervisor's RX and TX mailboxes. */
405 	return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
406 }
407 
pkvm_host_fp_state_size(void)408 static inline size_t pkvm_host_fp_state_size(void)
409 {
410 	if (system_supports_sve())
411 		return size_add(sizeof(struct kvm_host_sve_state),
412 		       SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
413 	else
414 		return sizeof(struct user_fpsimd_state);
415 }
416 
417 #endif	/* __ARM64_KVM_PKVM_H__ */
418