// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 * Author: Fuad Tabba <tabba@google.com>
 */
#ifndef __ARM64_KVM_PKVM_H__
#define __ARM64_KVM_PKVM_H__

#include <linux/arm_ffa.h>
#include <linux/memblock.h>
#include <linux/scatterlist.h>
#include <asm/kvm_pgtable.h>
#include <asm/sysreg.h>

/* Maximum number of protected VMs that can be created. */
#define KVM_MAX_PVMS 255

#define HYP_MEMBLOCK_REGIONS 128
#define PVMFW_INVALID_LOAD_ADDR	(-1)

int kvm_arm_vm_ioctl_pkvm(struct kvm *kvm, struct kvm_enable_cap *cap);
int kvm_init_pvm(struct kvm *kvm, unsigned long type);
int create_el2_shadow(struct kvm *kvm);
void kvm_shadow_destroy(struct kvm *kvm);

/*
 * Definitions for features to be allowed or restricted for guest virtual
 * machines, depending on the mode KVM is running in and on the type of guest
 * that is running.
 *
 * The ALLOW masks represent a bitmask of feature fields that are allowed
 * without any restrictions as long as they are supported by the system.
 *
 * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
 * features that are restricted to support at most the specified feature.
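 * For example, ID_AA64PFR0_EL1.RAS is capped at RAS v1 below, so a CPU
 * implementing a later version of RAS is still presented to protected
 * guests as supporting v1 only.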
 *
 * If a feature field is not present in either mask, then it is not supported.
 *
 * The approach taken for protected VMs is to allow features that are:
 * - Needed by common Linux distributions (e.g., floating point)
 * - Trivial to support, e.g., supporting the feature does not introduce or
 *   require tracking of additional state in KVM
 * - Cannot be trapped, so their use by the guest cannot be prevented anyway
 */

/*
 * Allow for protected VMs:
 * - Floating-point and Advanced SIMD
 * - Data Independent Timing
 */
#define PVM_ID_AA64PFR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
	ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
	ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
	)

/*
 * Restrict to the following *unsigned* features for protected VMs:
 * - AArch64 guests only (no support for AArch32 guests):
 *	AArch32 adds complexity in trap handling, emulation, condition codes,
 *	etc...
 * - RAS (v1)
 *	Supported by KVM
 */
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
	)

/*
 * Allow for protected VMs:
 * - Branch Target Identification
 * - Speculative Store Bypassing
 */
#define PVM_ID_AA64PFR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
	ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
	)

/*
 * Allow for protected VMs:
 * - Mixed-endian
 * - Distinction between Secure and Non-secure Memory
 * - Mixed-endian at EL0 only
 * - Non-context synchronizing exception entry and exit
 */
#define PVM_ID_AA64MMFR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
	)

/*
 * Restrict to the following *unsigned* features for protected VMs:
 * - 40-bit IPA
 * - 16-bit ASID
 */
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
	)

/*
 * Allow for protected VMs:
 * - Hardware translation table updates to Access flag and Dirty state
 * - Number of VMID bits from CPU
 * - Hierarchical Permission Disables
 * - Privileged Access Never
 * - SError interrupt exceptions from speculative reads
 * - Enhanced Translation Synchronization
 */
#define PVM_ID_AA64MMFR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
	)

/*
 * Allow for protected VMs:
 * - Common not Private translations
 * - User Access Override
 * - IESB bit in the SCTLR_ELx registers
 * - Unaligned single-copy atomicity and atomic functions
 * - ESR_ELx.EC value on an exception by read access to feature ID space
 * - TTL field in address operations
 * - Break-before-make sequences when changing translation block size
 * - E0PDx mechanism
 */
#define PVM_ID_AA64MMFR2_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
	)

/*
 * No support for Scalable Vectors for protected VMs:
 *	Requires additional support from KVM, e.g., context-switching and
 *	trapping at EL2
 */
#define PVM_ID_AA64ZFR0_ALLOW (0ULL)

/*
 * No support for debug, including breakpoints and watchpoints, for protected
 * VMs:
 *	The Arm architecture mandates support for at least the Armv8 debug
 *	architecture, which would include at least 2 hardware breakpoints and
 *	watchpoints. Providing that support to protected guests adds
 *	considerable state and complexity. Therefore, the reserved value of 0 is
 *	used for debug-related fields.
 */
#define PVM_ID_AA64DFR0_ALLOW (0ULL)
#define PVM_ID_AA64DFR1_ALLOW (0ULL)

/*
 * No support for implementation defined features.
 */
#define PVM_ID_AA64AFR0_ALLOW (0ULL)
#define PVM_ID_AA64AFR1_ALLOW (0ULL)

/*
 * No restrictions on instructions implemented in AArch64.
 */
#define PVM_ID_AA64ISAR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
	)

#define PVM_ID_AA64ISAR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
	)

/*
 * Returns the maximum number of breakpoints supported for protected VMs.
 */
static inline int pkvm_get_max_brps(void)
{
	int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS),
			    PVM_ID_AA64DFR0_ALLOW);

	/*
	 * If breakpoints are supported, the maximum number is 1 + the field.
	 * Otherwise, return 0, which is not compliant with the architecture,
	 * but is reserved and is used here to indicate no debug support.
	 */
	return num ? num + 1 : 0;
}

/*
 * Returns the maximum number of watchpoints supported for protected VMs.
 */
static inline int pkvm_get_max_wrps(void)
{
	int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS),
			    PVM_ID_AA64DFR0_ALLOW);

	return num ? num + 1 : 0;
}

extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);

extern phys_addr_t kvm_nvhe_sym(pvmfw_base);
extern phys_addr_t kvm_nvhe_sym(pvmfw_size);

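/*
 * Returns the size, in bytes, of the page-aligned vmemmap range needed to
 * hold one entry of @vmemmap_entry_size per page of the given memblock
 * region.
 */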
static inline unsigned long
hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
{
	unsigned long nr_pages = reg->size >> PAGE_SHIFT;
	unsigned long start, end;

	start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
	end = start + nr_pages * vmemmap_entry_size;
	start = ALIGN_DOWN(start, PAGE_SIZE);
	end = ALIGN(end, PAGE_SIZE);

	return end - start;
}

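/*
 * Returns the number of pages needed for the hypervisor's vmemmap, i.e. one
 * entry of @vmemmap_entry_size per page of every hyp memblock region.
 */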
static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
{
	unsigned long res = 0, i;

	for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
		res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
						 vmemmap_entry_size);
	}

	return res >> PAGE_SHIFT;
}

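/*
 * Returns the number of pages needed for the shadow table, with one entry of
 * @shadow_entry_size for each of the KVM_MAX_PVMS protected VMs.
 */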
static inline unsigned long hyp_shadow_table_pages(size_t shadow_entry_size)
{
	return PAGE_ALIGN(KVM_MAX_PVMS * shadow_entry_size) >> PAGE_SHIFT;
}

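/*
 * Returns the worst-case number of page-table pages needed to map @nr_pages
 * pages at page granularity.
 */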
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
	unsigned long total = 0, i;

	/* Provision the worst case scenario */
	for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
		nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
		total += nr_pages;
	}

	return total;
}

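/*
 * Returns the worst-case number of page-table pages needed to map all of the
 * hyp memblock regions at page granularity.
 */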
static inline unsigned long __hyp_pgtable_total_pages(void)
{
	unsigned long res = 0, i;

	/* Cover all of memory with page-granularity */
	for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
		struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
		res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
	}

	return res;
}

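/*
 * Returns the number of pages to reserve for the hypervisor's stage-1
 * page-table, including room for 1 GiB of private mappings.
 */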
static inline unsigned long hyp_s1_pgtable_pages(void)
{
	unsigned long res;

	res = __hyp_pgtable_total_pages();

	/* Allow 1 GiB for private mappings */
	res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);

	return res;
}

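/*
 * Returns the number of pages to reserve for the host's stage-2 page-table,
 * including room for 1 GiB of MMIO mappings.
 */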
static inline unsigned long host_s2_pgtable_pages(void)
{
	unsigned long res;

	/*
	 * Include an extra 16 pages to safely upper-bound the worst case of
	 * concatenated pgds.
	 */
	res = __hyp_pgtable_total_pages() + 16;

	/* Allow 1 GiB for MMIO mappings */
	res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);

	return res;
}

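/* Number of pages used for each of the hypervisor's FF-A RX/TX mailboxes. */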
#define KVM_FFA_MBOX_NR_PAGES	1

/*
 * Maximum number of constituents allowed in a descriptor. This number is
 * arbitrary, see comment below on SG_MAX_SEGMENTS in hyp_ffa_proxy_pages().
 */
#define KVM_FFA_MAX_NR_CONSTITUENTS	4096

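/*
 * Returns the number of pages to reserve for the hypervisor's FFA proxy: its
 * RX/TX mailboxes plus a buffer sized for the largest descriptor it will
 * handle.
 */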
static inline unsigned long hyp_ffa_proxy_pages(void)
{
	size_t desc_max;

	/*
	 * SG_MAX_SEGMENTS is supposed to bound the number of elements in an
	 * sglist, which should match the number of constituents in the
	 * corresponding FFA descriptor. As such, the EL2 buffer needs to be
	 * large enough to hold a descriptor with at least SG_MAX_SEGMENTS
	 * constituents. But the kernel's DMA code doesn't enforce the limit,
	 * and it is sometimes abused, so let's allow larger descriptors and
	 * hope for the best.
	 */
	BUILD_BUG_ON(KVM_FFA_MAX_NR_CONSTITUENTS < SG_MAX_SEGMENTS);

	/*
	 * The hypervisor FFA proxy needs enough memory to buffer a fragmented
	 * descriptor returned from EL3 in response to a RETRIEVE_REQ call.
	 */
	desc_max = sizeof(struct ffa_mem_region) +
		   sizeof(struct ffa_mem_region_attributes) +
		   sizeof(struct ffa_composite_mem_region) +
		   KVM_FFA_MAX_NR_CONSTITUENTS * sizeof(struct ffa_mem_region_addr_range);

	/* Plus a page each for the hypervisor's RX and TX mailboxes. */
	return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
}

#endif	/* __ARM64_KVM_PKVM_H__ */