// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 * Author: Fuad Tabba <tabba@google.com>
 */
#ifndef __ARM64_KVM_PKVM_H__
#define __ARM64_KVM_PKVM_H__

#include <linux/arm_ffa.h>
#include <linux/memblock.h>
#include <linux/scatterlist.h>
#include <asm/kvm_pgtable.h>
#include <asm/sysreg.h>

/* Maximum number of protected VMs that can be created. */
#define KVM_MAX_PVMS 255

#define HYP_MEMBLOCK_REGIONS 128
#define PVMFW_INVALID_LOAD_ADDR (-1)

int kvm_arm_vm_ioctl_pkvm(struct kvm *kvm, struct kvm_enable_cap *cap);
int kvm_init_pvm(struct kvm *kvm, unsigned long type);
int create_el2_shadow(struct kvm *kvm);
void kvm_shadow_destroy(struct kvm *kvm);

/*
 * Definitions for features to be allowed or restricted for guest virtual
 * machines, depending on the mode KVM is running in and on the type of guest
 * that is running.
 *
 * The ALLOW masks represent a bitmask of feature fields that are allowed
 * without any restrictions as long as they are supported by the system.
 *
 * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
 * features that are restricted to support at most the specified feature.
 *
 * If a feature field is not present in either, then it is not supported.
 *
 * The approach taken for protected VMs is to allow features that are:
 * - Needed by common Linux distributions (e.g., floating point)
 * - Trivial to support, e.g., supporting the feature does not introduce or
 *   require tracking of additional state in KVM
 * - Impossible to trap, so the guest can use them anyway
 *
 * An illustrative sketch of how the ALLOW and RESTRICT_UNSIGNED masks
 * combine follows the ID_AA64PFR0 definitions below.
 */

/*
 * Allow for protected VMs:
 * - Floating-point and Advanced SIMD
 * - Data Independent Timing
 */
#define PVM_ID_AA64PFR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
	ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
	ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
	)

/*
 * Restrict to the following *unsigned* features for protected VMs:
 * - AArch64 guests only (no support for AArch32 guests):
 *   AArch32 adds complexity in trap handling, emulation, condition codes,
 *   etc...
 * - RAS (v1)
 *   Supported by KVM
 */
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
	)
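
/*
 * Illustrative sketch only (not part of the hypervisor code): one way the
 * ALLOW and RESTRICT_UNSIGNED masks above can be combined for a single
 * unsigned ID register field. Fields outside both masks read as zero, and
 * fields present in a RESTRICT_UNSIGNED mask are capped at the value encoded
 * there. The helper name is hypothetical and the RAS field is used purely as
 * an example.
 */
static inline u64 pvm_example_clamp_aa64pfr0_ras(u64 host_aa64pfr0)
{
	u64 ras = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), host_aa64pfr0);
	u64 cap = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS),
			    PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);

	/* Clamp the unsigned RAS field to the restricted maximum (RAS v1). */
	return FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS),
			  ras < cap ? ras : cap);
}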

/*
 * Allow for protected VMs:
 * - Branch Target Identification
 * - Speculative Store Bypassing
 */
#define PVM_ID_AA64PFR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
	ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
	)

/*
 * Allow for protected VMs:
 * - Mixed-endian
 * - Distinction between Secure and Non-secure Memory
 * - Mixed-endian at EL0 only
 * - Non-context synchronizing exception entry and exit
 */
#define PVM_ID_AA64MMFR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
	)

/*
 * Restrict to the following *unsigned* features for protected VMs:
 * - 40-bit IPA
 * - 16-bit ASID
 */
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
	)
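
/*
 * For reference, assuming the usual ID_AA64MMFR0_EL1 layout: the two
 * FIELD_PREP() terms above place the encoding 0x2 in PARange (bits [3:0],
 * a 40-bit physical address range) and 0x2 in ASIDBits (bits [7:4], 16-bit
 * ASIDs), so the low byte of the restricted register value reads 0x22.
 */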

/*
 * Allow for protected VMs:
 * - Hardware translation table updates to Access flag and Dirty state
 * - Number of VMID bits from CPU
 * - Hierarchical Permission Disables
 * - Privileged Access Never
 * - SError interrupt exceptions from speculative reads
 * - Enhanced Translation Synchronization
 */
#define PVM_ID_AA64MMFR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
	)

/*
 * Allow for protected VMs:
 * - Common not Private translations
 * - User Access Override
 * - IESB bit in the SCTLR_ELx registers
 * - Unaligned single-copy atomicity and atomic functions
 * - ESR_ELx.EC value on an exception by read access to feature ID space
 * - TTL field in address operations
 * - Break-before-make sequences when changing translation block size
 * - E0PDx mechanism
 */
#define PVM_ID_AA64MMFR2_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
	)

/*
 * No support for Scalable Vectors for protected VMs:
 *	Requires additional support from KVM, e.g., context-switching and
 *	trapping at EL2
 */
#define PVM_ID_AA64ZFR0_ALLOW (0ULL)

/*
 * No support for debug, including breakpoints and watchpoints, for protected
 * VMs:
 *	The Arm architecture mandates support for at least the Armv8 debug
 *	architecture, which would include at least 2 hardware breakpoints and
 *	watchpoints. Providing that support to protected guests adds
 *	considerable state and complexity. Therefore, the reserved value of 0
 *	is used for debug-related fields.
 */
#define PVM_ID_AA64DFR0_ALLOW (0ULL)
#define PVM_ID_AA64DFR1_ALLOW (0ULL)

/*
 * No support for implementation defined features.
 */
#define PVM_ID_AA64AFR0_ALLOW (0ULL)
#define PVM_ID_AA64AFR1_ALLOW (0ULL)

/*
 * No restrictions on instructions implemented in AArch64.
 */
#define PVM_ID_AA64ISAR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
	)

#define PVM_ID_AA64ISAR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
	)

/*
 * Returns the maximum number of breakpoints supported for protected VMs.
 */
static inline int pkvm_get_max_brps(void)
{
	int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS),
			    PVM_ID_AA64DFR0_ALLOW);

	/*
	 * If breakpoints are supported, the maximum number is 1 + the field.
	 * Otherwise, return 0, which is not compliant with the architecture,
	 * but is reserved and is used here to indicate no debug support.
	 */
	return num ? num + 1 : 0;
}

/*
 * Returns the maximum number of watchpoints supported for protected VMs.
 */
static inline int pkvm_get_max_wrps(void)
{
	int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS),
			    PVM_ID_AA64DFR0_ALLOW);

	return num ? num + 1 : 0;
}
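
/*
 * Worked example: with PVM_ID_AA64DFR0_ALLOW fixed at 0, both helpers above
 * return 0 (no debug support advertised). If the allowed mask ever exposed
 * the BRPs/WRPs fields, a field value of, say, 5 would yield 5 + 1 = 6,
 * since the architectural encoding is "number of breakpoints minus one".
 */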

extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);

extern phys_addr_t kvm_nvhe_sym(pvmfw_base);
extern phys_addr_t kvm_nvhe_sym(pvmfw_size);

static inline unsigned long
hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
{
	unsigned long nr_pages = reg->size >> PAGE_SHIFT;
	unsigned long start, end;

	start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
	end = start + nr_pages * vmemmap_entry_size;
	start = ALIGN_DOWN(start, PAGE_SIZE);
	end = ALIGN(end, PAGE_SIZE);

	return end - start;
}
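
/*
 * Rough sense of scale (illustrative numbers only): with 4 KiB pages and an
 * assumed 32-byte vmemmap entry, a 512 MiB memblock covers 131072 pages and
 * therefore needs 131072 * 32 bytes = 4 MiB of vmemmap, i.e. 1024 pages once
 * the start and end are aligned to page boundaries.
 */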

static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
{
	unsigned long res = 0, i;

	for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
		res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
						 vmemmap_entry_size);
	}

	return res >> PAGE_SHIFT;
}

static inline unsigned long hyp_shadow_table_pages(size_t shadow_entry_size)
{
	return PAGE_ALIGN(KVM_MAX_PVMS * shadow_entry_size) >> PAGE_SHIFT;
}

static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
	unsigned long total = 0, i;

	/* Provision the worst case scenario */
	for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
		nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
		total += nr_pages;
	}

	return total;
}
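
/*
 * Worked example (assuming a 4 KiB granule, i.e. PTRS_PER_PTE == 512, and
 * KVM_PGTABLE_MAX_LEVELS == 4): mapping 4 GiB (1048576 pages) needs at most
 * 2048 + 4 + 1 + 1 = 2054 page-table pages across the four levels.
 */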

static inline unsigned long __hyp_pgtable_total_pages(void)
{
	unsigned long res = 0, i;

	/* Cover all of memory with page-granularity */
	for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
		struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
		res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
	}

	return res;
}

static inline unsigned long hyp_s1_pgtable_pages(void)
{
	unsigned long res;

	res = __hyp_pgtable_total_pages();

	/* Allow 1 GiB for private mappings */
	res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);

	return res;
}

static inline unsigned long host_s2_pgtable_pages(void)
{
	unsigned long res;

	/*
	 * Include an extra 16 pages to safely upper-bound the worst case of
	 * concatenated pgds.
	 */
	res = __hyp_pgtable_total_pages() + 16;

	/* Allow 1 GiB for MMIO mappings */
	res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);

	return res;
}

#define KVM_FFA_MBOX_NR_PAGES 1

/*
 * Maximum number of constituents allowed in a descriptor. This number is
 * arbitrary, see comment below on SG_MAX_SEGMENTS in hyp_ffa_proxy_pages().
 */
#define KVM_FFA_MAX_NR_CONSTITUENTS 4096

static inline unsigned long hyp_ffa_proxy_pages(void)
{
	size_t desc_max;

	/*
	 * SG_MAX_SEGMENTS is supposed to bound the number of elements in an
	 * sglist, which should match the number of constituents in the
	 * corresponding FFA descriptor. As such, the EL2 buffer needs to be
	 * large enough to hold a descriptor with at least SG_MAX_SEGMENTS
	 * constituents. But the kernel's DMA code doesn't enforce the limit,
	 * and it is sometimes abused, so let's allow larger descriptors and
	 * hope for the best.
	 */
	BUILD_BUG_ON(KVM_FFA_MAX_NR_CONSTITUENTS < SG_MAX_SEGMENTS);

	/*
	 * The hypervisor FFA proxy needs enough memory to buffer a fragmented
	 * descriptor returned from EL3 in response to a RETRIEVE_REQ call.
	 */
	desc_max = sizeof(struct ffa_mem_region) +
		   sizeof(struct ffa_mem_region_attributes) +
		   sizeof(struct ffa_composite_mem_region) +
		   KVM_FFA_MAX_NR_CONSTITUENTS * sizeof(struct ffa_mem_region_addr_range);

	/* Plus a page each for the hypervisor's RX and TX mailboxes. */
	return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
}
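
/*
 * Rough sizing example (assuming 4 KiB pages and a 16-byte
 * struct ffa_mem_region_addr_range): the constituent array alone is
 * 4096 * 16 bytes = 64 KiB, so with the fixed-size headers on top desc_max
 * rounds up to 17 pages, and the helper returns 2 + 17 = 19 pages.
 */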

#endif /* __ARM64_KVM_PKVM_H__ */