1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2020 - Google LLC
4 * Author: Quentin Perret <qperret@google.com>
5 * Author: Fuad Tabba <tabba@google.com>
6 */
7 #ifndef __ARM64_KVM_PKVM_H__
8 #define __ARM64_KVM_PKVM_H__
9
10 #include <linux/arm_ffa.h>
11 #include <linux/memblock.h>
12 #include <linux/scatterlist.h>
13 #include <asm/kvm_pgtable.h>
14 #include <asm/sysreg.h>
15
16 /*
17 * Stores the sve state for the host in protected mode.
18 */
19 struct kvm_host_sve_state {
20 u64 zcr_el1;
21
22 /*
23 * Ordering is important since __sve_save_state/__sve_restore_state
24 * relies on it.
25 */
26 u32 fpsr;
27 u32 fpcr;
28
29 /* Must be SVE_VQ_BYTES (128 bit) aligned. */
30 char sve_regs[];
31 };
32
/* Upper bound on the number of VMs that can co-exist under pKVM. */
#define KVM_MAX_PVMS			255

#define HYP_MEMBLOCK_REGIONS		128
#define PVMFW_INVALID_LOAD_ADDR		(-1)
38
39 int pkvm_vm_ioctl_enable_cap(struct kvm *kvm,struct kvm_enable_cap *cap);
40 int pkvm_init_host_vm(struct kvm *kvm, unsigned long type);
41 int pkvm_create_hyp_vm(struct kvm *kvm);
42 void pkvm_destroy_hyp_vm(struct kvm *kvm);
43 void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa);
44
45 /*
46 * Definitions for features to be allowed or restricted for guest virtual
47 * machines, depending on the mode KVM is running in and on the type of guest
48 * that is running.
49 *
50 * The ALLOW masks represent a bitmask of feature fields that are allowed
51 * without any restrictions as long as they are supported by the system.
52 *
53 * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
54 * features that are restricted to support at most the specified feature.
55 *
 * If a feature field is not present in either, then it is not supported.
57 *
58 * The approach taken for protected VMs is to allow features that are:
59 * - Needed by common Linux distributions (e.g., floating point)
60 * - Trivial to support, e.g., supporting the feature does not introduce or
61 * require tracking of additional state in KVM
 * - Impossible to trap, or to otherwise prevent the guest from using
63 */
64
/*
 * ID_AA64PFR0_EL1 fields allowed for protected VMs:
 * - Floating-point and Advanced SIMD
 * - GICv3(+) system register interface
 * - Data Independent Timing
 * - Spectre/Meltdown Mitigation
 */
#define PVM_ID_AA64PFR0_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3))
80
/*
 * ID_AA64PFR0_EL1 *unsigned* fields capped for protected VMs:
 * - EL0..EL3: AArch64 only (no support for AArch32 guests).
 *   AArch32 adds complexity in trap handling, emulation, condition codes,
 *   etc...
 * - RAS (v1)
 *   Supported by KVM
 */
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED \
	(FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	 FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	 FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	 FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	 FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP))
96
/*
 * ID_AA64PFR1_EL1 fields allowed for protected VMs:
 * - Branch Target Identification
 * - Speculative Store Bypassing
 */
#define PVM_ID_AA64PFR1_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
	 ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS))
106
/*
 * ID_AA64MMFR0_EL1 fields allowed for protected VMs:
 * - Mixed-endian
 * - Distinction between Secure and Non-secure Memory
 * - Mixed-endian at EL0 only
 * - Non-context synchronizing exception entry and exit
 */
#define PVM_ID_AA64MMFR0_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS))
120
/*
 * ID_AA64MMFR0_EL1 *unsigned* fields capped for protected VMs:
 * - 40-bit IPA
 * - 16-bit ASID
 */
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED \
	(FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \
	 FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16))
130
/*
 * ID_AA64MMFR1_EL1 fields allowed for protected VMs:
 * - Hardware translation table updates to Access flag and Dirty state
 * - Number of VMID bits from CPU
 * - Hierarchical Permission Disables
 * - Privileged Access Never
 * - SError interrupt exceptions from speculative reads
 * - Enhanced Translation Synchronization
 */
#define PVM_ID_AA64MMFR1_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS))
148
/*
 * ID_AA64MMFR2_EL1 fields allowed for protected VMs:
 * - Common not Private translations
 * - User Access Override
 * - IESB bit in the SCTLR_ELx registers
 * - Unaligned single-copy atomicity and atomic functions
 * - ESR_ELx.EC value on an exception by read access to feature ID space
 * - TTL field in address operations
 * - Break-before-make sequences when changing translation block size
 * - E0PDx mechanism
 */
#define PVM_ID_AA64MMFR2_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
	 ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD))
170
/*
 * Scalable Vectors are not supported for protected VMs: they would require
 * additional support from KVM, e.g., context-switching and trapping at EL2.
 */
#define PVM_ID_AA64ZFR0_ALLOW	(0ULL)

/*
 * Debug, including breakpoints and watchpoints, is not supported for
 * protected VMs.
 *
 * The Arm architecture mandates support for at least the Armv8 debug
 * architecture, which would include at least 2 hardware breakpoints and
 * watchpoints. Providing that support to protected guests adds considerable
 * state and complexity, so the reserved value of 0 is used for the
 * debug-related fields.
 */
#define PVM_ID_AA64DFR0_ALLOW	(0ULL)
#define PVM_ID_AA64DFR1_ALLOW	(0ULL)

/*
 * Implementation defined features are not supported.
 */
#define PVM_ID_AA64AFR0_ALLOW	(0ULL)
#define PVM_ID_AA64AFR1_ALLOW	(0ULL)
195
/*
 * No restrictions on instructions implemented in AArch64: the
 * ID_AA64ISAR0_EL1 fields listed below are allowed for protected VMs.
 */
#define PVM_ID_AA64ISAR0_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR))
215
/* ID_AA64ISAR1_EL1 fields allowed for protected VMs. */
#define PVM_ID_AA64ISAR1_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM))
232
/* ID_AA64ISAR2_EL1 fields allowed for protected VMs. */
#define PVM_ID_AA64ISAR2_ALLOW \
	(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
	 ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3))
237
238 /*
239 * Returns the maximum number of breakpoints supported for protected VMs.
240 */
pkvm_get_max_brps(void)241 static inline int pkvm_get_max_brps(void)
242 {
243 int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs),
244 PVM_ID_AA64DFR0_ALLOW);
245
246 /*
247 * If breakpoints are supported, the maximum number is 1 + the field.
248 * Otherwise, return 0, which is not compliant with the architecture,
249 * but is reserved and is used here to indicate no debug support.
250 */
251 return num ? num + 1 : 0;
252 }
253
254 /*
255 * Returns the maximum number of watchpoints supported for protected VMs.
256 */
pkvm_get_max_wrps(void)257 static inline int pkvm_get_max_wrps(void)
258 {
259 int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs),
260 PVM_ID_AA64DFR0_ALLOW);
261
262 return num ? num + 1 : 0;
263 }
264
/* Kind of a moveable region; stored in struct pkvm_moveable_reg::type. */
enum pkvm_moveable_reg_type {
	PKVM_MREG_MEMORY,
	PKVM_MREG_PROTECTED_RANGE,
};
269
270 struct pkvm_moveable_reg {
271 phys_addr_t start;
272 u64 size;
273 enum pkvm_moveable_reg_type type;
274 };
275
276 #define PKVM_NR_MOVEABLE_REGS 512
277 extern struct pkvm_moveable_reg kvm_nvhe_sym(pkvm_moveable_regs)[];
278 extern unsigned int kvm_nvhe_sym(pkvm_moveable_regs_nr);
279
280 extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
281 extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
282
283 extern phys_addr_t kvm_nvhe_sym(pvmfw_base);
284 extern phys_addr_t kvm_nvhe_sym(pvmfw_size);
285
286 static inline unsigned long
hyp_vmemmap_memblock_size(struct memblock_region * reg,size_t vmemmap_entry_size)287 hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
288 {
289 unsigned long nr_pages = reg->size >> PAGE_SHIFT;
290 unsigned long start, end;
291
292 start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
293 end = start + nr_pages * vmemmap_entry_size;
294 start = ALIGN_DOWN(start, PAGE_SIZE);
295 end = ALIGN(end, PAGE_SIZE);
296
297 return end - start;
298 }
299
/*
 * Total number of pages of vmemmap needed for all hyp_memory regions, with
 * @vmemmap_entry_size bytes of metadata per page.
 */
static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
{
	unsigned long bytes = 0;
	unsigned long idx;

	for (idx = 0; idx < kvm_nvhe_sym(hyp_memblock_nr); idx++) {
		struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[idx];

		bytes += hyp_vmemmap_memblock_size(reg, vmemmap_entry_size);
	}

	return bytes >> PAGE_SHIFT;
}
311
hyp_vm_table_pages(void)312 static inline unsigned long hyp_vm_table_pages(void)
313 {
314 return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT;
315 }
316
__hyp_pgtable_max_pages(unsigned long nr_pages)317 static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
318 {
319 unsigned long total = 0, i;
320
321 /* Provision the worst case scenario */
322 for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
323 nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
324 total += nr_pages;
325 }
326
327 return total;
328 }
329
__hyp_pgtable_moveable_regs_pages(void)330 static inline unsigned long __hyp_pgtable_moveable_regs_pages(void)
331 {
332 unsigned long res = 0, i;
333
334 /* Cover all of moveable regions with page-granularity */
335 for (i = 0; i < kvm_nvhe_sym(pkvm_moveable_regs_nr); i++) {
336 struct pkvm_moveable_reg *reg = &kvm_nvhe_sym(pkvm_moveable_regs)[i];
337 res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
338 }
339
340 return res;
341 }
342
343 #define __PKVM_PRIVATE_SZ SZ_1G
344
hyp_s1_pgtable_pages(void)345 static inline unsigned long hyp_s1_pgtable_pages(void)
346 {
347 unsigned long res;
348
349 res = __hyp_pgtable_moveable_regs_pages();
350
351 res += __hyp_pgtable_max_pages(__PKVM_PRIVATE_SZ >> PAGE_SHIFT);
352
353 return res;
354 }
355
host_s2_pgtable_pages(void)356 static inline unsigned long host_s2_pgtable_pages(void)
357 {
358 unsigned long res;
359
360 /*
361 * Include an extra 16 pages to safely upper-bound the worst case of
362 * concatenated pgds.
363 */
364 res = __hyp_pgtable_moveable_regs_pages() + 16;
365
366 /* Allow 1 GiB for non-moveable regions */
367 res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
368
369 return res;
370 }
371
/* Pages per hypervisor FF-A mailbox (one RX and one TX mailbox are used). */
#define KVM_FFA_MBOX_NR_PAGES		1

/*
 * Maximum number of constituents allowed in a descriptor. This number is
 * arbitrary, see comment below on SG_MAX_SEGMENTS in hyp_ffa_proxy_pages().
 */
#define KVM_FFA_MAX_NR_CONSTITUENTS	4096
379
hyp_ffa_proxy_pages(void)380 static inline unsigned long hyp_ffa_proxy_pages(void)
381 {
382 size_t desc_max;
383
384 /*
385 * SG_MAX_SEGMENTS is supposed to bound the number of elements in an
386 * sglist, which should match the number of consituents in the
387 * corresponding FFA descriptor. As such, the EL2 buffer needs to be
388 * large enough to hold a descriptor with SG_MAX_SEGMENTS consituents
389 * at least. But the kernel's DMA code doesn't enforce the limit, and
390 * it is sometimes abused, so let's allow larger descriptors and hope
391 * for the best.
392 */
393 BUILD_BUG_ON(KVM_FFA_MAX_NR_CONSTITUENTS < SG_MAX_SEGMENTS);
394
395 /*
396 * The hypervisor FFA proxy needs enough memory to buffer a fragmented
397 * descriptor returned from EL3 in response to a RETRIEVE_REQ call.
398 */
399 desc_max = sizeof(struct ffa_mem_region) +
400 sizeof(struct ffa_mem_region_attributes) +
401 sizeof(struct ffa_composite_mem_region) +
402 KVM_FFA_MAX_NR_CONSTITUENTS * sizeof(struct ffa_mem_region_addr_range);
403
404 /* Plus a page each for the hypervisor's RX and TX mailboxes. */
405 return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
406 }
407
pkvm_host_fp_state_size(void)408 static inline size_t pkvm_host_fp_state_size(void)
409 {
410 if (system_supports_sve())
411 return size_add(sizeof(struct kvm_host_sve_state),
412 SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
413 else
414 return sizeof(struct user_fpsimd_state);
415 }
416
417 #endif /* __ARM64_KVM_PKVM_H__ */
418