/* SPDX-License-Identifier: GPL-2.0 */

#ifndef __ARM64_KVM_PKVM_MODULE_H__
#define __ARM64_KVM_PKVM_MODULE_H__

#include <asm/kvm_pgtable.h>
#include <linux/android_kabi.h>
#include <linux/export.h>

typedef void (*dyn_hcall_t)(struct user_pt_regs *);
struct kvm_hyp_iommu;
struct iommu_iotlb_gather;
struct kvm_hyp_iommu_domain;
struct pkvm_device;

#ifdef CONFIG_MODULES
enum pkvm_psci_notification {
	PKVM_PSCI_CPU_SUSPEND,
	PKVM_PSCI_SYSTEM_SUSPEND,
	PKVM_PSCI_CPU_ENTRY,
};

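/*
 * Example (illustrative sketch, not part of the API): shape of a PSCI
 * notifier as registered with pkvm_module_ops::register_psci_notifier.
 * The callback and the device-state helper are hypothetical.
 *
 *	static void my_psci_cb(enum pkvm_psci_notification notif,
 *			       struct user_pt_regs *regs)
 *	{
 *		if (notif == PKVM_PSCI_SYSTEM_SUSPEND)
 *			my_save_device_state();	// hypothetical helper
 *	}
 */
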
struct pkvm_sglist_page {
	u64 pfn : 40;
	u8 order;
} __packed;

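/*
 * Example (illustrative sketch): filling a scatter-gather list for
 * pkvm_module_ops::host_donate_sglist_hyp. The PFN values are made up and
 * @ops is assumed to be the pkvm_module_ops pointer handed to the module.
 *
 *	struct pkvm_sglist_page sgl[] = {
 *		{ .pfn = 0x80000, .order = 4 },	// 2^4 contiguous pages
 *		{ .pfn = 0x90000, .order = 0 },	// a single page
 *	};
 *
 *	ret = ops->host_donate_sglist_hyp(sgl, ARRAY_SIZE(sgl));
 */
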
/**
 * struct pkvm_module_trng_ops - pKVM TRNG implementation module callbacks
 * @trng_uuid:	The implementation's UUID, advertised on a TRNG_GET_UUID call.
 * @trng_rnd64:	TRNG implementation call for generating entropy for a
 *		TRNG_RND64 call. The implementation is required to output the
 *		specified number of bits of entropy. The output array will be
 *		stored in the registers in the following order: x3, x2, x1.
 */
struct pkvm_module_trng_ops {
	const uuid_t *trng_uuid;
	int (*trng_rnd64)(u64 *entropy, int bits);

	ANDROID_KABI_RESERVE(1);
	ANDROID_KABI_RESERVE(2);
};

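/*
 * Example (illustrative sketch): a module-provided TRNG backend. The UUID,
 * the hardware entropy helper and all "my_" names are hypothetical; only
 * register_guest_trng_ops and the ops layout come from this header.
 *
 *	static int my_trng_rnd64(u64 *entropy, int bits)
 *	{
 *		// Fill entropy[0..2] with at least @bits bits; pKVM returns
 *		// them to the guest in x3, x2, x1 for TRNG_RND64.
 *		return my_hw_read_entropy(entropy, bits);
 *	}
 *
 *	static const struct pkvm_module_trng_ops my_trng_ops = {
 *		.trng_uuid	= &my_trng_uuid,
 *		.trng_rnd64	= my_trng_rnd64,
 *	};
 *
 *	// From the module's EL2 init: ops->register_guest_trng_ops(&my_trng_ops);
 */
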
/**
 * struct pkvm_module_ops - pKVM module callbacks
 * @create_private_mapping:	Map a memory region into the hypervisor private
 *				range. @haddr returns the virtual address where
 *				the mapping starts. It can't be unmapped. Host
 *				access permissions are unaffected.
 * @alloc_module_va:		Reserve a range of VA space in the hypervisor
 *				private range. This is handy for modules that
 *				need to map plugin code in a similar fashion to
 *				how pKVM maps module code. That space could also
 *				be used to map memory temporarily, when the
 *				fixmap granularity (PAGE_SIZE) is too small.
 * @map_module_page:		Used in conjunction with @alloc_module_va. When
 *				@is_protected is not set, the page is also
 *				unmapped from the host stage-2.
 * @register_serial_driver:	Register a driver for a serial interface. The
 *				framework only needs a single callback
 *				@hyp_putc_cb which is expected to print a single
 *				character.
 * @putc:			If a serial interface is registered, print a
 *				single character, else do nothing.
 * @puts:			If a serial interface is registered, print a
 *				string, else do nothing.
 * @putx64:			If a serial interface is registered, print a
 *				64-bit number, else do nothing.
 * @fixmap_map:			Map a page in the per-CPU hypervisor fixmap.
 *				This is intended to be used for temporary
 *				mappings in the hypervisor VA space.
 *				@fixmap_unmap must be called between each
 *				mapping to do cache maintenance and ensure the
 *				new mapping is visible.
 * @fixmap_unmap:		Unmap a page from the hypervisor fixmap. This
 *				call is required between each @fixmap_map().
 * @fixblock_map:		Map a PMD-sized large page into a CPU-shared
 *				fixmap. This can be used to replace and speed up
 *				a set of @fixmap_map calls. @fixblock_unmap must
 *				be called between each mapping to do cache
 *				maintenance and ensure the new mapping is
 *				visible.
 * @fixblock_unmap:		Unmap a PMD-sized large page from the
 *				hypervisor fixmap. This call is required
 *				between each @fixblock_map.
 * @linear_map_early:		Map a large portion of memory into the
 *				hypervisor linear VA space. This is intended to
 *				be used only for module bootstrap and must be
 *				unmapped before the host is deprivileged.
 * @linear_unmap_early:		See @linear_map_early.
 * @flush_dcache_to_poc:	Clean the data cache to the point of coherency.
 *				This is not a requirement for any other of the
 *				pkvm_module_ops callbacks.
 * @update_hcr_el2:		Modify the running value of HCR_EL2. pKVM will
 *				save/restore the new value across power
 *				management transitions.
 * @update_hfgwtr_el2:		Modify the running value of HFGWTR_EL2. pKVM
 *				will save/restore the new value across power
 *				management transitions.
 * @register_host_perm_fault_handler:
 *				@cb is called whenever the host generates an
 *				abort with the fault status code Permission
 *				Fault. This is useful when a module changes the
 *				host stage-2 permissions for certain pages.
 *				Up to 16 handlers can be registered. Returning
 *				-EPERM lets pKVM handle the abort, while
 *				returning 0 causes the next handler to be
 *				called. The handler order depends on the
 *				registration order.
 * @host_stage2_mod_prot:	Apply @prot to the page @pfn. This requires a
 *				permission fault handler to be registered (see
 *				@register_host_perm_fault_handler), otherwise
 *				pKVM will be unable to handle this fault and the
 *				CPU will be stuck in an infinite loop. @nr_pages
 *				allows applying this prot to a range of
 *				contiguous pages.
 * @host_stage2_enable_lazy_pte:
 *				DEPRECATED
 *				Unmap a range of memory from the host stage-2,
 *				leaving the pages' host ownership intact. The
 *				pages will be remapped lazily (subject to the
 *				usual ownership checks) in response to a
 *				faulting access from the host.
 * @host_stage2_disable_lazy_pte:
 *				DEPRECATED
 *				This is the opposite of
 *				@host_stage2_enable_lazy_pte. Must be called
 *				once the module is done with the region.
 * @host_stage2_get_leaf:	Query the host's stage-2 page-table entry for
 *				the page @phys.
 * @register_host_smc_handler:	@cb is called whenever the host issues an SMC
 *				pKVM couldn't handle.
 *				Up to 16 handlers can be registered. The handler
 *				order depends on the registration order. If no
 *				handler returns true, the SMC is forwarded to
 *				EL3.
 * @register_guest_smc_handler:	@cb is called when the guest identified by
 *				@handle issues an SMC that pKVM couldn't
 *				handle. If @cb returns false, an unsupported
 *				operation error is returned to the guest.
 * @register_default_trap_handler:
 *				@cb is called whenever EL2 traps EL1 and pKVM
 *				has not handled it. If @cb returns false, the
 *				hypervisor will panic. This trap handler must be
 *				registered whenever changes are made to HCR_EL2
 *				(@update_hcr_el2) or HFGWTR_EL2
 *				(@update_hfgwtr_el2).
 * @register_illegal_abt_notifier:
 *				Notify the module of a pending illegal abort
 *				from the host. On @cb return, the abort will be
 *				injected back into the host.
 * @register_psci_notifier:	Notify the module of a pending PSCI event.
 * @register_hyp_panic_notifier:
 *				Notify the module of a pending hypervisor
 *				panic. On return from @cb, the panic will occur.
 * @register_unmask_serror:	When @unmask returns true, the hypervisor will
 *				unmask SErrors at EL2. Although the hypervisor
 *				cannot recover from an SError (and will panic if
 *				one occurs), they can be useful for debugging in
 *				some situations. @mask is the @unmask twin and
 *				is called before remasking SErrors.
 * @host_donate_hyp:		The page @pfn is unmapped from the host and
 *				full control is given to the hypervisor.
 * @host_donate_hyp_prot:	As @host_donate_hyp, but this variant sets the
 *				prot of the hypervisor mapping.
 * @host_donate_sglist_hyp:	Similar to @host_donate_hyp, but takes an array
 *				of PFNs (struct pkvm_sglist_page) as an
 *				argument. This is intended to batch IOMMU
 *				updates.
 * @hyp_donate_host:		The page @pfn whose control has previously been
 *				given to the hypervisor (@host_donate_hyp) is
 *				given back to the host.
 * @host_share_hyp:		The page @pfn will be shared between the host
 *				and the hypervisor. Must be followed by
 *				@pin_shared_mem.
 * @host_unshare_hyp:		The page @pfn will be unshared and unmapped from
 *				the hypervisor. Must be called after
 *				@unpin_shared_mem.
 * @pin_shared_mem:		After @host_share_hyp, the newly shared page is
 *				still owned by the host. @pin_shared_mem will
 *				prevent the host from reclaiming that page until
 *				the hypervisor releases it (@unpin_shared_mem).
 * @unpin_shared_mem:		Enable the host to reclaim the shared memory
 *				(@host_unshare_hyp).
 * @memcpy:			Same as the kernel memcpy, but uses hypervisor
 *				VAs.
 * @memset:			Same as the kernel memset, but uses a
 *				hypervisor VA.
 * @hyp_pa:			Return the physical address for a hypervisor
 *				virtual address in the linear range.
 * @hyp_va:			Convert a physical address into a virtual one.
 * @kern_hyp_va:		Convert a kernel virtual address into a
 *				hypervisor virtual one.
 * @hyp_alloc:			Allocate memory in hyp VA space.
 * @hyp_alloc_errno:		Error in case hyp_alloc() returns NULL.
 * @hyp_free:			Free memory allocated from hyp_alloc().
 * @iommu_donate_pages:		Allocate memory from the IOMMU pool.
 * @iommu_reclaim_pages:	Reclaim memory from iommu_donate_pages().
 * @iommu_init_device:		Initialize common IOMMU fields.
 * @udelay:			Delay in microseconds.
 * @hyp_alloc_missing_donations:
 *				Missing donations if the allocator returns NULL.
 * @iommu_iotlb_gather_add_page:
 *				Add an IOVA range to an iommu_iotlb_gather.
 * @pkvm_unuse_dma:		Decrement the refcount of pages used for DMA.
 *				This is typically called from the module after a
 *				successful unmap() operation, so the hypervisor
 *				can track the page state.
 * @iommu_snapshot_host_stage2:	Snapshot the host stage-2 CPU page table into
 *				an IOMMU domain.
 * @list_add_valid_or_report:	Needed if the code uses linked lists.
 * @list_del_entry_valid_or_report:
 *				Needed if the code uses linked lists.
 * @iommu_donate_pages_atomic:	Allocate memory from the IOMMU identity pool.
 * @iommu_reclaim_pages_atomic:
 *				Reclaim memory from iommu_donate_pages_atomic().
 * @hyp_smp_processor_id:	Return the current CPU id.
 * @device_register_reset:	Register a reset callback for a device, called
 *				before/after the device is assigned. Only one
 *				callback can be registered per device.
 *				Devices are identified by the base address of
 *				their MMIO as defined in the device tree.
 *				Reset is expected to clear any state/secrets on
 *				the device and put it in a quiescent state,
 *				where it can't trigger any DMA.
 *				If reset fails when assigning a device to a
 *				guest, the device won't be assigned. If it fails
 *				on the guest teardown path, the hypervisor
 *				panics to avoid leaking any information.
 *				The direction of assignment can be deduced from
 *				pkvm_device::ctxt, where NULL means host to
 *				guest and vice versa.
 * @register_guest_trng_ops:	Register an alternative ARM SMCCC TRNG
 *				implementation for pVMs. @ops.trng_uuid is used
 *				to advertise the identity of the TRNG
 *				implementation. @ops.trng_rnd64 is used to
 *				generate entropy bits for the guest.
 */
struct pkvm_module_ops {
	int (*create_private_mapping)(phys_addr_t phys, size_t size,
				      enum kvm_pgtable_prot prot,
				      unsigned long *haddr);
	void *(*alloc_module_va)(u64 nr_pages);
	int (*map_module_page)(u64 pfn, void *va, enum kvm_pgtable_prot prot, bool is_protected);
	int (*register_serial_driver)(void (*hyp_putc_cb)(char));
	void (*putc)(char c);
	void (*puts)(const char *s);
	void (*putx64)(u64 x);
	void *(*fixmap_map)(phys_addr_t phys);
	void (*fixmap_unmap)(void);
	void *(*fixblock_map)(phys_addr_t phys);
	void (*fixblock_unmap)(void);
	void *(*linear_map_early)(phys_addr_t phys, size_t size, enum kvm_pgtable_prot prot);
	void (*linear_unmap_early)(void *addr, size_t size);
	void (*flush_dcache_to_poc)(void *addr, size_t size);
	void (*update_hcr_el2)(unsigned long set_mask, unsigned long clear_mask);
	void (*update_hfgwtr_el2)(unsigned long set_mask, unsigned long clear_mask);
	int (*register_host_perm_fault_handler)(int (*cb)(struct user_pt_regs *regs, u64 esr, u64 addr));
	int (*host_stage2_mod_prot)(u64 pfn, enum kvm_pgtable_prot prot, u64 nr_pages, bool update_iommu);
	int (*host_stage2_get_leaf)(phys_addr_t phys, kvm_pte_t *ptep, s8 *level);
	int (*host_stage2_enable_lazy_pte)(u64 addr, u64 nr_pages);
	int (*host_stage2_disable_lazy_pte)(u64 addr, u64 nr_pages);
	int (*register_host_smc_handler)(bool (*cb)(struct user_pt_regs *));
	int (*register_guest_smc_handler)(bool (*cb)(struct arm_smccc_1_2_regs *regs,
						     struct arm_smccc_1_2_regs *res,
						     pkvm_handle_t handle));
	int (*register_default_trap_handler)(bool (*cb)(struct user_pt_regs *));
	int (*register_illegal_abt_notifier)(void (*cb)(struct user_pt_regs *));
	int (*register_psci_notifier)(void (*cb)(enum pkvm_psci_notification, struct user_pt_regs *));
	int (*register_hyp_panic_notifier)(void (*cb)(struct user_pt_regs *));
	int (*register_unmask_serror)(bool (*unmask)(void), void (*mask)(void));
	int (*host_donate_hyp)(u64 pfn, u64 nr_pages, bool accept_mmio);
	int (*host_donate_hyp_prot)(u64 pfn, u64 nr_pages, bool accept_mmio, enum kvm_pgtable_prot prot);
	int (*host_donate_sglist_hyp)(struct pkvm_sglist_page *sglist, size_t nr_pages);
	int (*hyp_donate_host)(u64 pfn, u64 nr_pages);
	int (*host_share_hyp)(u64 pfn);
	int (*host_unshare_hyp)(u64 pfn);
	int (*pin_shared_mem)(void *from, void *to);
	void (*unpin_shared_mem)(void *from, void *to);
	void *(*memcpy)(void *to, const void *from, size_t count);
	void *(*memset)(void *dst, int c, size_t count);
	phys_addr_t (*hyp_pa)(void *x);
	void *(*hyp_va)(phys_addr_t phys);
	unsigned long (*kern_hyp_va)(unsigned long x);
	void *(*tracing_reserve_entry)(unsigned long length);
	void (*tracing_commit_entry)(void);
	void (*tracing_mod_hyp_printk)(u8 fmt_id, u64 a, u64 b, u64 c, u64 d);
	void *(*hyp_alloc)(size_t size);
	int (*hyp_alloc_errno)(void);
	void (*hyp_free)(void *addr);
	u8 (*hyp_alloc_missing_donations)(void);
	void *(*iommu_donate_pages)(u8 order, int flags);
	void (*iommu_reclaim_pages)(void *p, u8 order);
	int (*iommu_init_device)(struct kvm_hyp_iommu *iommu);
	void (*udelay)(unsigned long usecs);
	void (*iommu_iotlb_gather_add_page)(struct kvm_hyp_iommu_domain *domain,
					    struct iommu_iotlb_gather *gather,
					    unsigned long iova,
					    size_t size);
	int (*pkvm_unuse_dma)(phys_addr_t phys_addr, size_t size);
#ifdef CONFIG_LIST_HARDENED
	/* These two functions change calling convention based on CONFIG_DEBUG_LIST. */
	typeof(__list_add_valid_or_report) *list_add_valid_or_report;
	typeof(__list_del_entry_valid_or_report) *list_del_entry_valid_or_report;
#endif
	int (*iommu_snapshot_host_stage2)(struct kvm_hyp_iommu_domain *domain);
	void *(*iommu_donate_pages_atomic)(u8 order);
	void (*iommu_reclaim_pages_atomic)(void *p, u8 order);
	int (*hyp_smp_processor_id)(void);
	int (*device_register_reset)(u64 phys, void *cookie,
				     int (*cb)(void *cookie, bool host_to_guest));
	ANDROID_KABI_USE(1, int (*register_guest_trng_ops)(
				const struct pkvm_module_trng_ops *ops));
	ANDROID_KABI_RESERVE(2);
	ANDROID_KABI_RESERVE(3);
	ANDROID_KABI_RESERVE(4);
	ANDROID_KABI_RESERVE(5);
	ANDROID_KABI_RESERVE(6);
	ANDROID_KABI_RESERVE(7);
	ANDROID_KABI_RESERVE(8);
	ANDROID_KABI_RESERVE(9);
	ANDROID_KABI_RESERVE(10);
	ANDROID_KABI_RESERVE(11);
	ANDROID_KABI_RESERVE(12);
	ANDROID_KABI_RESERVE(13);
	ANDROID_KABI_RESERVE(14);
	ANDROID_KABI_RESERVE(15);
	ANDROID_KABI_RESERVE(16);
	ANDROID_KABI_RESERVE(17);
	ANDROID_KABI_RESERVE(18);
	ANDROID_KABI_RESERVE(19);
	ANDROID_KABI_RESERVE(20);
	ANDROID_KABI_RESERVE(21);
	ANDROID_KABI_RESERVE(22);
	ANDROID_KABI_RESERVE(23);
	ANDROID_KABI_RESERVE(24);
	ANDROID_KABI_RESERVE(25);
	ANDROID_KABI_RESERVE(26);
	ANDROID_KABI_RESERVE(27);
	ANDROID_KABI_RESERVE(28);
	ANDROID_KABI_RESERVE(29);
	ANDROID_KABI_RESERVE(30);
	ANDROID_KABI_RESERVE(31);
	ANDROID_KABI_RESERVE(32);
};

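/*
 * Example (illustrative sketch): a minimal EL2 module init function. pKVM
 * passes the pkvm_module_ops vector to the module's hyp init callback (set
 * via pkvm_load_el2_module() below); all "my_" names are hypothetical.
 *
 *	static const struct pkvm_module_ops *mod_ops;
 *
 *	static void my_putc(char c)
 *	{
 *		// hypothetical: write @c to a platform UART
 *	}
 *
 *	int my_hyp_init(const struct pkvm_module_ops *ops)
 *	{
 *		mod_ops = ops;
 *		ops->register_serial_driver(my_putc);
 *		ops->puts("my module running at EL2\n");
 *		return 0;
 *	}
 */
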
int __pkvm_load_el2_module(struct module *this, unsigned long *token);

int __pkvm_register_el2_call(unsigned long hfn_hyp_va);

unsigned long pkvm_el2_mod_kern_va(unsigned long addr);

void pkvm_el2_mod_frob_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings);
#else
static inline int __pkvm_load_el2_module(struct module *this,
					 unsigned long *token)
{
	return -ENOSYS;
}

static inline int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
{
	return -ENOSYS;
}

static inline unsigned long pkvm_el2_mod_kern_va(unsigned long addr)
{
	return 0;
}
#endif /* CONFIG_MODULES */

int pkvm_load_early_modules(void);

#ifdef MODULE
/*
 * Convert an EL2 module address from the kernel VA to the hyp VA.
 */
#define pkvm_el2_mod_va(kern_va, token)					\
({									\
	unsigned long hyp_mod_kern_va =					\
		(unsigned long)THIS_MODULE->arch.hyp.sections.start;	\
	unsigned long offset;						\
									\
	offset = (unsigned long)kern_va - hyp_mod_kern_va;		\
	token + offset;							\
})

#define pkvm_load_el2_module(init_fn, token)				\
({									\
	THIS_MODULE->arch.hyp.init = init_fn;				\
	__pkvm_load_el2_module(THIS_MODULE, token);			\
})

static inline int pkvm_register_el2_mod_call(dyn_hcall_t hfn,
					     unsigned long token)
{
	return __pkvm_register_el2_call(pkvm_el2_mod_va(hfn, token));
}

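/*
 * Example (illustrative sketch): loading the EL2 payload and wiring up a
 * dynamic HVC from the kernel module's init. my_hyp_init, my_hcall and
 * my_hvc_id are hypothetical; on success, pkvm_register_el2_mod_call() is
 * assumed to return the dynamic HVC id used with pkvm_el2_mod_call().
 *
 *	static int __init my_mod_init(void)
 *	{
 *		unsigned long token;
 *		int ret;
 *
 *		ret = pkvm_load_el2_module(my_hyp_init, &token);
 *		if (ret)
 *			return ret;
 *
 *		ret = pkvm_register_el2_mod_call(my_hcall, token);
 *		if (ret < 0)
 *			return ret;
 *
 *		my_hvc_id = ret;
 *		return 0;
 *	}
 */
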
#define pkvm_el2_mod_call(id, ...)					\
	({								\
		struct arm_smccc_res res;				\
									\
		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_ID(id),		\
				  ##__VA_ARGS__, &res);			\
		WARN_ON(res.a0 != SMCCC_RET_SUCCESS);			\
									\
		res.a1;							\
	})

#define pkvm_el2_mod_call_smccc(id, ...)				\
	({								\
		struct arm_smccc_res res;				\
									\
		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_ID(id),		\
				  ##__VA_ARGS__, &res);			\
		WARN_ON(res.a0 != SMCCC_RET_SUCCESS);			\
									\
		res;							\
	})
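
/*
 * Example (illustrative sketch): invoking the dynamic HVC registered above,
 * where my_hvc_id is the (hypothetical) value saved from
 * pkvm_register_el2_mod_call(). The macro returns res.a1 from the call.
 *
 *	u64 out = pkvm_el2_mod_call(my_hvc_id, arg0, arg1);
 */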
#endif /* MODULE */
#endif /* __ARM64_KVM_PKVM_MODULE_H__ */