/* SPDX-License-Identifier: GPL-2.0 */

#ifndef __ARM64_KVM_PKVM_MODULE_H__
#define __ARM64_KVM_PKVM_MODULE_H__

#include <asm/kvm_pgtable.h>
#include <linux/android_kabi.h>
#include <linux/export.h>

typedef void (*dyn_hcall_t)(struct user_pt_regs *);
struct kvm_hyp_iommu;
struct iommu_iotlb_gather;
struct kvm_hyp_iommu_domain;
struct pkvm_device;

#ifdef CONFIG_MODULES
enum pkvm_psci_notification {
	PKVM_PSCI_CPU_SUSPEND,
	PKVM_PSCI_SYSTEM_SUSPEND,
	PKVM_PSCI_CPU_ENTRY,
};

struct pkvm_sglist_page {
	u64	pfn : 40;
	u8	order;
} __packed;
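
/*
 * Illustrative sketch (not from this header): building a scatter-gather list
 * for the @host_donate_sglist_hyp callback declared below. Each entry packs a
 * PFN with the order of the physically contiguous run starting at that PFN.
 * pfn_a and pfn_b are hypothetical, and whether @nr_pages counts sglist
 * entries or total pages is an assumption to check against the implementation.
 *
 *	struct pkvm_sglist_page sgl[] = {
 *		{ .pfn = pfn_a, .order = 4 },	// 16 contiguous pages at pfn_a
 *		{ .pfn = pfn_b, .order = 0 },	// a single page at pfn_b
 *	};
 *
 *	ret = ops->host_donate_sglist_hyp(sgl, ARRAY_SIZE(sgl));
 */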

/**
 * struct pkvm_module_trng_ops - pKVM TRNG implementation module callbacks
 * @trng_uuid:	  The implementation's UUID, advertised on TRNG_GET_UUID calls.
 * @trng_rnd64:	  TRNG implementation call generating entropy for the
 *                TRNG_RND64 call. The implementation is required to output the
 *                specified number of bits of entropy. The output array will be
 *                stored in the registers in the following order: x3, x2, x1.
 */
struct pkvm_module_trng_ops {
	const uuid_t *trng_uuid;
	int (*trng_rnd64)(u64 *entropy, int bits);

	ANDROID_KABI_RESERVE(1);
	ANDROID_KABI_RESERVE(2);
};
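
/*
 * Illustrative sketch (not from this header): a module providing its own TRNG
 * backend for pVMs. my_trng_uuid and my_read_hw_entropy() are hypothetical;
 * @trng_rnd64 must fill @entropy with the requested number of bits, which
 * pKVM then returns in registers x3, x2, x1.
 *
 *	static int my_trng_rnd64(u64 *entropy, int bits)
 *	{
 *		// At most 192 bits per TRNG_RND64 call => up to 3 words.
 *		return my_read_hw_entropy(entropy, DIV_ROUND_UP(bits, 64));
 *	}
 *
 *	static const struct pkvm_module_trng_ops my_trng_ops = {
 *		.trng_uuid	= &my_trng_uuid,
 *		.trng_rnd64	= my_trng_rnd64,
 *	};
 *
 * The ops would then be registered from hyp code through the
 * @register_guest_trng_ops callback of struct pkvm_module_ops, defined below.
 */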

/**
 * struct pkvm_module_ops - pKVM module callbacks
 * @create_private_mapping:	Map a memory region into the hypervisor private
 *				range. @haddr returns the virtual address where
 *				the mapping starts. It can't be unmapped. Host
 *				access permissions are unaffected.
 * @alloc_module_va:		Reserve a range of VA space in the hypervisor
 *				private range. This is handy for modules that
 *				need to map plugin code in a similar fashion to
 *				how pKVM maps module code. That space could also
 *				be used to map memory temporarily, when the
 *				fixmap granularity (PAGE_SIZE) is too small.
 * @map_module_page:		Used in conjunction with @alloc_module_va. When
 *				@is_protected is not set, the page is also
 *				unmapped from the host stage-2.
 * @register_serial_driver:	Register a driver for a serial interface. The
 *				framework only needs a single callback
 *				@hyp_putc_cb which is expected to print a single
 *				character.
 * @putc:			If a serial interface is registered, print a
 *				single character, otherwise do nothing.
 * @puts:			If a serial interface is registered, print a
 *				string, otherwise do nothing.
 * @putx64:			If a serial interface is registered, print a
 *				64-bit number, otherwise do nothing.
 * @fixmap_map:			Map a page in the per-CPU hypervisor fixmap.
 *				This is intended to be used for temporary
 *				mappings in the hypervisor VA space.
 *				@fixmap_unmap must be called between each
 *				mapping to do cache maintenance and ensure the
 *				new mapping is visible.
 * @fixmap_unmap:		Unmap a page from the hypervisor fixmap. This
 *				call is required between each @fixmap_map().
 * @fixblock_map:		Map a PMD-size large page into a CPU-shared
 *				fixmap. This can be used to replace and speed up
 *				a set of @fixmap_map calls. @fixblock_unmap must
 *				be called between each mapping to do cache
 *				maintenance and ensure the new mapping is
 *				visible.
 * @fixblock_unmap:		Unmap a PMD-size large page from the hypervisor
 *				fixmap. This call is required between each
 *				@fixblock_map.
 * @linear_map_early:		Map a large portion of memory into the
 *				hypervisor linear VA space. This is intended to
 *				be used only for module bootstrap and must be
 *				unmapped before the host is deprivileged.
 * @linear_unmap_early:		See @linear_map_early.
 * @flush_dcache_to_poc:	Clean the data cache to the point of coherency.
 *				This is not a requirement for any of the other
 *				pkvm_module_ops callbacks.
 * @update_hcr_el2:		Modify the running value of HCR_EL2. pKVM will
 *				save/restore the new value across power
 *				management transitions.
 * @update_hfgwtr_el2:		Modify the running value of HFGWTR_EL2. pKVM
 *				will save/restore the new value across power
 *				management transitions.
 * @register_host_perm_fault_handler:
 *				@cb is called whenever the host generates an
 *				abort with the fault status code Permission
 *				Fault. This is useful when a module changes the
 *				host stage-2 permissions for certain pages.
 *				Up to 16 handlers can be registered. Returning
 *				-EPERM lets pKVM handle the abort, while
 *				returning 0 causes the next handler to be
 *				called. Handlers are called in registration
 *				order.
 * @host_stage2_mod_prot:	Apply @prot to the page @pfn. This requires a
 *				permission fault handler to be registered (see
 *				@register_host_perm_fault_handler), otherwise
 *				pKVM will be unable to handle this fault and the
 *				CPU will be stuck in an infinite loop. @nr_pages
 *				allows the prot to be applied to a range of
 *				contiguous memory.
 * @host_stage2_enable_lazy_pte:
 *				DEPRECATED
 *				Unmap a range of memory from the host stage-2,
 *				leaving the pages' host ownership intact. The
 *				pages will be remapped lazily (subject to the
 *				usual ownership checks) in response to a
 *				faulting access from the host.
 * @host_stage2_disable_lazy_pte:
 *				DEPRECATED
 *				The opposite of @host_stage2_enable_lazy_pte.
 *				Must be called once the module is done with the
 *				region.
 * @host_stage2_get_leaf:	Query the host's stage-2 page-table entry for
 *				the page @phys.
 * @register_host_smc_handler:	@cb is called whenever the host issues an SMC
 *				pKVM couldn't handle.
 *				Up to 16 handlers can be registered, called in
 *				registration order. If no handler returns true,
 *				the SMC is forwarded to EL3.
 * @register_guest_smc_handler: @cb is called when the guest identified by the
 *				pkvm_handle issues an SMC that pKVM couldn't
 *				handle. If @cb returns false, an unsupported
 *				operation error is returned to the guest.
 * @register_default_trap_handler:
 *				@cb is called whenever EL2 traps EL1 and pKVM
 *				has not handled it. If @cb returns false, the
 *				hypervisor will panic. This trap handler must be
 *				registered whenever changes are made to HCR_EL2
 *				(@update_hcr_el2) or HFGWTR_EL2
 *				(@update_hfgwtr_el2).
 * @register_illegal_abt_notifier:
 *				To notify the module of a pending illegal abort
 *				from the host. On @cb return, the abort will be
 *				injected back into the host.
 * @register_psci_notifier:	To notify the module of a pending PSCI event.
 * @register_hyp_panic_notifier:
 *				To notify the module of a pending hypervisor
 *				panic. On return from @cb, the panic will occur.
 * @register_unmask_serror:	When @unmask returns true, the hypervisor will
 *				unmask SErrors at EL2. Although the hypervisor
 *				cannot recover from an SError (and will panic if
 *				one occurs), they can be useful for debugging in
 *				some situations. @mask is the twin of @unmask
 *				and is called before SErrors are masked again.
 * @host_donate_hyp:		The page @pfn is unmapped from the host and
 *				full control is given to the hypervisor.
 * @host_donate_hyp_prot:	As @host_donate_hyp, but this variant sets the
 *				prot of the hypervisor mapping.
 * @host_donate_sglist_hyp:	Similar to @host_donate_hyp but takes an array
 *				of PFNs (struct pkvm_sglist_page) as an
 *				argument. This is intended to batch IOMMU
 *				updates.
 * @hyp_donate_host:		The page @pfn, control of which was previously
 *				given to the hypervisor (@host_donate_hyp), is
 *				given back to the host.
 * @host_share_hyp:		The page @pfn will be shared between the host
 *				and the hypervisor. Must be followed by
 *				@pin_shared_mem.
 * @host_unshare_hyp:		The page @pfn will be unshared and unmapped from
 *				the hypervisor. Must be called after
 *				@unpin_shared_mem.
 * @pin_shared_mem:		After @host_share_hyp, the newly shared page is
 *				still owned by the host. @pin_shared_mem will
 *				prevent the host from reclaiming that page until
 *				the hypervisor releases it (@unpin_shared_mem).
 * @unpin_shared_mem:		Enable the host to reclaim the shared memory
 *				(@host_unshare_hyp).
 * @memcpy:			Same as the kernel memcpy, but uses hypervisor
 *				VAs.
 * @memset:			Same as the kernel memset, but uses a
 *				hypervisor VA.
 * @hyp_pa:			Return the physical address for a hypervisor
 *				virtual address in the linear range.
 * @hyp_va:			Convert a physical address into a hypervisor
 *				virtual one.
 * @kern_hyp_va:		Convert a kernel virtual address into a
 *				hypervisor virtual one.
 * @hyp_alloc:			Allocate memory in the hyp VA space.
 * @hyp_alloc_errno:		Error code in case hyp_alloc() returns NULL.
 * @hyp_free:			Free memory allocated from hyp_alloc().
 * @iommu_donate_pages:		Allocate memory from the IOMMU pool.
 * @iommu_reclaim_pages:	Reclaim memory from iommu_donate_pages().
 * @iommu_request:		Fill a request that is returned from the entry
 *				HVC (see hyp-main.c).
 * @iommu_init_device:		Initialize common IOMMU fields.
 * @udelay:			Delay in microseconds.
 * @hyp_alloc_missing_donations:
 *				Missing donations if the allocator returns NULL.
 * @iommu_iotlb_gather_add_page:
 *				Add an IOVA range to an iommu_iotlb_gather.
 * @pkvm_unuse_dma:		Decrement the refcount of pages used for DMA.
 *				This is typically called from the module after a
 *				successful unmap() operation, so the hypervisor
 *				can track the page state.
 * @iommu_snapshot_host_stage2: Snapshot the host stage-2 CPU page table into
 *				an IOMMU domain.
 * @__list_add_valid_or_report: Needed if the code uses linked lists.
 * @__list_del_entry_valid_or_report:
 *				Needed if the code uses linked lists.
 * @iommu_donate_pages_atomic:	Allocate memory from the IOMMU identity pool.
 * @iommu_reclaim_pages_atomic:	Reclaim memory from iommu_donate_pages_atomic().
 * @hyp_smp_processor_id:	Return the current CPU id.
 * @device_register_reset:	Register a reset callback for a device, called
 *				before/after the device is assigned. Only one
 *				callback can be registered per device.
 *				Devices are identified by the base address of
 *				their MMIO as defined in the device tree.
 *				Reset is expected to clear any state/secrets on
 *				the device and put it in a quiescent state,
 *				where it can't trigger any DMA.
 *				If reset fails at device assignment to a guest,
 *				the device won't be assigned.
 *				If it fails on the guest teardown path, the
 *				hypervisor panics to avoid leaking any
 *				information.
 *				The direction of the assignment can be deduced
 *				from pkvm_device::ctxt, where NULL means host to
 *				guest and vice versa.
 * @register_guest_trng_ops:    Register an alternative ARM SMCCC TRNG
 *				implementation for pVMs. @ops.trng_uuid is used
 *				to advertise the identity of the TRNG
 *				implementation. @ops.trng_rnd64 is used to
 *				generate entropy bits for the guest.
 */
struct pkvm_module_ops {
	int (*create_private_mapping)(phys_addr_t phys, size_t size,
				      enum kvm_pgtable_prot prot,
				      unsigned long *haddr);
	void *(*alloc_module_va)(u64 nr_pages);
	int (*map_module_page)(u64 pfn, void *va, enum kvm_pgtable_prot prot, bool is_protected);
	int (*register_serial_driver)(void (*hyp_putc_cb)(char));
	void (*putc)(char c);
	void (*puts)(const char *s);
	void (*putx64)(u64 x);
	void *(*fixmap_map)(phys_addr_t phys);
	void (*fixmap_unmap)(void);
	void *(*fixblock_map)(phys_addr_t phys);
	void (*fixblock_unmap)(void);
	void *(*linear_map_early)(phys_addr_t phys, size_t size, enum kvm_pgtable_prot prot);
	void (*linear_unmap_early)(void *addr, size_t size);
	void (*flush_dcache_to_poc)(void *addr, size_t size);
	void (*update_hcr_el2)(unsigned long set_mask, unsigned long clear_mask);
	void (*update_hfgwtr_el2)(unsigned long set_mask, unsigned long clear_mask);
	int (*register_host_perm_fault_handler)(int (*cb)(struct user_pt_regs *regs, u64 esr, u64 addr));
	int (*host_stage2_mod_prot)(u64 pfn, enum kvm_pgtable_prot prot, u64 nr_pages, bool update_iommu);
	int (*host_stage2_get_leaf)(phys_addr_t phys, kvm_pte_t *ptep, s8 *level);
	int (*host_stage2_enable_lazy_pte)(u64 addr, u64 nr_pages);
	int (*host_stage2_disable_lazy_pte)(u64 addr, u64 nr_pages);
	int (*register_host_smc_handler)(bool (*cb)(struct user_pt_regs *));
	int (*register_guest_smc_handler)(bool (*cb)(struct arm_smccc_1_2_regs *regs,
						     struct arm_smccc_1_2_regs *res,
						     pkvm_handle_t handle));
	int (*register_default_trap_handler)(bool (*cb)(struct user_pt_regs *));
	int (*register_illegal_abt_notifier)(void (*cb)(struct user_pt_regs *));
	int (*register_psci_notifier)(void (*cb)(enum pkvm_psci_notification, struct user_pt_regs *));
	int (*register_hyp_panic_notifier)(void (*cb)(struct user_pt_regs *));
	int (*register_unmask_serror)(bool (*unmask)(void), void (*mask)(void));
	int (*host_donate_hyp)(u64 pfn, u64 nr_pages, bool accept_mmio);
	int (*host_donate_hyp_prot)(u64 pfn, u64 nr_pages, bool accept_mmio, enum kvm_pgtable_prot prot);
	int (*host_donate_sglist_hyp)(struct pkvm_sglist_page *sglist, size_t nr_pages);
	int (*hyp_donate_host)(u64 pfn, u64 nr_pages);
	int (*host_share_hyp)(u64 pfn);
	int (*host_unshare_hyp)(u64 pfn);
	int (*pin_shared_mem)(void *from, void *to);
	void (*unpin_shared_mem)(void *from, void *to);
	void* (*memcpy)(void *to, const void *from, size_t count);
	void* (*memset)(void *dst, int c, size_t count);
	phys_addr_t (*hyp_pa)(void *x);
	void* (*hyp_va)(phys_addr_t phys);
	unsigned long (*kern_hyp_va)(unsigned long x);
	void* (*tracing_reserve_entry)(unsigned long length);
	void (*tracing_commit_entry)(void);
	void (*tracing_mod_hyp_printk)(u8 fmt_id, u64 a, u64 b, u64 c, u64 d);
	void * (*hyp_alloc)(size_t size);
	int (*hyp_alloc_errno)(void);
	void (*hyp_free)(void *addr);
	u8 (*hyp_alloc_missing_donations)(void);
	void * (*iommu_donate_pages)(u8 order, int flags);
	void (*iommu_reclaim_pages)(void *p, u8 order);
	int (*iommu_init_device)(struct kvm_hyp_iommu *iommu);
	void (*udelay)(unsigned long usecs);
	void (*iommu_iotlb_gather_add_page)(struct kvm_hyp_iommu_domain *domain,
					    struct iommu_iotlb_gather *gather,
					    unsigned long iova,
					    size_t size);
	int (*pkvm_unuse_dma)(phys_addr_t phys_addr, size_t size);
#ifdef CONFIG_LIST_HARDENED
	/* These 2 functions change calling convention based on CONFIG_DEBUG_LIST. */
	typeof(__list_add_valid_or_report) *list_add_valid_or_report;
	typeof(__list_del_entry_valid_or_report) *list_del_entry_valid_or_report;
#endif
	int (*iommu_snapshot_host_stage2)(struct kvm_hyp_iommu_domain *domain);
	void * (*iommu_donate_pages_atomic)(u8 order);
	void (*iommu_reclaim_pages_atomic)(void *p, u8 order);
	int (*hyp_smp_processor_id)(void);
	int (*device_register_reset)(u64 phys, void *cookie,
				     int (*cb)(void *cookie, bool host_to_guest));
	ANDROID_KABI_USE(1, int (*register_guest_trng_ops)(
				    const struct pkvm_module_trng_ops *ops));
	ANDROID_KABI_RESERVE(2);
	ANDROID_KABI_RESERVE(3);
	ANDROID_KABI_RESERVE(4);
	ANDROID_KABI_RESERVE(5);
	ANDROID_KABI_RESERVE(6);
	ANDROID_KABI_RESERVE(7);
	ANDROID_KABI_RESERVE(8);
	ANDROID_KABI_RESERVE(9);
	ANDROID_KABI_RESERVE(10);
	ANDROID_KABI_RESERVE(11);
	ANDROID_KABI_RESERVE(12);
	ANDROID_KABI_RESERVE(13);
	ANDROID_KABI_RESERVE(14);
	ANDROID_KABI_RESERVE(15);
	ANDROID_KABI_RESERVE(16);
	ANDROID_KABI_RESERVE(17);
	ANDROID_KABI_RESERVE(18);
	ANDROID_KABI_RESERVE(19);
	ANDROID_KABI_RESERVE(20);
	ANDROID_KABI_RESERVE(21);
	ANDROID_KABI_RESERVE(22);
	ANDROID_KABI_RESERVE(23);
	ANDROID_KABI_RESERVE(24);
	ANDROID_KABI_RESERVE(25);
	ANDROID_KABI_RESERVE(26);
	ANDROID_KABI_RESERVE(27);
	ANDROID_KABI_RESERVE(28);
	ANDROID_KABI_RESERVE(29);
	ANDROID_KABI_RESERVE(30);
	ANDROID_KABI_RESERVE(31);
	ANDROID_KABI_RESERVE(32);
};
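
/*
 * Illustrative sketch (not from this header): a hypothetical module hyp init
 * using the ops above to write-protect a page from the host. The init
 * signature and target_pfn are assumptions. As documented above, a permission
 * fault handler must be registered before @host_stage2_mod_prot is used,
 * otherwise the host would fault in an infinite loop.
 *
 *	static int my_perm_fault_handler(struct user_pt_regs *regs, u64 esr,
 *					 u64 addr)
 *	{
 *		// Let pKVM inject the abort back into the host.
 *		return -EPERM;
 *	}
 *
 *	int my_hyp_init(const struct pkvm_module_ops *ops)
 *	{
 *		int ret;
 *
 *		ret = ops->register_host_perm_fault_handler(my_perm_fault_handler);
 *		if (ret)
 *			return ret;
 *
 *		// Make the page read-only for the host, without IOMMU update.
 *		return ops->host_stage2_mod_prot(target_pfn, KVM_PGTABLE_PROT_R,
 *						 1, false);
 *	}
 */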

int __pkvm_load_el2_module(struct module *this, unsigned long *token);

int __pkvm_register_el2_call(unsigned long hfn_hyp_va);

unsigned long pkvm_el2_mod_kern_va(unsigned long addr);

void pkvm_el2_mod_frob_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings);
#else
static inline int __pkvm_load_el2_module(struct module *this,
					 unsigned long *token)
{
	return -ENOSYS;
}

static inline int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
{
	return -ENOSYS;
}

static inline unsigned long pkvm_el2_mod_kern_va(unsigned long addr)
{
	return 0;
}
#endif /* CONFIG_MODULES */

int pkvm_load_early_modules(void);

#ifdef MODULE
/*
 * Convert an EL2 module address from the kernel VA to the hyp VA.
 */
#define pkvm_el2_mod_va(kern_va, token)					\
({									\
	unsigned long hyp_mod_kern_va =					\
		(unsigned long)THIS_MODULE->arch.hyp.sections.start;	\
	unsigned long offset;						\
									\
	offset = (unsigned long)kern_va - hyp_mod_kern_va;		\
	token + offset;							\
})

#define pkvm_load_el2_module(init_fn, token)				\
({									\
	THIS_MODULE->arch.hyp.init = init_fn;				\
	__pkvm_load_el2_module(THIS_MODULE, token);			\
})

static inline int pkvm_register_el2_mod_call(dyn_hcall_t hfn,
					     unsigned long token)
{
	return __pkvm_register_el2_call(pkvm_el2_mod_va(hfn, token));
}

#define pkvm_el2_mod_call(id, ...)					\
	({								\
		struct arm_smccc_res res;				\
									\
		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_ID(id),		\
				  ##__VA_ARGS__, &res);			\
		WARN_ON(res.a0 != SMCCC_RET_SUCCESS);			\
									\
		res.a1;							\
	})

#define pkvm_el2_mod_call_smccc(id, ...)				\
	({								\
		struct arm_smccc_res res;				\
									\
		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_ID(id),		\
				  ##__VA_ARGS__, &res);			\
		WARN_ON(res.a0 != SMCCC_RET_SUCCESS);			\
									\
		res;							\
	})
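
/*
 * Illustrative sketch (not from this header): the kernel-side flow for loading
 * an EL2 module and registering a dynamic HCALL. my_hyp_init and my_hcall are
 * hypothetical, and the return-value convention of pkvm_register_el2_mod_call
 * (assumed here to be the HCALL id on success) should be checked against the
 * implementation.
 *
 *	static int __init my_module_init(void)
 *	{
 *		unsigned long token;
 *		int ret;
 *
 *		ret = pkvm_load_el2_module(my_hyp_init, &token);
 *		if (ret)
 *			return ret;
 *
 *		// my_hcall runs at EL2 with the dyn_hcall_t signature.
 *		return pkvm_register_el2_mod_call(my_hcall, token);
 *	}
 *
 * A call into the module would then go through pkvm_el2_mod_call(id, ...),
 * which WARNs unless the HVC returns SMCCC_RET_SUCCESS.
 */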
#endif /* MODULE */
#endif /* __ARM64_KVM_PKVM_MODULE_H__ */