// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023 Google LLC
 * Author: Mostafa Saleh <smostafa@google.com>
 */
#include <linux/of_platform.h>
#include <linux/arm-smccc.h>
#include <linux/iommu.h>
#include <linux/maple_tree.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/xarray.h>

#define ASSERT(cond)							\
	do {								\
		if (!(cond)) {						\
			pr_err("line %d: assertion failed: %s\n",	\
			       __LINE__, #cond);			\
			return -1;					\
		}							\
	} while (0)

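/* iommu_group cache for platform devices, keyed by the group ID in fwspec->ids[1]. */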
static DEFINE_XARRAY(pviommu_groups);

struct pviommu_domain {
	struct iommu_domain		domain;
	unsigned long			id; /* pKVM domain ID. */
	struct maple_tree		mappings; /* IOVA -> IPA */
};

struct pviommu {
	struct iommu_device		iommu;
	u32				id;
};

struct pviommu_master {
	struct device			*dev;
	struct pviommu			*iommu;
	u32				ssid_bits;
	struct pviommu_domain		*domain;
};

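/* Translate an SMCCC return value into a Linux error code. */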
static int smccc_to_linux_ret(u64 smccc_ret)
{
	switch (smccc_ret) {
	case SMCCC_RET_SUCCESS:
		return 0;
	case SMCCC_RET_NOT_SUPPORTED:
		return -EOPNOTSUPP;
	case SMCCC_RET_NOT_REQUIRED:
		return -ENOENT;
	case SMCCC_RET_INVALID_PARAMETER:
		return -EINVAL;
	}

	return -ENODEV;
}

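/* Convert Linux IOMMU_* prot flags into the SMCCC pvIOMMU prot encoding. */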
static u64 __linux_prot_smccc(int iommu_prot)
{
	int prot = 0;

	if (iommu_prot & IOMMU_READ)
		prot |= ARM_SMCCC_KVM_PVIOMMU_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= ARM_SMCCC_KVM_PVIOMMU_WRITE;
	if (iommu_prot & IOMMU_CACHE)
		prot |= ARM_SMCCC_KVM_PVIOMMU_CACHE;
	if (iommu_prot & IOMMU_NOEXEC)
		prot |= ARM_SMCCC_KVM_PVIOMMU_NOEXEC;
	if (iommu_prot & IOMMU_MMIO)
		prot |= ARM_SMCCC_KVM_PVIOMMU_MMIO;
	if (iommu_prot & IOMMU_PRIV)
		prot |= ARM_SMCCC_KVM_PVIOMMU_PRIV;

	return prot;
}

/* Ranges are inclusive for all functions. */
static void pviommu_domain_insert_map(struct pviommu_domain *pv_domain,
				      u64 start, u64 end, u64 val, gfp_t gfp)
{
	if (end < start)
		return;

	mtree_store_range(&pv_domain->mappings, start, end, xa_mk_value(val), gfp);
}

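/*
 * Remove [start, end] from the IOVA -> IPA tree. Entries only partially
 * covered by the range are split: the retained head keeps its original
 * value, the retained tail keeps the original value plus the offset of its
 * new start within the old entry.
 */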
static void pviommu_domain_remove_map(struct pviommu_domain *pv_domain,
				      u64 start, u64 end)
{
	/* Range can cover multiple entries. */
	while (start < end) {
		MA_STATE(mas, &pv_domain->mappings, start, end);
		u64 entry;
		u64 old_start, old_end;

		mtree_lock(mas.tree);
		entry = xa_to_value(mas_find(&mas, start));
		old_start = mas.index;
		old_end = mas.last;
		mas_erase(&mas);
		if (start > old_start) {
			MA_STATE(mas_border, &pv_domain->mappings, old_start, start - 1);
			WARN_ON(mas_store_gfp(&mas_border, xa_mk_value(entry), GFP_ATOMIC));
		}
		if (old_end > end) {
			MA_STATE(mas_border, &pv_domain->mappings, end + 1, old_end);
			WARN_ON(mas_store_gfp(&mas_border, xa_mk_value(entry + end - old_start + 1),
				GFP_ATOMIC));
		}
		mtree_unlock(mas.tree);
		start = old_end + 1;
	}
}

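/* IOVA -> IPA lookup: base value of the covering entry plus the offset into it. */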
static u64 pviommu_domain_find(struct pviommu_domain *pv_domain, u64 key)
{
	MA_STATE(mas, &pv_domain->mappings, key, key);
	void *entry;

	mtree_lock(mas.tree);
	entry = mas_find(&mas, key);
	mtree_unlock(mas.tree);
	/* No entry. */
	if (!xa_is_value(entry))
		return 0;

	return (key - mas.index) + (u64)xa_to_value(entry);
}

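/*
 * Map pages via the MAP_PAGES hypercall. The hypervisor may make partial
 * progress (reported in res.a1), so keep retrying until the whole range is
 * mapped or an error is returned, then record the mapping for iova_to_phys.
 */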
static int pviommu_map_pages(struct iommu_domain *domain, unsigned long iova,
			     phys_addr_t paddr, size_t pgsize, size_t pgcount,
			     int prot, gfp_t gfp, size_t *mapped)
{
	struct pviommu_domain *pv_domain = container_of(domain, struct pviommu_domain, domain);
	struct arm_smccc_res res;
	size_t requested_size = pgsize * pgcount, cur_mapped;

	*mapped = 0;
	while (*mapped < requested_size) {
		arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_PVIOMMU_OP_FUNC_ID,
				  KVM_PVIOMMU_OP_MAP_PAGES, pv_domain->id, iova,
				  paddr, requested_size - *mapped, __linux_prot_smccc(prot), &res);
		cur_mapped = res.a1;
		*mapped += cur_mapped;
		if (res.a0 != SMCCC_RET_SUCCESS)
			break;
		iova += cur_mapped;
		paddr += cur_mapped;
	}

	if (*mapped)
		pviommu_domain_insert_map(pv_domain, iova - *mapped, iova - 1,
					  paddr - *mapped, gfp);

	return smccc_to_linux_ret(res.a0);
}

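/* Unmap pages via the UNMAP_PAGES hypercall, retrying on partial progress. */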
static size_t pviommu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
				  size_t pgsize, size_t pgcount,
				  struct iommu_iotlb_gather *gather)
{
	struct pviommu_domain *pv_domain = container_of(domain, struct pviommu_domain, domain);
	struct arm_smccc_res res;
	size_t total_unmapped = 0, unmapped, requested_size = pgsize * pgcount;

	while (total_unmapped < requested_size) {
		arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_PVIOMMU_OP_FUNC_ID,
				  KVM_PVIOMMU_OP_UNMAP_PAGES, pv_domain->id, iova,
				  requested_size - total_unmapped, 0, 0, &res);
		unmapped = res.a1;
		total_unmapped += unmapped;
		if (res.a0 != SMCCC_RET_SUCCESS)
			break;
		iova += unmapped;
	}

	if (total_unmapped)
		pviommu_domain_remove_map(pv_domain, iova - total_unmapped, iova - 1);

	return total_unmapped;
}

static phys_addr_t pviommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
	struct pviommu_domain *pv_domain = container_of(domain, struct pviommu_domain, domain);

	return pviommu_domain_find(pv_domain, iova);
}

static void pviommu_domain_free(struct iommu_domain *domain)
{
	struct pviommu_domain *pv_domain = container_of(domain, struct pviommu_domain, domain);
	struct arm_smccc_res res;

	arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_PVIOMMU_OP_FUNC_ID,
			  KVM_PVIOMMU_OP_FREE_DOMAIN, pv_domain->id, 0, 0, 0, 0, &res);
	if (res.a0 != SMCCC_RET_SUCCESS)
		pr_err("Failed to free domain %ld\n", res.a0);

	mtree_destroy(&pv_domain->mappings);
	kfree(pv_domain);
}

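/* Detach all of the device's stream IDs from its domain for the given PASID. */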
static void pviommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
				     struct iommu_domain *domain)
{
	struct pviommu_master *master = dev_iommu_priv_get(dev);
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct pviommu *pv = master->iommu;
	struct pviommu_domain *pv_domain = master->domain;
	struct arm_smccc_res res;
	u32 sid;
	int i;

	if (!fwspec || !pv_domain)
		return;

	for (i = 0; i < fwspec->num_ids; i++) {
		sid = fwspec->ids[i];
		arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_PVIOMMU_OP_FUNC_ID,
				  KVM_PVIOMMU_OP_DETACH_DEV,
				  pv->id, sid, pasid, pv_domain->id, 0, &res);
		if (res.a0 != SMCCC_RET_SUCCESS)
			dev_err(dev, "Failed to detach_dev sid %d, err %ld\n", sid, res.a0);
	}

	if (!pasid)
		master->domain = NULL;
}

static void pviommu_detach_dev(struct pviommu_master *master)
{
	if (master->domain)
		pviommu_remove_dev_pasid(master->dev, 0, &master->domain->domain);
}

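/*
 * Attach all of the device's stream IDs to the domain for the given PASID,
 * rolling back the already attached ones if any attach fails.
 */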
static int pviommu_set_dev_pasid(struct iommu_domain *domain,
				 struct device *dev, ioasid_t pasid)
{
	int ret = 0, i;
	struct arm_smccc_res res;
	u32 sid;
	struct pviommu_master *master = dev_iommu_priv_get(dev);
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct pviommu_domain *pv_domain = container_of(domain, struct pviommu_domain, domain);
	struct pviommu *pv = master->iommu;

	if (!fwspec)
		return -ENOENT;

	if (!pasid) {
		pviommu_detach_dev(master);
		master->domain = pv_domain;
	}

	for (i = 0; i < fwspec->num_ids; i++) {
		sid = fwspec->ids[i];
		arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_PVIOMMU_OP_FUNC_ID,
				  KVM_PVIOMMU_OP_ATTACH_DEV,
				  pv->id, sid, pasid,
				  pv_domain->id, master->ssid_bits, &res);
		if (res.a0) {
			ret = smccc_to_linux_ret(res.a0);
			break;
		}
	}

	if (ret) {
		/* Roll back the stream IDs that were attached before the failure. */
		while (i--) {
			sid = fwspec->ids[i];
			arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_PVIOMMU_OP_FUNC_ID,
					  KVM_PVIOMMU_OP_DETACH_DEV,
					  pv->id, sid, pasid,
					  pv_domain->id, 0, &res);
		}
	}

	return ret;
}

static int pviommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	return pviommu_set_dev_pasid(domain, dev, 0);
}

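/* Allocate a domain; the hypervisor assigns the pKVM domain ID. */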
static struct iommu_domain *pviommu_domain_alloc(unsigned int type)
{
	struct pviommu_domain *pv_domain;
	struct arm_smccc_res res;

	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA)
		return ERR_PTR(-EOPNOTSUPP);

	pv_domain = kzalloc(sizeof(*pv_domain), GFP_KERNEL);
	if (!pv_domain)
		return ERR_PTR(-ENOMEM);

	mt_init(&pv_domain->mappings);

	arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_PVIOMMU_OP_FUNC_ID,
			  KVM_PVIOMMU_OP_ALLOC_DOMAIN, 0, 0, 0, 0, 0, &res);
	if (res.a0 != SMCCC_RET_SUCCESS) {
		kfree(pv_domain);
		return ERR_PTR(smccc_to_linux_ret(res.a0));
	}

	pv_domain->id = res.a1;

	return &pv_domain->domain;
}

static struct platform_driver pkvm_pviommu_driver;

static struct pviommu *pviommu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device_by_fwnode(&pkvm_pviommu_driver.driver, fwnode);

	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}

static struct iommu_ops pviommu_ops;

static struct iommu_device *pviommu_probe_device(struct device *dev)
{
	struct pviommu_master *master;
	struct pviommu *pv = NULL;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	if (!fwspec)
		return ERR_PTR(-ENODEV);

	pv = pviommu_get_by_fwnode(fwspec->iommu_fwnode);
	if (!pv)
		return ERR_PTR(-ENODEV);

	master = kzalloc(sizeof(*master), GFP_KERNEL);
	if (!master)
		return ERR_PTR(-ENOMEM);

	master->dev = dev;
	master->iommu = pv;
	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
	dev_iommu_priv_set(dev, master);

	return &pv->iommu;
}

static void pviommu_release_device(struct device *dev)
{
	struct pviommu_master *master = dev_iommu_priv_get(dev);

	pviommu_detach_dev(master);
	kfree(master);
}

static int pviommu_of_xlate(struct device *dev, const struct of_phandle_args *args)
{
	return iommu_fwspec_add_ids(dev, args->args, args->args_count);
}

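/*
 * Platform devices that share a group ID must end up in the same iommu_group,
 * so cache allocated groups in an xarray keyed by that ID.
 */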
static struct iommu_group *pviommu_group_alloc_get(struct device *dev, int group_id)
{
	struct iommu_group *group;

	group = xa_load(&pviommu_groups, (unsigned long)group_id);
	if (group)
		return group;

	group = iommu_group_alloc();
	if (IS_ERR(group))
		return group;

	if (WARN_ON(xa_insert(&pviommu_groups, (unsigned long)group_id, group, GFP_KERNEL)))
		dev_err(dev,
			"Failed to track group %d, this will lead to multiple groups instead of one\n",
			group_id);

	return group;
}

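/*
 * PCI devices use the standard PCI grouping; platform devices with more than
 * one firmware ID share a group selected by the second ID.
 */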
static struct iommu_group *pviommu_device_group(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	if (!fwspec)
		return ERR_PTR(-ENODEV);

	if (dev_is_pci(dev))
		return pci_device_group(dev);

	if (fwspec->num_ids == 1)
		return generic_device_group(dev);

	return pviommu_group_alloc_get(dev, fwspec->ids[1]);
}

static struct iommu_ops pviommu_ops = {
	.device_group		= pviommu_device_group,
	.of_xlate		= pviommu_of_xlate,
	.probe_device		= pviommu_probe_device,
	.release_device		= pviommu_release_device,
	.domain_alloc		= pviommu_domain_alloc,
	.remove_dev_pasid	= pviommu_remove_dev_pasid,
	.owner			= THIS_MODULE,
	.default_domain_ops = &(const struct iommu_domain_ops) {
		.attach_dev	= pviommu_attach_dev,
		.map_pages	= pviommu_map_pages,
		.unmap_pages	= pviommu_unmap_pages,
		.iova_to_phys	= pviommu_iova_to_phys,
		.set_dev_pasid	= pviommu_set_dev_pasid,
		.free		= pviommu_domain_free,
	}
};

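/*
 * Read the pvIOMMU instance ID from the firmware node and query the
 * hypervisor page granule (HYP_MEMINFO) to advertise supported page sizes.
 */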
static int pviommu_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct pviommu *pv = devm_kmalloc(dev, sizeof(*pv), GFP_KERNEL);
	struct device_node *np = pdev->dev.of_node;
	int ret;
	struct arm_smccc_res res;

	if (!pv)
		return -ENOMEM;

	ret = of_property_read_u32_index(np, "id", 0, &pv->id);
	if (ret) {
		dev_err(dev, "Failed to read id from device tree node: %d\n", ret);
		return ret;
	}

	arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID, 0, 0, 0, &res);
	if ((long)res.a0 < 0)
		return -ENODEV;

	pviommu_ops.pgsize_bitmap = res.a0;

	ret = iommu_device_sysfs_add(&pv->iommu, dev, NULL,
				     "pviommu.%pa", &pv->id);
	if (ret)
		return ret;

	ret = iommu_device_register(&pv->iommu, &pviommu_ops, dev);
	if (ret) {
		dev_err(dev, "Couldn't register: %d\n", ret);
		iommu_device_sysfs_remove(&pv->iommu);
	}

	platform_set_drvdata(pdev, pv);

	return ret;
}

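/*
 * Illustrative device tree snippet for a pvIOMMU instance and a master behind
 * it. This is only a sketch: the "#iommu-cells" value and the "iommus"
 * specifier are assumptions, the real binding is defined by the platform.
 * The "compatible" string, "id" and "pasid-num-bits" properties match what
 * this driver parses.
 *
 *	pviommu0: pviommu {
 *		compatible = "pkvm,pviommu";
 *		id = <0>;
 *		#iommu-cells = <1>;
 *	};
 *
 *	master-device {
 *		iommus = <&pviommu0 0x10>;
 *		pasid-num-bits = <16>;
 *	};
 */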
static const struct of_device_id pviommu_of_match[] = {
	{ .compatible = "pkvm,pviommu", },
	{ },
};

static struct platform_driver pkvm_pviommu_driver = {
	.probe = pviommu_probe,
	.driver = {
		.name = "pkvm-pviommu",
		.of_match_table = pviommu_of_match,
	},
};

#if IS_ENABLED(CONFIG_PKVM_PVIOMMU_SELFTEST) && !defined(MODULE)
/* Mainly tests the iova_to_phys tracking, not the hypervisor interface. */
static int __init __pviommu_selftest(void)
{
	struct pviommu_domain domain;

	pr_info("pviommu selftest starting\n");

	mt_init(&domain.mappings);

	pviommu_domain_insert_map(&domain, 0x10000, 0xFEFFF, 0xE0000, GFP_KERNEL);
	pviommu_domain_insert_map(&domain, 0xFFF0000, 0x1EDBFFFF, 0xDEAD0000, GFP_KERNEL);
	ASSERT(pviommu_domain_find(&domain, 0x10000) == 0xE0000);
	ASSERT(pviommu_domain_find(&domain, 0x10F00) == 0xE0F00);
	ASSERT(pviommu_domain_find(&domain, 0x1EDBFFFF) == 0xED89FFFF);
	ASSERT(pviommu_domain_find(&domain, 0x10000000) == 0xDEAE0000);
	ASSERT(pviommu_domain_find(&domain, 0x1FF000) == 0);
	pviommu_domain_remove_map(&domain, 0x12000, 0x19FFF);
	ASSERT(pviommu_domain_find(&domain, 0x11000) == 0xE1000);
	ASSERT(pviommu_domain_find(&domain, 0x1B000) == 0xEB000);
	ASSERT(pviommu_domain_find(&domain, 0x14000) == 0);

	pviommu_domain_insert_map(&domain, 0xC00000, 0xCFFFFF, 0xABCD000, GFP_KERNEL);
	pviommu_domain_insert_map(&domain, 0xD00000, 0xDFFFFF, 0x1000, GFP_KERNEL);
	pviommu_domain_insert_map(&domain, 0xE00000, 0xEFFFFF, 0xC0FE00000, GFP_KERNEL);
	ASSERT(pviommu_domain_find(&domain, 0xD00000) == 0x1000);
	pviommu_domain_remove_map(&domain, 0xC50000, 0xE5FFFF);
	ASSERT(pviommu_domain_find(&domain, 0xC50000) == 0);
	ASSERT(pviommu_domain_find(&domain, 0xD10000) == 0);
	ASSERT(pviommu_domain_find(&domain, 0xE60000) == 0xC0FE60000);
	ASSERT(pviommu_domain_find(&domain, 0xC10000) == 0xABDD000);

	mtree_destroy(&domain.mappings);
	return 0;
}

subsys_initcall(__pviommu_selftest);
#endif

module_platform_driver(pkvm_pviommu_driver);

MODULE_DESCRIPTION("IOMMU API for pKVM paravirtualized IOMMU");
MODULE_AUTHOR("Mostafa Saleh <smostafa@google.com>");
MODULE_LICENSE("GPL");