1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * pKVM hyp driver for the Arm SMMUv3
4  *
5  * Copyright (C) 2022 Linaro Ltd.
6  */
7 #include <asm/arm-smmu-v3-common.h>
8 #include <asm/kvm_hyp.h>
9 #include <linux/io-pgtable-arm.h>
10 #include <nvhe/alloc.h>
11 #include <nvhe/iommu.h>
12 #include <nvhe/mem_protect.h>
13 #include <nvhe/mm.h>
14 #include <nvhe/pkvm.h>
15 #include <nvhe/rwlock.h>
16 #include <nvhe/trap_handler.h>
17 
18 #include "arm_smmu_v3.h"
19 #include "arm-smmu-v3-module.h"
20 
21 #ifdef MODULE
22 void *memset(void *dst, int c, size_t count)
23 {
24 	return CALL_FROM_OPS(memset, dst, c, count);
25 }
26 
27 #ifdef CONFIG_LIST_HARDENED
28 bool __list_add_valid_or_report(struct list_head *new,
29 				struct list_head *prev,
30 				struct list_head *next)
31 {
32 	return CALL_FROM_OPS(list_add_valid_or_report, new, prev, next);
33 }
34 
35 bool __list_del_entry_valid_or_report(struct list_head *entry)
36 {
37 	return CALL_FROM_OPS(list_del_entry_valid_or_report, entry);
38 }
39 #endif
40 
41 const struct pkvm_module_ops		*mod_ops;
42 #endif
43 
44 #define ARM_SMMU_POLL_TIMEOUT_US	100000 /* 100ms arbitrary timeout */
45 
46 size_t __ro_after_init kvm_hyp_arm_smmu_v3_count;
47 struct hyp_arm_smmu_v3_device *kvm_hyp_arm_smmu_v3_smmus;
48 
49 #define for_each_smmu(smmu) \
50 	for ((smmu) = kvm_hyp_arm_smmu_v3_smmus; \
51 	     (smmu) != &kvm_hyp_arm_smmu_v3_smmus[kvm_hyp_arm_smmu_v3_count]; \
52 	     (smmu)++)
53 
54 /*
55  * Wait until @cond is true.
56  * Return 0 on success, or -ETIMEDOUT
57  */
58 #define smmu_wait(_cond)					\
59 ({								\
60 	int __i = 0;						\
61 	int __ret = 0;						\
62 								\
63 	while (!(_cond)) {					\
64 		if (++__i > ARM_SMMU_POLL_TIMEOUT_US) {		\
65 			__ret = -ETIMEDOUT;			\
66 			break;					\
67 		}						\
68 		pkvm_udelay(1);					\
69 	}							\
70 	__ret;							\
71 })
72 
73 #define smmu_wait_event(_smmu, _cond)				\
74 ({								\
75 	if ((_smmu)->features & ARM_SMMU_FEAT_SEV) {		\
76 		while (!(_cond))				\
77 			wfe();					\
78 	}							\
79 	smmu_wait(_cond);					\
80 })
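/*
 * Typical usage (illustrative, mirroring smmu_sync_cmd() below):
 *
 *	ret = smmu_wait_event(smmu, smmu_cmdq_empty(smmu));
 *
 * On SMMUs with ARM_SMMU_FEAT_SEV the loop sleeps in wfe() until the
 * hardware signals a queue update; otherwise (or once woken) smmu_wait()
 * polls with a 1us delay, up to the 100ms timeout above.
 */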
81 
82 struct domain_iommu_node {
83 	struct kvm_hyp_iommu		*iommu;
84 	struct list_head		list;
85 	unsigned long			ref;
86 };
87 
88 /*
89  * SMMUv3 domain:
90  * @domain: Pointer to the IOMMU domain.
91  * @iommu_list: List of SMMU instances for this domain
92  * @list_lock: Protects iommu_list
93  * @type: Type of domain (S1, S2)
94  * @pgt_lock: Lock for page table
95  * @pgtable: io_pgtable instance for this domain
96  */
97 struct hyp_arm_smmu_v3_domain {
98 	struct kvm_hyp_iommu_domain     *domain;
99 	struct list_head		iommu_list;
100 	hyp_rwlock_t			list_lock;
101 	u32				type;
102 	hyp_spinlock_t			pgt_lock;
103 	struct io_pgtable		*pgtable;
104 };
105 
106 static struct hyp_arm_smmu_v3_device *to_smmu(struct kvm_hyp_iommu *iommu)
107 {
108 	return container_of(iommu, struct hyp_arm_smmu_v3_device, iommu);
109 }
110 
111 static int smmu_write_cr0(struct hyp_arm_smmu_v3_device *smmu, u32 val)
112 {
113 	writel_relaxed(val, smmu->base + ARM_SMMU_CR0);
114 	return smmu_wait(readl_relaxed(smmu->base + ARM_SMMU_CR0ACK) == val);
115 }
116 
117 /* Transfer ownership of structures from host to hyp */
118 static int smmu_take_pages(u64 phys, size_t size)
119 {
120 	WARN_ON(!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size));
121 	return __pkvm_host_donate_hyp(phys >> PAGE_SHIFT, size >> PAGE_SHIFT);
122 }
123 
124 static void smmu_reclaim_pages(u64 phys, size_t size)
125 {
126 	WARN_ON(!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size));
127 	WARN_ON(__pkvm_hyp_donate_host(phys >> PAGE_SHIFT, size >> PAGE_SHIFT));
128 }
129 
130 #define Q_WRAP(smmu, reg)	((reg) & (1 << (smmu)->cmdq_log2size))
131 #define Q_IDX(smmu, reg)	((reg) & ((1 << (smmu)->cmdq_log2size) - 1))
132 
133 static bool smmu_cmdq_has_space(struct hyp_arm_smmu_v3_device *smmu, u32 n)
134 {
135 	u64 smmu_cons = readl_relaxed(smmu->base + ARM_SMMU_CMDQ_CONS);
136 	u32 space, prod, cons;
137 
138 	prod = Q_IDX(smmu, smmu->cmdq_prod);
139 	cons = Q_IDX(smmu, smmu_cons);
140 
141 	if (Q_WRAP(smmu, smmu->cmdq_prod) == Q_WRAP(smmu, smmu_cons))
142 		space = (1 << smmu->cmdq_log2size) - (prod - cons);
143 	else
144 		space = cons - prod;
145 
146 	return space >= n;
147 }
148 
149 static bool smmu_cmdq_empty(struct hyp_arm_smmu_v3_device *smmu)
150 {
151 	u64 cons = readl_relaxed(smmu->base + ARM_SMMU_CMDQ_CONS);
152 
153 	return Q_IDX(smmu, smmu->cmdq_prod) == Q_IDX(smmu, cons) &&
154 	       Q_WRAP(smmu, smmu->cmdq_prod) == Q_WRAP(smmu, cons);
155 }
156 
157 static int smmu_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
158 {
159 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
160 
161 	switch (ent->opcode) {
162 	case CMDQ_OP_CFGI_ALL:
163 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
164 		break;
165 	case CMDQ_OP_CFGI_CD:
166 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
167 		fallthrough;
168 	case CMDQ_OP_CFGI_STE:
169 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
170 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
171 		break;
172 	case CMDQ_OP_TLBI_NH_VA:
173 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
174 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
175 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
176 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
177 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
178 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
179 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
180 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
181 		break;
182 	case CMDQ_OP_TLBI_NSNH_ALL:
183 		break;
184 	case CMDQ_OP_TLBI_NH_ASID:
185 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
186 		fallthrough;
187 	case CMDQ_OP_TLBI_S12_VMALL:
188 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
189 		break;
190 	case CMDQ_OP_TLBI_S2_IPA:
191 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
192 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
193 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
194 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
195 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
196 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
197 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
198 		break;
199 	case CMDQ_OP_CMD_SYNC:
200 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
201 		break;
202 	default:
203 		return -EINVAL;
204 	}
205 
206 	return 0;
207 }
208 
209 static int smmu_issue_cmds(struct hyp_arm_smmu_v3_device *smmu,
210 			   u64 *cmds, int n)
211 {
212 	int i;
213 	int ret;
214 	u32 prod;
215 
216 	ret = smmu_wait_event(smmu, smmu_cmdq_has_space(smmu, n));
217 	if (ret)
218 		return ret;
219 
220 	for (i = 0; i < n; i++) {
221 		int j;
222 		int idx = Q_IDX(smmu, smmu->cmdq_prod + i);
223 		u64 *slot = smmu->cmdq_base + idx * CMDQ_ENT_DWORDS;
224 
225 		for (j = 0; j < CMDQ_ENT_DWORDS; j++)
226 			slot[j] = cpu_to_le64(cmds[i * CMDQ_ENT_DWORDS + j]);
227 	}
228 
229 	prod = (Q_WRAP(smmu, smmu->cmdq_prod) | Q_IDX(smmu, smmu->cmdq_prod)) + n;
230 	smmu->cmdq_prod = Q_OVF(smmu->cmdq_prod) | Q_WRAP(smmu, prod) | Q_IDX(smmu, prod);
231 
232 	writel(Q_IDX(smmu, smmu->cmdq_prod) | Q_WRAP(smmu, smmu->cmdq_prod),
233 	       smmu->base + ARM_SMMU_CMDQ_PROD);
234 	return 0;
235 }
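/*
 * Worked example for the producer update above (illustrative, assuming
 * cmdq_log2size = 8): with cmdq_prod = 0x0fe and n = 4 the intermediate
 * prod is 0x102, so the new cmdq_prod gets the wrap bit (0x100) and index 2,
 * i.e. the write wrapped around the end of the queue. Only Q_IDX | Q_WRAP is
 * written to ARM_SMMU_CMDQ_PROD.
 */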
236 
237 static int smmu_add_cmd(struct hyp_arm_smmu_v3_device *smmu,
238 			struct arm_smmu_cmdq_ent *ent)
239 {
240 	u64 cmd[CMDQ_ENT_DWORDS] = {};
241 	int ret;
242 
243 	ret = smmu_build_cmd(cmd, ent);
244 	if (ret)
245 		return ret;
246 
247 	return smmu_issue_cmds(smmu, cmd, 1);
248 }
249 
250 static int smmu_sync_cmd(struct hyp_arm_smmu_v3_device *smmu)
251 {
252 	int ret;
253 	struct arm_smmu_cmdq_ent cmd = {
254 		.opcode = CMDQ_OP_CMD_SYNC,
255 	};
256 
257 	ret = smmu_add_cmd(smmu, &cmd);
258 	if (ret)
259 		return ret;
260 
261 	return smmu_wait_event(smmu, smmu_cmdq_empty(smmu));
262 }
263 
264 static int smmu_send_cmd(struct hyp_arm_smmu_v3_device *smmu,
265 			 struct arm_smmu_cmdq_ent *cmd)
266 {
267 	int ret;
268 
269 	if (smmu->iommu.power_is_off)
270 		return 0;
271 
272 	ret = smmu_add_cmd(smmu, cmd);
273 	if (ret)
274 		return ret;
275 
276 	return smmu_sync_cmd(smmu);
277 }
278 
279 static int smmu_sync_ste(struct hyp_arm_smmu_v3_device *smmu, u32 sid)
280 {
281 	struct arm_smmu_cmdq_ent cmd = {
282 		.opcode = CMDQ_OP_CFGI_STE,
283 		.cfgi.sid = sid,
284 		.cfgi.leaf = true,
285 	};
286 
287 	return smmu_send_cmd(smmu, &cmd);
288 }
289 
290 static int smmu_sync_cd(struct hyp_arm_smmu_v3_device *smmu, u32 sid, u32 ssid)
291 {
292 	struct arm_smmu_cmdq_ent cmd = {
293 		.opcode = CMDQ_OP_CFGI_CD,
294 		.cfgi.sid	= sid,
295 		.cfgi.ssid	= ssid,
296 		.cfgi.leaf = true,
297 	};
298 
299 	return smmu_send_cmd(smmu, &cmd);
300 }
301 
302 static int smmu_alloc_l2_strtab(struct hyp_arm_smmu_v3_device *smmu, u32 sid)
303 {
304 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
305 	struct arm_smmu_strtab_l1 *l1_desc;
306 	dma_addr_t l2ptr_dma;
307 	struct arm_smmu_strtab_l2 *l2table;
308 	size_t l2_order = get_order(sizeof(struct arm_smmu_strtab_l2));
309 	int flags = 0;
310 	u32 l1_idx = arm_smmu_strtab_l1_idx(sid);
311 
312 	if (l1_idx >= cfg->l2.num_l1_ents)
313 		return -EINVAL;
314 
315 	l1_desc = &cfg->l2.l1tab[l1_idx];
316 	if (l1_desc->l2ptr)
317 		return 0;
318 
319 	if (!(smmu->features & ARM_SMMU_FEAT_COHERENCY))
320 		flags |= IOMMU_PAGE_NOCACHE;
321 
322 	l2table = kvm_iommu_donate_pages(l2_order, flags);
323 	if (!l2table)
324 		return -ENOMEM;
325 
326 	l2ptr_dma = hyp_virt_to_phys(l2table);
327 
328 	if (l2ptr_dma & (~STRTAB_L1_DESC_L2PTR_MASK | ~PAGE_MASK)) {
329 		kvm_iommu_reclaim_pages(l2table, l2_order);
330 		return -EINVAL;
331 	}
332 
333 	/* Ensure the empty stream table is visible before the descriptor write */
334 	wmb();
335 
336 	arm_smmu_write_strtab_l1_desc(l1_desc, l2ptr_dma);
337 	return 0;
338 }
339 
340 static struct arm_smmu_ste *
341 smmu_get_ste_ptr(struct hyp_arm_smmu_v3_device *smmu, u32 sid)
342 {
343 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
344 
345 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
346 		struct arm_smmu_strtab_l1 *l1_desc =
347 					&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)];
348 		struct arm_smmu_strtab_l2 *l2ptr;
349 
350 		if (arm_smmu_strtab_l1_idx(sid) >= cfg->l2.num_l1_ents)
351 			return NULL;
352 		/* L2 should be allocated before calling this. */
353 		if (WARN_ON(!l1_desc->l2ptr))
354 			return NULL;
355 
356 		l2ptr = hyp_phys_to_virt(l1_desc->l2ptr & STRTAB_L1_DESC_L2PTR_MASK);
357 		/* Two-level walk */
358 		return &l2ptr->stes[arm_smmu_strtab_l2_idx(sid)];
359 	}
360 
361 	if (sid >= cfg->linear.num_ents)
362 		return NULL;
363 	/* Simple linear lookup */
364 	return &cfg->linear.table[sid];
365 }
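/*
 * Example of the two-level walk above (illustrative, assuming the usual
 * STRTAB_SPLIT of 8): sid 0x1234 selects L1 descriptor 0x12, whose l2ptr
 * points at a block of 256 STEs, and STE 0x34 within that block. The linear
 * case is a plain array lookup indexed by sid.
 */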
366 
367 static struct arm_smmu_ste *
368 smmu_get_alloc_ste_ptr(struct hyp_arm_smmu_v3_device *smmu, u32 sid)
369 {
370 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
371 		int ret = smmu_alloc_l2_strtab(smmu, sid);
372 
373 		if (ret) {
374 			WARN_ON(ret != -ENOMEM);
375 			return NULL;
376 		}
377 	}
378 	return smmu_get_ste_ptr(smmu, sid);
379 }
380 
381 static u64 *smmu_get_cd_ptr(u64 *cdtab, u32 ssid)
382 {
383 	/* Only linear supported for now. */
384 	return cdtab + ssid * CTXDESC_CD_DWORDS;
385 }
386 
387 static u64 *smmu_alloc_cd(struct hyp_arm_smmu_v3_device *smmu, u32 pasid_bits)
388 {
389 	u64 *cd_table;
390 	int flags = 0;
391 	u32 requested_order = get_order((1 << pasid_bits) *
392 					(CTXDESC_CD_DWORDS << 3));
393 
394 	/*
395 	 * We only support linear tables of up to 64K; this should be enough
396 	 * for 128 pasids.
397 	 */
398 	if (WARN_ON(requested_order > 4))
399 		return NULL;
400 
401 	if (!(smmu->features & ARM_SMMU_FEAT_COHERENCY))
402 		flags |= IOMMU_PAGE_NOCACHE;
403 
404 	cd_table = kvm_iommu_donate_pages(requested_order, flags);
405 	if (!cd_table)
406 		return NULL;
407 	return (u64 *)hyp_virt_to_phys(cd_table);
408 }
409 
410 static void smmu_free_cd(u64 *cd_table, u32 pasid_bits)
411 {
412 	u32 order = get_order((1 << pasid_bits) *
413 			      (CTXDESC_CD_DWORDS << 3));
414 
415 	kvm_iommu_reclaim_pages(cd_table, order);
416 }
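/*
 * Sizing example (illustrative, assuming 4KB pages): each context descriptor
 * is CTXDESC_CD_DWORDS * 8 = 64 bytes, so pasid_bits = 8 needs a 16KB linear
 * table, i.e. an allocation order of 2. The actual order depends on
 * PAGE_SIZE and the pasid_bits negotiated at attach time.
 */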
417 
418 static int smmu_init_registers(struct hyp_arm_smmu_v3_device *smmu)
419 {
420 	u64 val, old;
421 	int ret;
422 
423 	if (!(readl_relaxed(smmu->base + ARM_SMMU_GBPA) & GBPA_ABORT))
424 		return -EINVAL;
425 
426 	/* Initialize all RW registers that will be read by the SMMU */
427 	ret = smmu_write_cr0(smmu, 0);
428 	if (ret)
429 		return ret;
430 
431 	val = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
432 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
433 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
434 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
435 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
436 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
437 	writel_relaxed(val, smmu->base + ARM_SMMU_CR1);
438 	writel_relaxed(CR2_PTM, smmu->base + ARM_SMMU_CR2);
439 
440 	val = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
441 	old = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
442 	/* Service Failure Mode is fatal */
443 	if ((val ^ old) & GERROR_SFM_ERR)
444 		return -EIO;
445 	/* Clear pending errors */
446 	writel_relaxed(val, smmu->base + ARM_SMMU_GERRORN);
447 
448 	return 0;
449 }
450 
451 static int smmu_init_cmdq(struct hyp_arm_smmu_v3_device *smmu)
452 {
453 	u64 cmdq_base;
454 	size_t cmdq_nr_entries, cmdq_size;
455 	int ret;
456 	enum kvm_pgtable_prot prot = PAGE_HYP;
457 
458 	cmdq_base = readq_relaxed(smmu->base + ARM_SMMU_CMDQ_BASE);
459 	if (cmdq_base & ~(Q_BASE_RWA | Q_BASE_ADDR_MASK | Q_BASE_LOG2SIZE))
460 		return -EINVAL;
461 
462 	smmu->cmdq_log2size = cmdq_base & Q_BASE_LOG2SIZE;
463 	cmdq_nr_entries = 1 << smmu->cmdq_log2size;
464 	cmdq_size = cmdq_nr_entries * CMDQ_ENT_DWORDS * 8;
465 
466 	cmdq_base &= Q_BASE_ADDR_MASK;
467 
468 	if (!(smmu->features & ARM_SMMU_FEAT_COHERENCY))
469 		prot |= KVM_PGTABLE_PROT_NORMAL_NC;
470 
471 	ret = ___pkvm_host_donate_hyp_prot(cmdq_base >> PAGE_SHIFT,
472 					   PAGE_ALIGN(cmdq_size) >> PAGE_SHIFT,
473 					   false, prot);
474 	if (ret)
475 		return ret;
476 
477 	smmu->cmdq_base = hyp_phys_to_virt(cmdq_base);
478 
479 	memset(smmu->cmdq_base, 0, cmdq_size);
480 	writel_relaxed(0, smmu->base + ARM_SMMU_CMDQ_PROD);
481 	writel_relaxed(0, smmu->base + ARM_SMMU_CMDQ_CONS);
482 
483 	return 0;
484 }
485 
486 /*
487  * Event queue support is optional and managed by the kernel.
488  * However, it must be put in a shared state so that it can't be
489  * donated to the hypervisor later.
490  * This relies on ARM_SMMU_EVTQ_BASE not being changeable after
491  * de-privilege.
492  */
493 static int smmu_init_evtq(struct hyp_arm_smmu_v3_device *smmu)
494 {
495 	u64 evtq_base, evtq_pfn;
496 	size_t evtq_nr_entries, evtq_size, evtq_nr_pages;
497 	void *evtq_va, *evtq_end;
498 	size_t i;
499 	int ret;
500 
501 	evtq_base = readq_relaxed(smmu->base + ARM_SMMU_EVTQ_BASE);
502 	if (!evtq_base)
503 		return 0;
504 
505 	if (evtq_base & ~(Q_BASE_RWA | Q_BASE_ADDR_MASK | Q_BASE_LOG2SIZE))
506 		return -EINVAL;
507 
508 	evtq_nr_entries = 1 << (evtq_base & Q_BASE_LOG2SIZE);
509 	evtq_size = evtq_nr_entries * EVTQ_ENT_DWORDS * 8;
510 	evtq_nr_pages = PAGE_ALIGN(evtq_size) >> PAGE_SHIFT;
511 
512 	evtq_pfn = PAGE_ALIGN(evtq_base & Q_BASE_ADDR_MASK) >> PAGE_SHIFT;
513 
514 	for (i = 0 ; i < evtq_nr_pages ; ++i) {
515 		ret = __pkvm_host_share_hyp(evtq_pfn + i);
516 		if (ret)
517 			return ret;
518 	}
519 
520 	evtq_va = hyp_phys_to_virt(evtq_pfn << PAGE_SHIFT);
521 	evtq_end = hyp_phys_to_virt((evtq_pfn + evtq_nr_pages) << PAGE_SHIFT);
522 
523 	return hyp_pin_shared_mem(evtq_va, evtq_end);
524 }
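/*
 * Example (illustrative, assuming 4KB pages): an event queue with a log2size
 * of 7 has 128 entries of EVTQ_ENT_DWORDS * 8 = 32 bytes each, i.e. 4KB, so
 * a single page is shared and pinned here; larger queues simply share more
 * pages.
 */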
525 
526 static int smmu_init_strtab(struct hyp_arm_smmu_v3_device *smmu)
527 {
528 	int ret;
529 	u64 strtab_base;
530 	size_t strtab_size;
531 	u32 strtab_cfg, fmt;
532 	int split, log2size;
533 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
534 	enum kvm_pgtable_prot prot = PAGE_HYP;
535 
536 	if (!(smmu->features & ARM_SMMU_FEAT_COHERENCY))
537 		prot |= KVM_PGTABLE_PROT_NORMAL_NC;
538 
539 	strtab_base = readq_relaxed(smmu->base + ARM_SMMU_STRTAB_BASE);
540 	if (strtab_base & ~(STRTAB_BASE_ADDR_MASK | STRTAB_BASE_RA))
541 		return -EINVAL;
542 
543 	strtab_cfg = readl_relaxed(smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
544 	if (strtab_cfg & ~(STRTAB_BASE_CFG_FMT | STRTAB_BASE_CFG_SPLIT |
545 			   STRTAB_BASE_CFG_LOG2SIZE))
546 		return -EINVAL;
547 
548 	fmt = FIELD_GET(STRTAB_BASE_CFG_FMT, strtab_cfg);
549 	split = FIELD_GET(STRTAB_BASE_CFG_SPLIT, strtab_cfg);
550 	log2size = FIELD_GET(STRTAB_BASE_CFG_LOG2SIZE, strtab_cfg);
551 	strtab_base &= STRTAB_BASE_ADDR_MASK;
552 
553 	switch (fmt) {
554 	case STRTAB_BASE_CFG_FMT_LINEAR:
555 		if (split)
556 			return -EINVAL;
557 		cfg->linear.num_ents = 1 << log2size;
558 		strtab_size = cfg->linear.num_ents * sizeof(struct arm_smmu_ste);
559 		cfg->linear.ste_dma = strtab_base;
560 		ret = ___pkvm_host_donate_hyp_prot(strtab_base >> PAGE_SHIFT,
561 						   PAGE_ALIGN(strtab_size) >> PAGE_SHIFT,
562 						   false, prot);
563 		if (ret)
564 			return -EINVAL;
565 		cfg->linear.table = hyp_phys_to_virt(strtab_base);
566 		/* Disable all STEs */
567 		memset(cfg->linear.table, 0, strtab_size);
568 		break;
569 	case STRTAB_BASE_CFG_FMT_2LVL:
570 		if (split != STRTAB_SPLIT)
571 			return -EINVAL;
572 		cfg->l2.num_l1_ents = 1 << max(0, log2size - split);
573 		strtab_size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1);
574 		cfg->l2.l1_dma = strtab_base;
575 		ret = ___pkvm_host_donate_hyp_prot(strtab_base >> PAGE_SHIFT,
576 						   PAGE_ALIGN(strtab_size) >> PAGE_SHIFT,
577 						   false, prot);
578 		if (ret)
579 			return -EINVAL;
580 		cfg->l2.l1tab = hyp_phys_to_virt(strtab_base);
581 		/* Disable all STEs */
582 		memset(cfg->l2.l1tab, 0, strtab_size);
583 		break;
584 	default:
585 		return -EINVAL;
586 	}
587 	return 0;
588 }
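/*
 * Example (illustrative): a 2-level stream table with LOG2SIZE = 16 and
 * SPLIT = 8 covers 64K stream IDs using 1 << (16 - 8) = 256 L1 descriptors,
 * so only the 2KB L1 table is donated here; the 256-STE L2 blocks are
 * allocated lazily by smmu_alloc_l2_strtab() when a sid is first attached.
 */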
589 
590 static int smmu_reset_device(struct hyp_arm_smmu_v3_device *smmu)
591 {
592 	int ret;
593 	struct arm_smmu_cmdq_ent cfgi_cmd = {
594 		.opcode = CMDQ_OP_CFGI_ALL,
595 	};
596 	struct arm_smmu_cmdq_ent tlbi_cmd = {
597 		.opcode = CMDQ_OP_TLBI_NSNH_ALL,
598 	};
599 
600 	/* Invalidate all cached configs and TLBs */
601 	ret = smmu_write_cr0(smmu, CR0_CMDQEN);
602 	if (ret)
603 		return ret;
604 
605 	ret = smmu_add_cmd(smmu, &cfgi_cmd);
606 	if (ret)
607 		goto err_disable_cmdq;
608 
609 	ret = smmu_add_cmd(smmu, &tlbi_cmd);
610 	if (ret)
611 		goto err_disable_cmdq;
612 
613 	ret = smmu_sync_cmd(smmu);
614 	if (ret)
615 		goto err_disable_cmdq;
616 
617 	/* Enable translation */
618 	return smmu_write_cr0(smmu, CR0_SMMUEN | CR0_CMDQEN | CR0_ATSCHK | CR0_EVTQEN);
619 
620 err_disable_cmdq:
621 	return smmu_write_cr0(smmu, 0);
622 }
623 
624 static int smmu_init_device(struct hyp_arm_smmu_v3_device *smmu)
625 {
626 	int ret;
627 
628 	if (!PAGE_ALIGNED(smmu->mmio_addr | smmu->mmio_size))
629 		return -EINVAL;
630 
631 	ret = ___pkvm_host_donate_hyp(smmu->mmio_addr >> PAGE_SHIFT,
632 				      smmu->mmio_size >> PAGE_SHIFT,
633 				      /* accept_mmio */ true);
634 	if (ret)
635 		return ret;
636 
637 	smmu->base = hyp_phys_to_virt(smmu->mmio_addr);
638 
639 	ret = smmu_init_registers(smmu);
640 	if (ret)
641 		return ret;
642 
643 	ret = smmu_init_cmdq(smmu);
644 	if (ret)
645 		return ret;
646 
647 	ret = smmu_init_evtq(smmu);
648 	if (ret)
649 		return ret;
650 
651 	ret = smmu_init_strtab(smmu);
652 	if (ret)
653 		return ret;
654 
655 	ret = smmu_reset_device(smmu);
656 	if (ret)
657 		return ret;
658 
659 	return kvm_iommu_init_device(&smmu->iommu);
660 }
661 
662 static int smmu_init(void)
663 {
664 	int ret;
665 	struct hyp_arm_smmu_v3_device *smmu;
666 	size_t smmu_arr_size = PAGE_ALIGN(sizeof(*kvm_hyp_arm_smmu_v3_smmus) *
667 					  kvm_hyp_arm_smmu_v3_count);
668 	phys_addr_t smmu_arr_phys;
669 
670 	kvm_hyp_arm_smmu_v3_smmus = kern_hyp_va(kvm_hyp_arm_smmu_v3_smmus);
671 
672 	smmu_arr_phys = hyp_virt_to_phys(kvm_hyp_arm_smmu_v3_smmus);
673 
674 	ret = smmu_take_pages(smmu_arr_phys, smmu_arr_size);
675 	if (ret)
676 		return ret;
677 
678 	for_each_smmu(smmu) {
679 		ret = smmu_init_device(smmu);
680 		if (ret)
681 			goto out_reclaim_smmu;
682 	}
683 
684 	return 0;
685 out_reclaim_smmu:
686 	smmu_reclaim_pages(smmu_arr_phys, smmu_arr_size);
687 	return ret;
688 }
689 
690 static struct kvm_hyp_iommu *smmu_id_to_iommu(pkvm_handle_t smmu_id)
691 {
692 	if (smmu_id >= kvm_hyp_arm_smmu_v3_count)
693 		return NULL;
694 	smmu_id = array_index_nospec(smmu_id, kvm_hyp_arm_smmu_v3_count);
695 
696 	return &kvm_hyp_arm_smmu_v3_smmus[smmu_id].iommu;
697 }
698 
699 static int smmu_alloc_domain(struct kvm_hyp_iommu_domain *domain, int type)
700 {
701 	struct hyp_arm_smmu_v3_domain *smmu_domain;
702 
703 	if (type >= KVM_ARM_SMMU_DOMAIN_MAX)
704 		return -EINVAL;
705 
706 	smmu_domain = hyp_alloc(sizeof(*smmu_domain));
707 	if (!smmu_domain)
708 		return -ENOMEM;
709 
710 	INIT_LIST_HEAD(&smmu_domain->iommu_list);
711 	hyp_rwlock_init(&smmu_domain->list_lock);
712 	/*
713 	 * Can't do much without knowing the SMMUv3.
714 	 * The page table will be allocated at attach_dev, but it can be
715 	 * freed from free_domain.
716 	 */
717 	smmu_domain->domain = domain;
718 	smmu_domain->type = type;
719 	hyp_spin_lock_init(&smmu_domain->pgt_lock);
720 	domain->priv = (void *)smmu_domain;
721 
722 	return 0;
723 }
724 
725 static void smmu_free_domain(struct kvm_hyp_iommu_domain *domain)
726 {
727 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
728 	struct domain_iommu_node *iommu_node, *temp;
729 
730 	if (smmu_domain->pgtable)
731 		kvm_arm_io_pgtable_free(smmu_domain->pgtable);
732 
733 	/*
734 	 * With device assignment it is possible to free a domain with attached
735 	 * devices; they will be disabled through the dev_block_dma op.
736 	 * In that case, free the IOMMU nodes to avoid leaking memory.
737 	 */
738 	list_for_each_entry_safe(iommu_node, temp, &smmu_domain->iommu_list, list) {
739 		list_del(&iommu_node->list);
740 		hyp_free(iommu_node);
741 	}
742 
743 	hyp_free(smmu_domain);
744 }
745 
746 static void smmu_inv_domain(struct hyp_arm_smmu_v3_device *smmu,
747 			    struct hyp_arm_smmu_v3_domain *smmu_domain)
748 {
749 	struct kvm_hyp_iommu_domain *domain = smmu_domain->domain;
750 	struct arm_smmu_cmdq_ent cmd = {};
751 
752 	if (smmu_domain->pgtable->cfg.fmt == ARM_64_LPAE_S2) {
753 		cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
754 		cmd.tlbi.vmid = domain->domain_id;
755 	} else {
756 		cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
757 		cmd.tlbi.asid = domain->domain_id;
758 	}
759 
760 	if (smmu->iommu.power_is_off)
761 		return;
762 
763 	WARN_ON(smmu_send_cmd(smmu, &cmd));
764 }
765 
766 static void smmu_tlb_flush_all(void *cookie)
767 {
768 	struct kvm_hyp_iommu_domain *domain = cookie;
769 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
770 	struct hyp_arm_smmu_v3_device *smmu;
771 	struct domain_iommu_node *iommu_node;
772 
773 	hyp_read_lock(&smmu_domain->list_lock);
774 	list_for_each_entry(iommu_node, &smmu_domain->iommu_list, list) {
775 		smmu = to_smmu(iommu_node->iommu);
776 		kvm_iommu_lock(&smmu->iommu);
777 		smmu_inv_domain(smmu, smmu_domain);
778 		kvm_iommu_unlock(&smmu->iommu);
779 	}
780 	hyp_read_unlock(&smmu_domain->list_lock);
781 }
782 
783 static void smmu_cmdq_batch_add(struct hyp_arm_smmu_v3_device *smmu,
784 				struct arm_smmu_cmdq_batch *cmds,
785 				struct arm_smmu_cmdq_ent *cmd)
786 {
787 	int index;
788 
789 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
790 		smmu_issue_cmds(smmu, cmds->cmds, cmds->num);
791 		cmds->num = 0;
792 	}
793 
794 	index = cmds->num * CMDQ_ENT_DWORDS;
795 	smmu_build_cmd(&cmds->cmds[index], cmd);
796 
797 	cmds->num++;
798 }
799 
800 static int smmu_tlb_inv_range_smmu(struct hyp_arm_smmu_v3_device *smmu,
801 				   struct kvm_hyp_iommu_domain *domain,
802 				   struct arm_smmu_cmdq_ent *cmd,
803 				   unsigned long iova, size_t size, size_t granule)
804 {
805 	int ret = 0;
806 	unsigned long end = iova + size, num_pages = 0, tg = 0;
807 	size_t inv_range = granule;
808 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
809 	struct arm_smmu_cmdq_batch cmds;
810 
811 	kvm_iommu_lock(&smmu->iommu);
812 	if (smmu->iommu.power_is_off)
813 		goto out_ret;
814 
815 	/* Almost copy-paste from the kernel driver. */
816 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
817 		/* Get the leaf page size */
818 		tg = __ffs(smmu_domain->pgtable->cfg.pgsize_bitmap);
819 
820 		num_pages = size >> tg;
821 
822 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
823 		cmd->tlbi.tg = (tg - 10) / 2;
824 
825 		/*
826 		 * Determine what level the granule is at. For non-leaf, both
827 		 * io-pgtable and SVA pass a nominal last-level granule because
828 		 * they don't know what level(s) actually apply, so ignore that
829 		 * and leave TTL=0. However for various errata reasons we still
830 		 * want to use a range command, so avoid the SVA corner case
831 		 * where both scale and num could be 0 as well.
832 		 */
833 		if (cmd->tlbi.leaf)
834 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
835 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
836 			num_pages++;
837 	}
838 
839 	cmds.num = 0;
840 
841 	while (iova < end) {
842 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
843 			/*
844 			 * On each iteration of the loop, the range is 5 bits
845 			 * worth of the aligned size remaining.
846 			 * The range in pages is:
847 			 *
848 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
849 			 */
850 			unsigned long scale, num;
851 
852 			/* Determine the power of 2 multiple number of pages */
853 			scale = __ffs(num_pages);
854 			cmd->tlbi.scale = scale;
855 
856 			/* Determine how many chunks of 2^scale size we have */
857 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
858 			cmd->tlbi.num = num - 1;
859 
860 			/* range is num * 2^scale * pgsize */
861 			inv_range = num << (scale + tg);
862 
863 			/* Clear out the lower order bits for the next iteration */
864 			num_pages -= num << scale;
865 		}
866 		cmd->tlbi.addr = iova;
867 		smmu_cmdq_batch_add(smmu, &cmds, cmd);
868 		BUG_ON(iova + inv_range < iova);
869 		iova += inv_range;
870 	}
871 
872 	WARN_ON(smmu_issue_cmds(smmu, cmds.cmds, cmds.num));
873 	ret = smmu_sync_cmd(smmu);
874 out_ret:
875 	kvm_iommu_unlock(&smmu->iommu);
876 	return ret;
877 }
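/*
 * Range-invalidation example (illustrative): unmapping 2MB of 4KB pages on
 * an SMMU with ARM_SMMU_FEAT_RANGE_INV gives tg = 12, cmd->tlbi.tg = 1 and
 * num_pages = 512. The loop then emits a single command with scale = 9 and
 * num = 0 (one chunk of 2^9 pages) covering the whole 2MB, instead of 512
 * per-page invalidations.
 */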
878 
879 static void smmu_tlb_inv_range(struct kvm_hyp_iommu_domain *domain,
880 			       unsigned long iova, size_t size, size_t granule,
881 			       bool leaf)
882 {
883 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
884 	struct hyp_arm_smmu_v3_device *smmu;
885 	struct domain_iommu_node *iommu_node;
886 	unsigned long end = iova + size;
887 	struct arm_smmu_cmdq_ent cmd;
888 
889 	cmd.tlbi.leaf = leaf;
890 	if (smmu_domain->pgtable->cfg.fmt == ARM_64_LPAE_S2) {
891 		cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
892 		cmd.tlbi.vmid = domain->domain_id;
893 	} else {
894 		cmd.opcode = CMDQ_OP_TLBI_NH_VA;
895 		cmd.tlbi.asid = domain->domain_id;
896 		cmd.tlbi.vmid = 0;
897 	}
898 	/*
899 	 * There are no mappings at high addresses since we don't use TTB1, so
900 	 * no overflow possible.
901 	 */
902 	BUG_ON(end < iova);
903 	hyp_read_lock(&smmu_domain->list_lock);
904 	list_for_each_entry(iommu_node, &smmu_domain->iommu_list, list) {
905 		smmu = to_smmu(iommu_node->iommu);
906 		WARN_ON(smmu_tlb_inv_range_smmu(smmu, domain,
907 						&cmd, iova, size, granule));
908 	}
909 	hyp_read_unlock(&smmu_domain->list_lock);
910 }
911 
912 static void smmu_unmap_visit_leaf(phys_addr_t addr, size_t size,
913 				  struct io_pgtable_walk_common *data,
914 				  void *wd)
915 {
916 	u64 *ptep = wd;
917 	u64 pte = *ptep;
918 
919 	/* Might be a cleared table. */
920 	if (!pte)
921 		return;
922 	WARN_ON(iommu_pkvm_unuse_dma(addr, size));
923 	*ptep = 0;
924 }
925 
926 static void smmu_tlb_flush_walk(unsigned long iova, size_t size,
927 				size_t granule, void *cookie)
928 {
929 	struct kvm_hyp_iommu_domain *domain = cookie;
930 
931 	smmu_tlb_inv_range(domain, iova, size, granule, false);
932 }
933 
934 static void smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
935 			      unsigned long iova, size_t granule,
936 			      void *cookie)
937 {
938 	if (gather)
939 		kvm_iommu_iotlb_gather_add_page(cookie, gather, iova, granule);
940 	else
941 		smmu_tlb_inv_range(cookie, iova, granule, granule, true);
942 }
943 
944 static void smmu_free_leaf(unsigned long phys, size_t granule, void *cookie)
945 {
946 	struct kvm_hyp_iommu_domain *domain = cookie;
947 
948 	/* No tracking for idmap domain. */
949 	if (domain->domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID)
950 		return;
951 
952 	WARN_ON(iommu_pkvm_unuse_dma(phys, granule));
953 }
954 
955 static const struct iommu_flush_ops smmu_tlb_ops = {
956 	.tlb_flush_all	= smmu_tlb_flush_all,
957 	.tlb_flush_walk = smmu_tlb_flush_walk,
958 	.tlb_add_page	= smmu_tlb_add_page,
959 	.free_leaf	= smmu_free_leaf,
960 };
961 
962 static void smmu_iotlb_sync(struct kvm_hyp_iommu_domain *domain,
963 			    struct iommu_iotlb_gather *gather)
964 {
965 	size_t size;
966 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
967 	struct io_pgtable *pgtable = smmu_domain->pgtable;
968 	struct arm_lpae_io_pgtable *data = io_pgtable_to_data(pgtable);
969 	struct arm_lpae_io_pgtable_walk_data wd = {
970 		.cookie = data,
971 	};
972 	struct io_pgtable_walk_common walk_data = {
973 		.visit_leaf = smmu_unmap_visit_leaf,
974 		.data = &wd,
975 	};
976 
977 	if (!gather->pgsize)
978 		return;
979 	size = gather->end - gather->start + 1;
980 	smmu_tlb_inv_range(domain, gather->start, size,  gather->pgsize, true);
981 
982 	/*
983 	 * Now decrement the refcount of unmapped pages thanks to
984 	 * IO_PGTABLE_QUIRK_UNMAP_INVAL
985 	 */
986 	pgtable->ops.pgtable_walk(&pgtable->ops, gather->start, size, &walk_data);
987 }
988 
989 static int smmu_domain_config_s2(struct kvm_hyp_iommu_domain *domain,
990 				 struct arm_smmu_ste *ste)
991 {
992 	struct io_pgtable_cfg *cfg;
993 	u64 ts, sl, ic, oc, sh, tg, ps;
994 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
995 
996 	cfg = &smmu_domain->pgtable->cfg;
997 	ps = cfg->arm_lpae_s2_cfg.vtcr.ps;
998 	tg = cfg->arm_lpae_s2_cfg.vtcr.tg;
999 	sh = cfg->arm_lpae_s2_cfg.vtcr.sh;
1000 	oc = cfg->arm_lpae_s2_cfg.vtcr.orgn;
1001 	ic = cfg->arm_lpae_s2_cfg.vtcr.irgn;
1002 	sl = cfg->arm_lpae_s2_cfg.vtcr.sl;
1003 	ts = cfg->arm_lpae_s2_cfg.vtcr.tsz;
1004 
1005 	ste->data[0] = STRTAB_STE_0_V |
1006 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1007 	ste->data[1] = FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING);
1008 	ste->data[2] = FIELD_PREP(STRTAB_STE_2_VTCR,
1009 			FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, ps) |
1010 			FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, tg) |
1011 			FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, sh) |
1012 			FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, oc) |
1013 			FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, ic) |
1014 			FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, sl) |
1015 			FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, ts)) |
1016 		 FIELD_PREP(STRTAB_STE_2_S2VMID, domain->domain_id) |
1017 		 STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2R;
1018 	ste->data[3] = cfg->arm_lpae_s2_cfg.vttbr & STRTAB_STE_3_S2TTB_MASK;
1019 
1020 	return 0;
1021 }
1022 
1023 static u64 *smmu_domain_config_s1_ste(struct hyp_arm_smmu_v3_device *smmu,
1024 				      u32 pasid_bits, struct arm_smmu_ste *ste)
1025 {
1026 	u64 *cd_table;
1027 
1028 	cd_table = smmu_alloc_cd(smmu, pasid_bits);
1029 	if (!cd_table)
1030 		return NULL;
1031 
1032 	ste->data[1] = FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1033 		FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1034 		FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1035 		FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
1036 	ste->data[0] = ((u64)cd_table & STRTAB_STE_0_S1CTXPTR_MASK) |
1037 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1038 		FIELD_PREP(STRTAB_STE_0_S1CDMAX, pasid_bits) |
1039 		FIELD_PREP(STRTAB_STE_0_S1FMT, STRTAB_STE_0_S1FMT_LINEAR) |
1040 		STRTAB_STE_0_V;
1041 
1042 	return cd_table;
1043 }
1044 
1045 /*
1046  * This function handles configuration for pasid and non-pasid domains
1047  * with the following assumptions:
1048  * - pasid 0 is always attached first; this is the typical flow
1049  *   for the kernel, where attach_dev is always called before set_dev_pasid.
1050  *   In that case only pasid 0 is allowed to allocate memory for the CD,
1051  *   and the other pasids expect to find the table already there.
1052  * - pasid 0 is detached last, which is also guaranteed by the kernel.
1053  */
1054 static int smmu_domain_config_s1(struct hyp_arm_smmu_v3_device *smmu,
1055 				 struct kvm_hyp_iommu_domain *domain,
1056 				 u32 sid, u32 pasid, u32 pasid_bits,
1057 				 struct arm_smmu_ste *ste)
1058 {
1059 	struct arm_smmu_ste *dst;
1060 	u64 val;
1061 	u64 *cd_entry, *cd_table;
1062 	struct io_pgtable_cfg *cfg;
1063 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1064 
1065 	cfg = &smmu_domain->pgtable->cfg;
1066 	dst = smmu_get_ste_ptr(smmu, sid);
1067 	val = dst->data[0];
1068 
1069 	if (FIELD_GET(STRTAB_STE_0_CFG, val) == STRTAB_STE_0_CFG_S2_TRANS)
1070 		return -EBUSY;
1071 
1072 	if (pasid == 0) {
1073 		cd_table = smmu_domain_config_s1_ste(smmu, pasid_bits, ste);
1074 		if (!cd_table)
1075 			return -ENOMEM;
1076 	} else {
1077 		u32 nr_entries;
1078 
1079 		cd_table = (u64 *)(FIELD_GET(STRTAB_STE_0_S1CTXPTR_MASK, val) << 6);
1080 		if (!cd_table)
1081 			return -EINVAL;
1082 		nr_entries = 1 << FIELD_GET(STRTAB_STE_0_S1CDMAX, val);
1083 		if (pasid >= nr_entries)
1084 			return -E2BIG;
1085 	}
1086 
1087 	/* Write CD. */
1088 	cd_entry = smmu_get_cd_ptr(hyp_phys_to_virt((u64)cd_table), pasid);
1089 
1090 	/* CD already used by another device. */
1091 	if (cd_entry[0])
1092 		return -EBUSY;
1093 
1094 	cd_entry[1] = cpu_to_le64(cfg->arm_lpae_s1_cfg.ttbr & CTXDESC_CD_1_TTB0_MASK);
1095 	cd_entry[2] = 0;
1096 	cd_entry[3] = cpu_to_le64(cfg->arm_lpae_s1_cfg.mair);
1097 
1098 	/* STE is live. */
1099 	if (pasid)
1100 		smmu_sync_cd(smmu, sid, pasid);
1101 	val =  FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz) |
1102 	       FIELD_PREP(CTXDESC_CD_0_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) |
1103 	       FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) |
1104 	       FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) |
1105 	       FIELD_PREP(CTXDESC_CD_0_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) |
1106 	       FIELD_PREP(CTXDESC_CD_0_TCR_IPS, cfg->arm_lpae_s1_cfg.tcr.ips) |
1107 	       CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64 |
1108 	       CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1109 	       CTXDESC_CD_0_ASET |
1110 	       FIELD_PREP(CTXDESC_CD_0_ASID, domain->domain_id) |
1111 	       CTXDESC_CD_0_V;
1112 	WRITE_ONCE(cd_entry[0], cpu_to_le64(val));
1113 	/* STE is live. */
1114 	if (pasid)
1115 		smmu_sync_cd(smmu, sid, pasid);
1116 	return 0;
1117 }
1118 
1119 static int smmu_domain_finalise(struct hyp_arm_smmu_v3_device *smmu,
1120 				struct kvm_hyp_iommu_domain *domain)
1121 {
1122 	int ret;
1123 	struct io_pgtable_cfg cfg;
1124 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1125 	struct arm_lpae_io_pgtable *data;
1126 	bool idmapped = domain->domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID;
1127 	unsigned long quirks = idmapped ? 0 : IO_PGTABLE_QUIRK_UNMAP_INVAL;
1128 
1129 	if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_S1) {
1130 		size_t ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1131 
1132 		cfg = (struct io_pgtable_cfg) {
1133 			.fmt = ARM_64_LPAE_S1,
1134 			.pgsize_bitmap = smmu->pgsize_bitmap,
1135 			.ias = min_t(unsigned long, ias, VA_BITS),
1136 			.oas = smmu->ias,
1137 			.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
1138 			.tlb = &smmu_tlb_ops,
1139 			.quirks = quirks,
1140 		};
1141 	} else {
1142 		cfg = (struct io_pgtable_cfg) {
1143 			.fmt = ARM_64_LPAE_S2,
1144 			.pgsize_bitmap = smmu->pgsize_bitmap,
1145 			.ias = smmu->ias,
1146 			.oas = smmu->oas,
1147 			.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
1148 			.tlb = &smmu_tlb_ops,
1149 			.quirks = quirks,
1150 		};
1151 	}
1152 
1153 	hyp_spin_lock(&smmu_domain->pgt_lock);
1154 	smmu_domain->pgtable = kvm_arm_io_pgtable_alloc(&cfg, domain, &ret);
1155 	hyp_spin_unlock(&smmu_domain->pgt_lock);
1156 	if (ret)
1157 		return ret;
1158 
1159 	data = io_pgtable_to_data(smmu_domain->pgtable);
1160 	data->idmapped = idmapped;
1161 	return ret;
1162 }
1163 
1164 static bool smmu_domain_compat(struct hyp_arm_smmu_v3_device *smmu,
1165 			       struct hyp_arm_smmu_v3_domain *smmu_domain)
1166 {
1167 	struct io_pgtable_cfg *cfg;
1168 
1169 	/* Domain is empty. */
1170 	if (!smmu_domain->pgtable)
1171 		return true;
1172 
1173 	if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_S2) {
1174 		if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1175 			return false;
1176 	} else {
1177 		if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1178 			return false;
1179 	}
1180 
1181 	cfg = &smmu_domain->pgtable->cfg;
1182 
1183 	/* Best effort. */
1184 	return  ((smmu->pgsize_bitmap | cfg->pgsize_bitmap) == smmu->pgsize_bitmap);
1185 }
1186 
1187 static bool smmu_existing_in_domain(struct hyp_arm_smmu_v3_device *smmu,
1188 				    struct hyp_arm_smmu_v3_domain *smmu_domain)
1189 {
1190 	struct domain_iommu_node *iommu_node;
1191 	struct hyp_arm_smmu_v3_device *other;
1192 
1193 	hyp_assert_write_lock_held(&smmu_domain->list_lock);
1194 
1195 	list_for_each_entry(iommu_node, &smmu_domain->iommu_list, list) {
1196 		other = to_smmu(iommu_node->iommu);
1197 		if (other == smmu)
1198 			return true;
1199 	}
1200 
1201 	return false;
1202 }
1203 
1204 static void smmu_get_ref_domain(struct hyp_arm_smmu_v3_device *smmu,
1205 				struct hyp_arm_smmu_v3_domain *smmu_domain)
1206 {
1207 	struct domain_iommu_node *iommu_node;
1208 	struct hyp_arm_smmu_v3_device *other;
1209 
1210 	hyp_assert_write_lock_held(&smmu_domain->list_lock);
1211 
1212 	list_for_each_entry(iommu_node, &smmu_domain->iommu_list, list) {
1213 		other = to_smmu(iommu_node->iommu);
1214 		if (other == smmu) {
1215 			iommu_node->ref++;
1216 			return;
1217 		}
1218 	}
1219 }
1220 
1221 static void smmu_put_ref_domain(struct hyp_arm_smmu_v3_device *smmu,
1222 				struct hyp_arm_smmu_v3_domain *smmu_domain)
1223 {
1224 	struct domain_iommu_node *iommu_node, *temp;
1225 	struct hyp_arm_smmu_v3_device *other;
1226 
1227 	hyp_assert_write_lock_held(&smmu_domain->list_lock);
1228 
1229 	list_for_each_entry_safe(iommu_node, temp, &smmu_domain->iommu_list, list) {
1230 		other = to_smmu(iommu_node->iommu);
1231 		if (other == smmu) {
1232 			iommu_node->ref--;
1233 			if (iommu_node->ref == 0) {
1234 				/*
1235 				 * Ensure no stale tlb entries when domain_id
1236 				 * is re-used for this SMMU.
1237 				 */
1238 				smmu_inv_domain(smmu, smmu_domain);
1239 
1240 				list_del(&iommu_node->list);
1241 				hyp_free(iommu_node);
1242 			}
1243 			return;
1244 		}
1245 	}
1246 }
1247 
1248 static int smmu_fix_up_domains(struct hyp_arm_smmu_v3_device *smmu,
1249 			       struct hyp_arm_smmu_v3_domain *smmu_domain)
1250 {
1251 	/*
1252 	 * BYPASS domains are only supported on stage-2 instances. That is
1253 	 * over-restrictive, but for now stage-1 is limited to VA_BITS to match
1254 	 * the kernel and might not cover the IA bits, so we don't support it.
1255 	 */
1256 	if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_BYPASS) {
1257 		if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1258 			smmu_domain->type = KVM_ARM_SMMU_DOMAIN_S2;
1259 		else
1260 			return -EINVAL;
1261 	} else if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_ANY) {
1262 		/* Any domain defaults to S1 as we don't know if the guest needs pasid. */
1263 		if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) {
1264 			smmu_domain->type = KVM_ARM_SMMU_DOMAIN_S1;
1265 		} else {
1266 			smmu_domain->type = KVM_ARM_SMMU_DOMAIN_S2;
1267 		}
1268 	}
1269 
1270 	return 0;
1271 }
1272 
1273 static int smmu_attach_dev(struct kvm_hyp_iommu *iommu, struct kvm_hyp_iommu_domain *domain,
1274 			   u32 sid, u32 pasid, u32 pasid_bits, unsigned long flags)
1275 {
1276 	int i;
1277 	int ret;
1278 	struct arm_smmu_ste *dst;
1279 	struct arm_smmu_ste ste = {};
1280 	struct hyp_arm_smmu_v3_device *smmu = to_smmu(iommu);
1281 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1282 	struct domain_iommu_node *iommu_node = NULL;
1283 	bool init_idmap = false;
1284 
1285 	hyp_write_lock(&smmu_domain->list_lock);
1286 	kvm_iommu_lock(iommu);
1287 	dst = smmu_get_alloc_ste_ptr(smmu, sid);
1288 	if (!dst) {
1289 		ret = -ENOMEM;
1290 		goto out_unlock;
1291 	}
1292 
1293 	/* Map domain type to an SMMUv3 stage. */
1294 	ret = smmu_fix_up_domains(smmu, smmu_domain);
1295 	if (ret)
1296 		goto out_unlock;
1297 
1298 	if (!smmu_existing_in_domain(smmu, smmu_domain)) {
1299 		if (!smmu_domain_compat(smmu, smmu_domain)) {
1300 			ret = -EBUSY;
1301 			goto out_unlock;
1302 		}
1303 		iommu_node = hyp_alloc(sizeof(struct domain_iommu_node));
1304 		if (!iommu_node) {
1305 			ret = -ENOMEM;
1306 			goto out_unlock;
1307 		}
1308 		iommu_node->iommu = iommu;
1309 		iommu_node->ref = 1;
1310 	} else {
1311 		smmu_get_ref_domain(smmu, smmu_domain);
1312 	}
1313 
1314 	if (!smmu_domain->pgtable) {
1315 		ret = smmu_domain_finalise(smmu, domain);
1316 		if (ret)
1317 			goto out_unlock_ref;
1318 		if (domain->domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID)
1319 			init_idmap = true;
1320 	}
1321 
1322 	if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_S2) {
1323 		/* Device already attached or pasid for s2. */
1324 		if (dst->data[0] || pasid) {
1325 			ret = -EBUSY;
1326 			goto out_unlock_ref;
1327 		}
1328 		ret = smmu_domain_config_s2(domain, &ste);
1329 	} else {
1330 		/*
1331 		 * Allocate and config CD, and update CD if possible.
1332 		 */
1333 		pasid_bits = min(pasid_bits, smmu->ssid_bits);
1334 		ret = smmu_domain_config_s1(smmu, domain, sid, pasid,
1335 					    pasid_bits, &ste);
1336 	}
1337 	/* We don't update STEs for pasid domains. */
1338 	if (ret || pasid)
1339 		goto out_unlock_ref;
1340 
1341 	/*
1342 	 * The SMMU may cache a disabled STE.
1343 	 * Initialize all fields, sync, then enable it.
1344 	 */
1345 	for (i = 1; i < STRTAB_STE_DWORDS; i++)
1346 		dst->data[i] = ste.data[i];
1347 
1348 	ret = smmu_sync_ste(smmu, sid);
1349 	if (ret)
1350 		goto out_unlock_ref;
1351 
1352 	WRITE_ONCE(dst->data[0], ste.data[0]);
1353 	ret = smmu_sync_ste(smmu, sid);
1354 	WARN_ON(ret);
1355 
1356 out_unlock_ref:
1357 	if (iommu_node && ret)
1358 		hyp_free(iommu_node);
1359 	else if (iommu_node)
1360 		list_add_tail(&iommu_node->list, &smmu_domain->iommu_list);
1361 	else if (ret)
1362 		smmu_put_ref_domain(smmu, smmu_domain);
1363 out_unlock:
1364 	kvm_iommu_unlock(iommu);
1365 	hyp_write_unlock(&smmu_domain->list_lock);
1366 
1367 	if (init_idmap)
1368 		ret = kvm_iommu_snapshot_host_stage2(domain);
1369 
1370 	return ret;
1371 }
1372 
1373 static int smmu_detach_dev(struct kvm_hyp_iommu *iommu, struct kvm_hyp_iommu_domain *domain,
1374 			   u32 sid, u32 pasid)
1375 {
1376 	struct arm_smmu_ste *dst;
1377 	int i, ret;
1378 	struct hyp_arm_smmu_v3_device *smmu = to_smmu(iommu);
1379 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1380 	u32 pasid_bits = 0;
1381 	u64 *cd_table, *cd;
1382 
1383 	hyp_write_lock(&smmu_domain->list_lock);
1384 	kvm_iommu_lock(iommu);
1385 	dst = smmu_get_ste_ptr(smmu, sid);
1386 	if (!dst) {
1387 		ret = -ENODEV;
1388 		goto out_unlock;
1389 	}
1390 
1391 	/*
1392 	 * For stage-1:
1393 	 * - The kernel has to detach pasid = 0 the last.
1394 	 * - This will free the CD.
1395 	 */
1396 	if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_S1) {
1397 		pasid_bits = FIELD_GET(STRTAB_STE_0_S1CDMAX, dst->data[0]);
1398 		if (pasid >= (1 << pasid_bits)) {
1399 			ret = -E2BIG;
1400 			goto out_unlock;
1401 		}
1402 		cd_table = (u64 *)(dst->data[0] & STRTAB_STE_0_S1CTXPTR_MASK);
1403 		if (WARN_ON(!cd_table)) {
1404 			ret = -ENODEV;
1405 			goto out_unlock;
1406 		}
1407 
1408 		cd_table = hyp_phys_to_virt((phys_addr_t)cd_table);
1409 		if (pasid == 0) {
1410 			int j;
1411 
1412 			/* Ensure other pasids are detached. */
1413 			for (j = 1 ; j < (1 << pasid_bits) ; ++j) {
1414 				cd = smmu_get_cd_ptr(cd_table, j);
1415 				if (cd[0] & CTXDESC_CD_0_V) {
1416 					ret = -EINVAL;
1417 					goto out_unlock;
1418 				}
1419 			}
1420 
1421 			smmu_free_cd(cd_table, pasid_bits);
1422 		} else {
1423 			cd = smmu_get_cd_ptr(cd_table, pasid);
1424 			if (!(cd[0] & CTXDESC_CD_0_V)) {
1425 				/* The device is not actually attached! */
1426 				ret = -ENOENT;
1427 				goto out_unlock;
1428 			}
1429 			cd[0] = 0;
1430 			smmu_sync_cd(smmu, sid, pasid);
1431 			cd[1] = 0;
1432 			cd[2] = 0;
1433 			cd[3] = 0;
1434 			ret = smmu_sync_cd(smmu, sid, pasid);
1435 			smmu_put_ref_domain(smmu, smmu_domain);
1436 			goto out_unlock;
1437 		}
1438 	}
1439 	/* For stage-2 and pasid = 0 */
1440 	dst->data[0] = 0;
1441 	ret = smmu_sync_ste(smmu, sid);
1442 	if (ret)
1443 		goto out_unlock;
1444 	for (i = 1; i < STRTAB_STE_DWORDS; i++)
1445 		dst->data[i] = 0;
1446 
1447 	ret = smmu_sync_ste(smmu, sid);
1448 
1449 	smmu_put_ref_domain(smmu, smmu_domain);
1450 out_unlock:
1451 	kvm_iommu_unlock(iommu);
1452 	hyp_write_unlock(&smmu_domain->list_lock);
1453 	return ret;
1454 }
1455 
1456 static int smmu_map_pages(struct kvm_hyp_iommu_domain *domain, unsigned long iova,
1457 			  phys_addr_t paddr, size_t pgsize,
1458 			  size_t pgcount, int prot, size_t *total_mapped)
1459 {
1460 	size_t mapped;
1461 	size_t granule;
1462 	int ret = 0;
1463 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1464 	struct io_pgtable *pgtable = smmu_domain->pgtable;
1465 
1466 	if (!pgtable)
1467 		return -EINVAL;
1468 
1469 	granule = 1UL << __ffs(smmu_domain->pgtable->cfg.pgsize_bitmap);
1470 	if (!IS_ALIGNED(iova | paddr | pgsize, granule))
1471 		return -EINVAL;
1472 
1473 	hyp_spin_lock(&smmu_domain->pgt_lock);
1474 	while (pgcount) {
1475 		mapped = 0;
1476 		ret = pgtable->ops.map_pages(&pgtable->ops, iova, paddr,
1477 					     pgsize, pgcount, prot, 0, &mapped);
1478 		if (ret)
1479 			break;
1480 		WARN_ON(!IS_ALIGNED(mapped, pgsize));
1481 		WARN_ON(mapped > pgcount * pgsize);
1482 
1483 		pgcount -= mapped / pgsize;
1484 		*total_mapped += mapped;
1485 		iova += mapped;
1486 		paddr += mapped;
1487 	}
1488 	hyp_spin_unlock(&smmu_domain->pgt_lock);
1489 
1490 	return ret;
1491 }
1492 
1493 static size_t smmu_unmap_pages(struct kvm_hyp_iommu_domain *domain, unsigned long iova,
1494 			       size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *gather)
1495 {
1496 	size_t granule, unmapped, total_unmapped = 0;
1497 	size_t size = pgsize * pgcount;
1498 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1499 	struct io_pgtable *pgtable = smmu_domain->pgtable;
1500 
1501 	if (!pgtable)
1502 		return -EINVAL;
1503 
1504 	granule = 1UL << __ffs(smmu_domain->pgtable->cfg.pgsize_bitmap);
1505 	if (!IS_ALIGNED(iova | pgsize, granule))
1506 		return 0;
1507 
1508 	hyp_spin_lock(&smmu_domain->pgt_lock);
1509 	while (total_unmapped < size) {
1510 		unmapped = pgtable->ops.unmap_pages(&pgtable->ops, iova, pgsize,
1511 						    pgcount, gather);
1512 		if (!unmapped)
1513 			break;
1514 		iova += unmapped;
1515 		total_unmapped += unmapped;
1516 		pgcount -= unmapped / pgsize;
1517 	}
1518 	hyp_spin_unlock(&smmu_domain->pgt_lock);
1519 	return total_unmapped;
1520 }
1521 
1522 static phys_addr_t smmu_iova_to_phys(struct kvm_hyp_iommu_domain *domain,
1523 				     unsigned long iova)
1524 {
1525 	phys_addr_t paddr;
1526 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1527 	struct io_pgtable *pgtable = smmu_domain->pgtable;
1528 
1529 	if (!pgtable)
1530 		return -EINVAL;
1531 
1532 	hyp_spin_lock(&smmu_domain->pgt_lock);
1533 	paddr = pgtable->ops.iova_to_phys(&pgtable->ops, iova);
1534 	hyp_spin_unlock(&smmu_domain->pgt_lock);
1535 
1536 	return paddr;
1537 }
1538 
1539 static bool smmu_dabt_device(struct hyp_arm_smmu_v3_device *smmu,
1540 			     struct user_pt_regs *regs,
1541 			     u64 esr, u32 off)
1542 {
1543 	bool is_write = esr & ESR_ELx_WNR;
1544 	unsigned int len = BIT((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
1545 	int rd = (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
1546 	const u32 no_access  = 0;
1547 	const u32 read_write = (u32)(-1);
1548 	const u32 read_only = is_write ? no_access : read_write;
1549 	u32 mask = no_access;
1550 
1551 	/*
1552 	 * Only handle MMIO access with u32 size and alignment.
1553 	 * We don't need to change 64-bit registers for now.
1554 	 */
1555 	if ((len != sizeof(u32)) || (off & (sizeof(u32) - 1)))
1556 		return false;
1557 
1558 	switch (off) {
1559 	case ARM_SMMU_EVTQ_PROD + SZ_64K:
1560 		mask = read_write;
1561 		break;
1562 	case ARM_SMMU_EVTQ_CONS + SZ_64K:
1563 		mask = read_write;
1564 		break;
1565 	case ARM_SMMU_GERROR:
1566 		mask = read_only;
1567 		break;
1568 	case ARM_SMMU_GERRORN:
1569 		mask = read_write;
1570 		break;
1571 	}
1572 
1573 	if (!mask)
1574 		return false;
1575 	if (is_write)
1576 		writel_relaxed(regs->regs[rd] & mask, smmu->base + off);
1577 	else
1578 		regs->regs[rd] = readl_relaxed(smmu->base + off);
1579 
1580 	return true;
1581 }
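/*
 * Decoding example (illustrative): the access size comes from ESR_ELx.SAS,
 * so SAS = 0b10 gives len = 4, the only size accepted above; rd is the Xt
 * register of the trapped load/store and is used to emulate the access
 * against the real MMIO register.
 */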
1582 
1583 static int smmu_id_to_token(pkvm_handle_t smmu_id, u64 *out_token)
1584 {
1585 	if (smmu_id >= kvm_hyp_arm_smmu_v3_count)
1586 		return -EINVAL;
1587 
1588 	smmu_id = array_index_nospec(smmu_id, kvm_hyp_arm_smmu_v3_count);
1589 	*out_token = kvm_hyp_arm_smmu_v3_smmus[smmu_id].mmio_addr;
1590 	return 0;
1591 }
1592 
1593 static int smmu_dev_block_dma(struct kvm_hyp_iommu *iommu, u32 sid, bool is_host2guest)
1594 {
1595 	struct hyp_arm_smmu_v3_device *smmu = to_smmu(iommu);
1596 	static struct arm_smmu_ste *dst;
1597 	int ret = 0;
1598 
1599 	kvm_iommu_lock(iommu);
1600 	dst = smmu_get_ste_ptr(smmu, sid);
1601 
1602 	/*
1603 	 * VFIO will attach the device to a blocking domain; this makes the
1604 	 * kernel driver detach the device, which should then have a zeroed STE.
1605 	 * So, if this is not the current state of the device, something
1606 	 * went wrong.
1607 	 * For guests, we need to do more as guests might not exit cleanly
1608 	 * and the device might be translating, so we have to actually block
1609 	 * the device and clean the STE/CD.
1610 	 */
1611 	if (dst->data[0]) {
1612 		if (is_host2guest) {
1613 			ret = -EINVAL;
1614 		} else {
1615 			int i = 0;
1616 			u32 cfg = FIELD_GET(STRTAB_STE_0_CFG, dst->data[0]);
1617 
1618 			if (cfg == STRTAB_STE_0_CFG_S1_TRANS) {
1619 				size_t nr_entries, cd_sz;
1620 				u64 cd_table;
1621 
1622 				cd_table = (dst->data[0] & STRTAB_STE_0_S1CTXPTR_MASK);
1623 				nr_entries = 1 << FIELD_GET(STRTAB_STE_0_S1CDMAX, dst->data[0]);
1624 				cd_sz = nr_entries * (CTXDESC_CD_DWORDS << 3);
1625 				kvm_iommu_reclaim_pages(hyp_phys_to_virt(cd_table), get_order(cd_sz));
1626 			}
1627 			/* zap zippity zop. */
1628 			for (i = 0; i < STRTAB_STE_DWORDS; i++)
1629 				dst->data[i] = 0;
1630 			ret = smmu_sync_ste(smmu, sid);
1631 		}
1632 	}
1633 
1634 	kvm_iommu_unlock(iommu);
1635 	return ret;
1636 }
1637 
1638 static bool smmu_dabt_handler(struct user_pt_regs *regs, u64 esr, u64 addr)
1639 {
1640 	struct hyp_arm_smmu_v3_device *smmu;
1641 
1642 	for_each_smmu(smmu) {
1643 		if (addr < smmu->mmio_addr || addr >= smmu->mmio_addr + smmu->mmio_size)
1644 			continue;
1645 		return smmu_dabt_device(smmu, regs, esr, addr - smmu->mmio_addr);
1646 	}
1647 	return false;
1648 }
1649 
1650 int smmu_suspend(struct kvm_hyp_iommu *iommu)
1651 {
1652 	struct hyp_arm_smmu_v3_device *smmu = to_smmu(iommu);
1653 
1654 	/*
1655 	 * Disable translation. GBPA is validated at probe to be set, so all
1656 	 * translation is aborted while the SMMU is disabled.
1657 	 */
1658 	if (iommu->power_domain.type == KVM_POWER_DOMAIN_HOST_HVC)
1659 		return smmu_write_cr0(smmu, 0);
1660 	return 0;
1661 }
1662 
1663 int smmu_resume(struct kvm_hyp_iommu *iommu)
1664 {
1665 	struct hyp_arm_smmu_v3_device *smmu = to_smmu(iommu);
1666 
1667 	/*
1668 	 * Re-enable and clean all caches.
1669 	 */
1670 	if (iommu->power_domain.type == KVM_POWER_DOMAIN_HOST_HVC)
1671 		return smmu_reset_device(smmu);
1672 	return 0;
1673 }
1674 
1675 /*
1676  * Although the SMMU can support multiple granules, it must at least support
1677  * the same PAGE_SIZE as the CPU, and for IDMAP domains we only use this granule.
1678  * As we optimize for memory usage and performance, we try to use block mappings
1679  * when possible.
1680  */
1681 static size_t smmu_pgsize_idmap(size_t size, u64 paddr)
1682 {
1683 	size_t pgsizes;
1684 	size_t pgsize_bitmask = 0;
1685 
1686 	if (PAGE_SIZE == SZ_4K) {
1687 		pgsize_bitmask = SZ_4K | SZ_2M | SZ_1G;
1688 	} else if (PAGE_SIZE == SZ_16K) {
1689 		pgsize_bitmask = SZ_16K | SZ_32M;
1690 	} else if (PAGE_SIZE == SZ_64K) {
1691 		pgsize_bitmask = SZ_64K | SZ_512M;
1692 	}
1693 
1694 	/* All page sizes that fit the size */
1695 	pgsizes = pgsize_bitmask & GENMASK_ULL(__fls(size), 0);
1696 
1697 	/* Address must be aligned to page size */
1698 	if (likely(paddr))
1699 		pgsizes &= GENMASK_ULL(__ffs(paddr), 0);
1700 
1701 	WARN_ON(!pgsizes);
1702 
1703 	return BIT(__fls(pgsizes));
1704 }
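/*
 * Example (illustrative, assuming 4KB PAGE_SIZE): idmapping 5MB starting at
 * a 2MB-aligned paddr first selects a 2MB block (the largest size that both
 * fits and matches the alignment), maps two of them, then falls back to 4KB
 * pages for the remaining 1MB on the caller's next iterations.
 */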
1705 
1706 static void smmu_host_stage2_idmap(struct kvm_hyp_iommu_domain *domain,
1707 				   phys_addr_t start, phys_addr_t end, int prot)
1708 {
1709 	size_t size = end - start;
1710 	size_t pgsize, pgcount;
1711 	size_t mapped, unmapped;
1712 	int ret;
1713 	struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1714 	struct io_pgtable *pgtable = smmu_domain->pgtable;
1715 
1716 	end = min(end, BIT(pgtable->cfg.oas));
1717 	if (start >= end)
1718 		return;
1719 
1720 	if (prot) {
1721 		if (!(prot & IOMMU_MMIO))
1722 			prot |= IOMMU_CACHE;
1723 
1724 		while (size) {
1725 			mapped = 0;
1726 			pgsize = smmu_pgsize_idmap(size, start);
1727 			pgcount = size / pgsize;
1728 			ret = pgtable->ops.map_pages(&pgtable->ops, start, start,
1729 						     pgsize, pgcount, prot, 0, &mapped);
1730 			size -= mapped;
1731 			start += mapped;
1732 			if (!mapped || ret)
1733 				return;
1734 		}
1735 	} else {
1736 		while (size) {
1737 			pgsize = smmu_pgsize_idmap(size, start);
1738 			pgcount = size / pgsize;
1739 			unmapped = pgtable->ops.unmap_pages(&pgtable->ops, start,
1740 							    pgsize, pgcount, NULL);
1741 			size -= unmapped;
1742 			start += unmapped;
1743 			if (!unmapped)
1744 				return;
1745 		}
1746 	}
1747 }
1748 
1749 #ifdef MODULE
1750 int smmu_init_hyp_module(const struct pkvm_module_ops *ops)
1751 {
1752 	if (!ops)
1753 		return -EINVAL;
1754 
1755 	mod_ops = ops;
1756 	return 0;
1757 }
1758 #endif
1759 
1760 /* Shared with the kernel driver in EL1 */
1761 struct kvm_iommu_ops smmu_ops = {
1762 	.init				= smmu_init,
1763 	.get_iommu_by_id		= smmu_id_to_iommu,
1764 	.alloc_domain			= smmu_alloc_domain,
1765 	.free_domain			= smmu_free_domain,
1766 	.iotlb_sync			= smmu_iotlb_sync,
1767 	.attach_dev			= smmu_attach_dev,
1768 	.detach_dev			= smmu_detach_dev,
1769 	.map_pages			= smmu_map_pages,
1770 	.unmap_pages			= smmu_unmap_pages,
1771 	.iova_to_phys			= smmu_iova_to_phys,
1772 	.dabt_handler			= smmu_dabt_handler,
1773 	.suspend			= smmu_suspend,
1774 	.resume				= smmu_resume,
1775 	.host_stage2_idmap		= smmu_host_stage2_idmap,
1776 	.dev_block_dma			= smmu_dev_block_dma,
1777 	.get_iommu_token_by_id		= smmu_id_to_token,
1778 };
1779