1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * pKVM hyp driver for the Arm SMMUv3
4 *
5 * Copyright (C) 2022 Linaro Ltd.
6 */
7 #include <asm/arm-smmu-v3-common.h>
8 #include <asm/kvm_hyp.h>
9 #include <linux/io-pgtable-arm.h>
10 #include <nvhe/alloc.h>
11 #include <nvhe/iommu.h>
12 #include <nvhe/mem_protect.h>
13 #include <nvhe/mm.h>
14 #include <nvhe/pkvm.h>
15 #include <nvhe/rwlock.h>
16 #include <nvhe/trap_handler.h>
17
18 #include "arm_smmu_v3.h"
19 #include "arm-smmu-v3-module.h"
20
21 #ifdef MODULE
22 void *memset(void *dst, int c, size_t count)
23 {
24 return CALL_FROM_OPS(memset, dst, c, count);
25 }
26
27 #ifdef CONFIG_LIST_HARDENED
28 bool __list_add_valid_or_report(struct list_head *new,
29 struct list_head *prev,
30 struct list_head *next)
31 {
32 return CALL_FROM_OPS(list_add_valid_or_report, new, prev, next);
33 }
34
35 bool __list_del_entry_valid_or_report(struct list_head *entry)
36 {
37 return CALL_FROM_OPS(list_del_entry_valid_or_report, entry);
38 }
39 #endif
40
41 const struct pkvm_module_ops *mod_ops;
42 #endif
43
44 #define ARM_SMMU_POLL_TIMEOUT_US 100000 /* 100ms arbitrary timeout */
45
46 size_t __ro_after_init kvm_hyp_arm_smmu_v3_count;
47 struct hyp_arm_smmu_v3_device *kvm_hyp_arm_smmu_v3_smmus;
48
49 #define for_each_smmu(smmu) \
50 for ((smmu) = kvm_hyp_arm_smmu_v3_smmus; \
51 (smmu) != &kvm_hyp_arm_smmu_v3_smmus[kvm_hyp_arm_smmu_v3_count]; \
52 (smmu)++)
53
54 /*
55 * Wait until @cond is true.
56 * Return 0 on success, or -ETIMEDOUT
57 */
58 #define smmu_wait(_cond) \
59 ({ \
60 int __i = 0; \
61 int __ret = 0; \
62 \
63 while (!(_cond)) { \
64 if (++__i > ARM_SMMU_POLL_TIMEOUT_US) { \
65 __ret = -ETIMEDOUT; \
66 break; \
67 } \
68 pkvm_udelay(1); \
69 } \
70 __ret; \
71 })
72
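/*
 * Like smmu_wait(), but if the SMMU supports SEV (an event is broadcast
 * on CMD_SYNC completion), wait on WFE until the condition holds;
 * otherwise poll with a timeout. Typical use, as later in this file:
 *
 *	ret = smmu_wait_event(smmu, smmu_cmdq_empty(smmu));
 */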
73 #define smmu_wait_event(_smmu, _cond) \
74 ({ \
75 if ((_smmu)->features & ARM_SMMU_FEAT_SEV) { \
76 while (!(_cond)) \
77 wfe(); \
78 } \
79 smmu_wait(_cond); \
80 })
81
82 struct domain_iommu_node {
83 struct kvm_hyp_iommu *iommu;
84 struct list_head list;
85 unsigned long ref;
86 };
87
88 /*
89 * SMMUv3 domain:
90 * @domain: Pointer to the IOMMU domain.
91 * @iommu_list: List of SMMU instances for this domain
92 * @list_lock: Protects iommu_list
93 * @type: Type of domain (S1, S2)
94 * @pgt_lock: Lock for page table
95 * @pgtable: io_pgtable instance for this domain
96 */
97 struct hyp_arm_smmu_v3_domain {
98 struct kvm_hyp_iommu_domain *domain;
99 struct list_head iommu_list;
100 hyp_rwlock_t list_lock;
101 u32 type;
102 hyp_spinlock_t pgt_lock;
103 struct io_pgtable *pgtable;
104 };
105
106 static struct hyp_arm_smmu_v3_device *to_smmu(struct kvm_hyp_iommu *iommu)
107 {
108 return container_of(iommu, struct hyp_arm_smmu_v3_device, iommu);
109 }
110
111 static int smmu_write_cr0(struct hyp_arm_smmu_v3_device *smmu, u32 val)
112 {
113 writel_relaxed(val, smmu->base + ARM_SMMU_CR0);
114 return smmu_wait(readl_relaxed(smmu->base + ARM_SMMU_CR0ACK) == val);
115 }
116
117 /* Transfer ownership of structures from host to hyp */
118 static int smmu_take_pages(u64 phys, size_t size)
119 {
120 WARN_ON(!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size));
121 return __pkvm_host_donate_hyp(phys >> PAGE_SHIFT, size >> PAGE_SHIFT);
122 }
123
124 static void smmu_reclaim_pages(u64 phys, size_t size)
125 {
126 WARN_ON(!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size));
127 WARN_ON(__pkvm_hyp_donate_host(phys >> PAGE_SHIFT, size >> PAGE_SHIFT));
128 }
129
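/*
 * CMDQ_PROD/CONS hold a wrap flag directly above the index bits: with
 * cmdq_log2size = N, bits [N-1:0] are the queue index (Q_IDX) and bit N
 * is the wrap flag (Q_WRAP). Comparing both tells a full queue apart
 * from an empty one.
 */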
130 #define Q_WRAP(smmu, reg) ((reg) & (1 << (smmu)->cmdq_log2size))
131 #define Q_IDX(smmu, reg) ((reg) & ((1 << (smmu)->cmdq_log2size) - 1))
132
133 static bool smmu_cmdq_has_space(struct hyp_arm_smmu_v3_device *smmu, u32 n)
134 {
135 u64 smmu_cons = readl_relaxed(smmu->base + ARM_SMMU_CMDQ_CONS);
136 u32 space, prod, cons;
137
138 prod = Q_IDX(smmu, smmu->cmdq_prod);
139 cons = Q_IDX(smmu, smmu_cons);
140
141 if (Q_WRAP(smmu, smmu->cmdq_prod) == Q_WRAP(smmu, smmu_cons))
142 space = (1 << smmu->cmdq_log2size) - (prod - cons);
143 else
144 space = cons - prod;
145
146 return space >= n;
147 }
148
149 static bool smmu_cmdq_empty(struct hyp_arm_smmu_v3_device *smmu)
150 {
151 u64 cons = readl_relaxed(smmu->base + ARM_SMMU_CMDQ_CONS);
152
153 return Q_IDX(smmu, smmu->cmdq_prod) == Q_IDX(smmu, cons) &&
154 Q_WRAP(smmu, smmu->cmdq_prod) == Q_WRAP(smmu, cons);
155 }
156
157 static int smmu_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
158 {
159 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
160
161 switch (ent->opcode) {
162 case CMDQ_OP_CFGI_ALL:
163 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
164 break;
165 case CMDQ_OP_CFGI_CD:
166 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
167 fallthrough;
168 case CMDQ_OP_CFGI_STE:
169 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
170 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
171 break;
172 case CMDQ_OP_TLBI_NH_VA:
173 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
174 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
175 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
176 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
177 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
178 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
179 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
180 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
181 break;
182 case CMDQ_OP_TLBI_NSNH_ALL:
183 break;
184 case CMDQ_OP_TLBI_NH_ASID:
185 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
186 fallthrough;
187 case CMDQ_OP_TLBI_S12_VMALL:
188 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
189 break;
190 case CMDQ_OP_TLBI_S2_IPA:
191 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
192 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
193 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
194 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
195 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
196 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
197 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
198 break;
199 case CMDQ_OP_CMD_SYNC:
200 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
201 break;
202 default:
203 return -EINVAL;
204 }
205
206 return 0;
207 }
208
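/*
 * Copy @n commands into the command queue and publish the new PROD
 * value. Waits (WFE when SEV is supported) for enough free slots first.
 * Completion still requires a following CMD_SYNC, see smmu_sync_cmd().
 */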
209 static int smmu_issue_cmds(struct hyp_arm_smmu_v3_device *smmu,
210 u64 *cmds, int n)
211 {
212 int i;
213 int ret;
214 u32 prod;
215
216 ret = smmu_wait_event(smmu, smmu_cmdq_has_space(smmu, n));
217 if (ret)
218 return ret;
219
220 for (i = 0; i < n; i++) {
221 int j;
222 int idx = Q_IDX(smmu, smmu->cmdq_prod + i);
223 u64 *slot = smmu->cmdq_base + idx * CMDQ_ENT_DWORDS;
224
225 for (j = 0; j < CMDQ_ENT_DWORDS; j++)
226 slot[j] = cpu_to_le64(cmds[i * CMDQ_ENT_DWORDS + j]);
227 }
228
229 prod = (Q_WRAP(smmu, smmu->cmdq_prod) | Q_IDX(smmu, smmu->cmdq_prod)) + n;
230 smmu->cmdq_prod = Q_OVF(smmu->cmdq_prod) | Q_WRAP(smmu, prod) | Q_IDX(smmu, prod);
231
232 writel(Q_IDX(smmu, smmu->cmdq_prod) | Q_WRAP(smmu, smmu->cmdq_prod),
233 smmu->base + ARM_SMMU_CMDQ_PROD);
234 return 0;
235 }
236
237 static int smmu_add_cmd(struct hyp_arm_smmu_v3_device *smmu,
238 struct arm_smmu_cmdq_ent *ent)
239 {
240 u64 cmd[CMDQ_ENT_DWORDS] = {};
241 int ret;
242
243 ret = smmu_build_cmd(cmd, ent);
244 if (ret)
245 return ret;
246
247 return smmu_issue_cmds(smmu, cmd, 1);
248 }
249
250 static int smmu_sync_cmd(struct hyp_arm_smmu_v3_device *smmu)
251 {
252 int ret;
253 struct arm_smmu_cmdq_ent cmd = {
254 .opcode = CMDQ_OP_CMD_SYNC,
255 };
256
257 ret = smmu_add_cmd(smmu, &cmd);
258 if (ret)
259 return ret;
260
261 return smmu_wait_event(smmu, smmu_cmdq_empty(smmu));
262 }
263
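/*
 * Submit a single command followed by a CMD_SYNC and wait for the queue
 * to drain. This is a no-op while the SMMU is powered off; caches are
 * cleaned again on resume via smmu_reset_device().
 */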
264 static int smmu_send_cmd(struct hyp_arm_smmu_v3_device *smmu,
265 struct arm_smmu_cmdq_ent *cmd)
266 {
267 int ret;
268
269 if (smmu->iommu.power_is_off)
270 return 0;
271
272 ret = smmu_add_cmd(smmu, cmd);
273 if (ret)
274 return ret;
275
276 return smmu_sync_cmd(smmu);
277 }
278
279 static int smmu_sync_ste(struct hyp_arm_smmu_v3_device *smmu, u32 sid)
280 {
281 struct arm_smmu_cmdq_ent cmd = {
282 .opcode = CMDQ_OP_CFGI_STE,
283 .cfgi.sid = sid,
284 .cfgi.leaf = true,
285 };
286
287 return smmu_send_cmd(smmu, &cmd);
288 }
289
290 static int smmu_sync_cd(struct hyp_arm_smmu_v3_device *smmu, u32 sid, u32 ssid)
291 {
292 struct arm_smmu_cmdq_ent cmd = {
293 .opcode = CMDQ_OP_CFGI_CD,
294 .cfgi.sid = sid,
295 .cfgi.ssid = ssid,
296 .cfgi.leaf = true,
297 };
298
299 return smmu_send_cmd(smmu, &cmd);
300 }
301
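/*
 * Lazily allocate the level-2 stream table that covers @sid and install
 * it in the level-1 descriptor. The memory comes from the host-donated
 * IOMMU pool and is mapped non-cacheable when the SMMU is not
 * IO-coherent.
 */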
302 static int smmu_alloc_l2_strtab(struct hyp_arm_smmu_v3_device *smmu, u32 sid)
303 {
304 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
305 struct arm_smmu_strtab_l1 *l1_desc;
306 dma_addr_t l2ptr_dma;
307 struct arm_smmu_strtab_l2 *l2table;
308 size_t l2_order = get_order(sizeof(struct arm_smmu_strtab_l2));
309 int flags = 0;
310 u32 l1_idx = arm_smmu_strtab_l1_idx(sid);
311
312 if (l1_idx >= cfg->l2.num_l1_ents)
313 return -EINVAL;
314
315 l1_desc = &cfg->l2.l1tab[l1_idx];
316 if (l1_desc->l2ptr)
317 return 0;
318
319 if (!(smmu->features & ARM_SMMU_FEAT_COHERENCY))
320 flags |= IOMMU_PAGE_NOCACHE;
321
322 l2table = kvm_iommu_donate_pages(l2_order, flags);
323 if (!l2table)
324 return -ENOMEM;
325
326 l2ptr_dma = hyp_virt_to_phys(l2table);
327
328 if (l2ptr_dma & (~STRTAB_L1_DESC_L2PTR_MASK | ~PAGE_MASK)) {
329 kvm_iommu_reclaim_pages(l2table, l2_order);
330 return -EINVAL;
331 }
332
333 /* Ensure the empty stream table is visible before the descriptor write */
334 wmb();
335
336 arm_smmu_write_strtab_l1_desc(l1_desc, l2ptr_dma);
337 return 0;
338 }
339
340 static struct arm_smmu_ste *
341 smmu_get_ste_ptr(struct hyp_arm_smmu_v3_device *smmu, u32 sid)
342 {
343 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
344
345 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
346 struct arm_smmu_strtab_l1 *l1_desc =
347 &cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)];
348 struct arm_smmu_strtab_l2 *l2ptr;
349
350 if (arm_smmu_strtab_l1_idx(sid) >= cfg->l2.num_l1_ents)
351 return NULL;
352 /* L2 should be allocated before calling this. */
353 if (WARN_ON(!l1_desc->l2ptr))
354 return NULL;
355
356 l2ptr = hyp_phys_to_virt(l1_desc->l2ptr & STRTAB_L1_DESC_L2PTR_MASK);
357 /* Two-level walk */
358 return &l2ptr->stes[arm_smmu_strtab_l2_idx(sid)];
359 }
360
361 if (sid >= cfg->linear.num_ents)
362 return NULL;
363 /* Simple linear lookup */
364 return &cfg->linear.table[sid];
365 }
366
367 static struct arm_smmu_ste *
368 smmu_get_alloc_ste_ptr(struct hyp_arm_smmu_v3_device *smmu, u32 sid)
369 {
370 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
371 int ret = smmu_alloc_l2_strtab(smmu, sid);
372
373 if (ret) {
374 WARN_ON(ret != -ENOMEM);
375 return NULL;
376 }
377 }
378 return smmu_get_ste_ptr(smmu, sid);
379 }
380
381 static u64 *smmu_get_cd_ptr(u64 *cdtab, u32 ssid)
382 {
383 /* Only linear supported for now. */
384 return cdtab + ssid * CTXDESC_CD_DWORDS;
385 }
386
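/*
 * Allocate a linear context-descriptor table large enough for
 * 2^pasid_bits entries. Note that the return value is the *physical*
 * address of the table, which is what ends up in STE.S1ContextPtr.
 */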
387 static u64 *smmu_alloc_cd(struct hyp_arm_smmu_v3_device *smmu, u32 pasid_bits)
388 {
389 u64 *cd_table;
390 int flags = 0;
391 u32 requested_order = get_order((1 << pasid_bits) *
392 (CTXDESC_CD_DWORDS << 3));
393
394 /*
395 * We only support linear tables of at most 64K, which should be enough
396 * for 128 pasids.
397 */
398 if (WARN_ON(requested_order > 4))
399 return NULL;
400
401 if (!(smmu->features & ARM_SMMU_FEAT_COHERENCY))
402 flags |= IOMMU_PAGE_NOCACHE;
403
404 cd_table = kvm_iommu_donate_pages(requested_order, flags);
405 if (!cd_table)
406 return NULL;
407 return (u64 *)hyp_virt_to_phys(cd_table);
408 }
409
410 static void smmu_free_cd(u64 *cd_table, u32 pasid_bits)
411 {
412 u32 order = get_order((1 << pasid_bits) *
413 (CTXDESC_CD_DWORDS << 3));
414
415 kvm_iommu_reclaim_pages(cd_table, order);
416 }
417
418 static int smmu_init_registers(struct hyp_arm_smmu_v3_device *smmu)
419 {
420 u64 val, old;
421 int ret;
422
423 if (!(readl_relaxed(smmu->base + ARM_SMMU_GBPA) & GBPA_ABORT))
424 return -EINVAL;
425
426 /* Initialize all RW registers that will be read by the SMMU */
427 ret = smmu_write_cr0(smmu, 0);
428 if (ret)
429 return ret;
430
431 val = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
432 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
433 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
434 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
435 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
436 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
437 writel_relaxed(val, smmu->base + ARM_SMMU_CR1);
438 writel_relaxed(CR2_PTM, smmu->base + ARM_SMMU_CR2);
439
440 val = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
441 old = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
442 /* Service Failure Mode is fatal */
443 if ((val ^ old) & GERROR_SFM_ERR)
444 return -EIO;
445 /* Clear pending errors */
446 writel_relaxed(val, smmu->base + ARM_SMMU_GERRORN);
447
448 return 0;
449 }
450
451 static int smmu_init_cmdq(struct hyp_arm_smmu_v3_device *smmu)
452 {
453 u64 cmdq_base;
454 size_t cmdq_nr_entries, cmdq_size;
455 int ret;
456 enum kvm_pgtable_prot prot = PAGE_HYP;
457
458 cmdq_base = readq_relaxed(smmu->base + ARM_SMMU_CMDQ_BASE);
459 if (cmdq_base & ~(Q_BASE_RWA | Q_BASE_ADDR_MASK | Q_BASE_LOG2SIZE))
460 return -EINVAL;
461
462 smmu->cmdq_log2size = cmdq_base & Q_BASE_LOG2SIZE;
463 cmdq_nr_entries = 1 << smmu->cmdq_log2size;
464 cmdq_size = cmdq_nr_entries * CMDQ_ENT_DWORDS * 8;
465
466 cmdq_base &= Q_BASE_ADDR_MASK;
467
468 if (!(smmu->features & ARM_SMMU_FEAT_COHERENCY))
469 prot |= KVM_PGTABLE_PROT_NORMAL_NC;
470
471 ret = ___pkvm_host_donate_hyp_prot(cmdq_base >> PAGE_SHIFT,
472 PAGE_ALIGN(cmdq_size) >> PAGE_SHIFT,
473 false, prot);
474 if (ret)
475 return ret;
476
477 smmu->cmdq_base = hyp_phys_to_virt(cmdq_base);
478
479 memset(smmu->cmdq_base, 0, cmdq_size);
480 writel_relaxed(0, smmu->base + ARM_SMMU_CMDQ_PROD);
481 writel_relaxed(0, smmu->base + ARM_SMMU_CMDQ_CONS);
482
483 return 0;
484 }
485
486 /*
487 * Event queue support is optional and is managed by the kernel.
488 * However, it must be put in a shared state so that it can't be donated
489 * to the hypervisor later.
490 * This relies on ARM_SMMU_EVTQ_BASE not being changeable after
491 * de-privilege.
492 */
493 static int smmu_init_evtq(struct hyp_arm_smmu_v3_device *smmu)
494 {
495 u64 evtq_base, evtq_pfn;
496 size_t evtq_nr_entries, evtq_size, evtq_nr_pages;
497 void *evtq_va, *evtq_end;
498 size_t i;
499 int ret;
500
501 evtq_base = readq_relaxed(smmu->base + ARM_SMMU_EVTQ_BASE);
502 if (!evtq_base)
503 return 0;
504
505 if (evtq_base & ~(Q_BASE_RWA | Q_BASE_ADDR_MASK | Q_BASE_LOG2SIZE))
506 return -EINVAL;
507
508 evtq_nr_entries = 1 << (evtq_base & Q_BASE_LOG2SIZE);
509 evtq_size = evtq_nr_entries * EVTQ_ENT_DWORDS * 8;
510 evtq_nr_pages = PAGE_ALIGN(evtq_size) >> PAGE_SHIFT;
511
512 evtq_pfn = PAGE_ALIGN(evtq_base & Q_BASE_ADDR_MASK) >> PAGE_SHIFT;
513
514 for (i = 0 ; i < evtq_nr_pages ; ++i) {
515 ret = __pkvm_host_share_hyp(evtq_pfn + i);
516 if (ret)
517 return ret;
518 }
519
520 evtq_va = hyp_phys_to_virt(evtq_pfn << PAGE_SHIFT);
521 evtq_end = hyp_phys_to_virt((evtq_pfn + evtq_nr_pages) << PAGE_SHIFT);
522
523 return hyp_pin_shared_mem(evtq_va, evtq_end);
524 }
525
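/*
 * Take over the stream table allocated by the host driver: sanity-check
 * STRTAB_BASE/STRTAB_BASE_CFG, donate the linear table (or the level-1
 * table) to the hypervisor, and zero it so that no stream starts out
 * configured.
 */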
526 static int smmu_init_strtab(struct hyp_arm_smmu_v3_device *smmu)
527 {
528 int ret;
529 u64 strtab_base;
530 size_t strtab_size;
531 u32 strtab_cfg, fmt;
532 int split, log2size;
533 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
534 enum kvm_pgtable_prot prot = PAGE_HYP;
535
536 if (!(smmu->features & ARM_SMMU_FEAT_COHERENCY))
537 prot |= KVM_PGTABLE_PROT_NORMAL_NC;
538
539 strtab_base = readq_relaxed(smmu->base + ARM_SMMU_STRTAB_BASE);
540 if (strtab_base & ~(STRTAB_BASE_ADDR_MASK | STRTAB_BASE_RA))
541 return -EINVAL;
542
543 strtab_cfg = readl_relaxed(smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
544 if (strtab_cfg & ~(STRTAB_BASE_CFG_FMT | STRTAB_BASE_CFG_SPLIT |
545 STRTAB_BASE_CFG_LOG2SIZE))
546 return -EINVAL;
547
548 fmt = FIELD_GET(STRTAB_BASE_CFG_FMT, strtab_cfg);
549 split = FIELD_GET(STRTAB_BASE_CFG_SPLIT, strtab_cfg);
550 log2size = FIELD_GET(STRTAB_BASE_CFG_LOG2SIZE, strtab_cfg);
551 strtab_base &= STRTAB_BASE_ADDR_MASK;
552
553 switch (fmt) {
554 case STRTAB_BASE_CFG_FMT_LINEAR:
555 if (split)
556 return -EINVAL;
557 cfg->linear.num_ents = 1 << log2size;
558 strtab_size = cfg->linear.num_ents * sizeof(struct arm_smmu_ste);
559 cfg->linear.ste_dma = strtab_base;
560 ret = ___pkvm_host_donate_hyp_prot(strtab_base >> PAGE_SHIFT,
561 PAGE_ALIGN(strtab_size) >> PAGE_SHIFT,
562 false, prot);
563 if (ret)
564 return -EINVAL;
565 cfg->linear.table = hyp_phys_to_virt(strtab_base);
566 /* Disable all STEs */
567 memset(cfg->linear.table, 0, strtab_size);
568 break;
569 case STRTAB_BASE_CFG_FMT_2LVL:
570 if (split != STRTAB_SPLIT)
571 return -EINVAL;
572 cfg->l2.num_l1_ents = 1 << max(0, log2size - split);
573 strtab_size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1);
574 cfg->l2.l1_dma = strtab_base;
575 ret = ___pkvm_host_donate_hyp_prot(strtab_base >> PAGE_SHIFT,
576 PAGE_ALIGN(strtab_size) >> PAGE_SHIFT,
577 false, prot);
578 if (ret)
579 return -EINVAL;
580 cfg->l2.l1tab = hyp_phys_to_virt(strtab_base);
581 /* Disable all STEs */
582 memset(cfg->l2.l1tab, 0, strtab_size);
583 break;
584 default:
585 return -EINVAL;
586 }
587 return 0;
588 }
589
590 static int smmu_reset_device(struct hyp_arm_smmu_v3_device *smmu)
591 {
592 int ret;
593 struct arm_smmu_cmdq_ent cfgi_cmd = {
594 .opcode = CMDQ_OP_CFGI_ALL,
595 };
596 struct arm_smmu_cmdq_ent tlbi_cmd = {
597 .opcode = CMDQ_OP_TLBI_NSNH_ALL,
598 };
599
600 /* Invalidate all cached configs and TLBs */
601 ret = smmu_write_cr0(smmu, CR0_CMDQEN);
602 if (ret)
603 return ret;
604
605 ret = smmu_add_cmd(smmu, &cfgi_cmd);
606 if (ret)
607 goto err_disable_cmdq;
608
609 ret = smmu_add_cmd(smmu, &tlbi_cmd);
610 if (ret)
611 goto err_disable_cmdq;
612
613 ret = smmu_sync_cmd(smmu);
614 if (ret)
615 goto err_disable_cmdq;
616
617 /* Enable translation */
618 return smmu_write_cr0(smmu, CR0_SMMUEN | CR0_CMDQEN | CR0_ATSCHK | CR0_EVTQEN);
619
620 err_disable_cmdq:
621 return smmu_write_cr0(smmu, 0);
622 }
623
624 static int smmu_init_device(struct hyp_arm_smmu_v3_device *smmu)
625 {
626 int ret;
627
628 if (!PAGE_ALIGNED(smmu->mmio_addr | smmu->mmio_size))
629 return -EINVAL;
630
631 ret = ___pkvm_host_donate_hyp(smmu->mmio_addr >> PAGE_SHIFT,
632 smmu->mmio_size >> PAGE_SHIFT,
633 /* accept_mmio */ true);
634 if (ret)
635 return ret;
636
637 smmu->base = hyp_phys_to_virt(smmu->mmio_addr);
638
639 ret = smmu_init_registers(smmu);
640 if (ret)
641 return ret;
642
643 ret = smmu_init_cmdq(smmu);
644 if (ret)
645 return ret;
646
647 ret = smmu_init_evtq(smmu);
648 if (ret)
649 return ret;
650
651 ret = smmu_init_strtab(smmu);
652 if (ret)
653 return ret;
654
655 ret = smmu_reset_device(smmu);
656 if (ret)
657 return ret;
658
659 return kvm_iommu_init_device(&smmu->iommu);
660 }
661
662 static int smmu_init(void)
663 {
664 int ret;
665 struct hyp_arm_smmu_v3_device *smmu;
666 size_t smmu_arr_size = PAGE_ALIGN(sizeof(*kvm_hyp_arm_smmu_v3_smmus) *
667 kvm_hyp_arm_smmu_v3_count);
668 phys_addr_t smmu_arr_phys;
669
670 kvm_hyp_arm_smmu_v3_smmus = kern_hyp_va(kvm_hyp_arm_smmu_v3_smmus);
671
672 smmu_arr_phys = hyp_virt_to_phys(kvm_hyp_arm_smmu_v3_smmus);
673
674 ret = smmu_take_pages(smmu_arr_phys, smmu_arr_size);
675 if (ret)
676 return ret;
677
678 for_each_smmu(smmu) {
679 ret = smmu_init_device(smmu);
680 if (ret)
681 goto out_reclaim_smmu;
682 }
683
684 return 0;
685 out_reclaim_smmu:
686 smmu_reclaim_pages(smmu_arr_phys, smmu_arr_size);
687 return ret;
688 }
689
690 static struct kvm_hyp_iommu *smmu_id_to_iommu(pkvm_handle_t smmu_id)
691 {
692 if (smmu_id >= kvm_hyp_arm_smmu_v3_count)
693 return NULL;
694 smmu_id = array_index_nospec(smmu_id, kvm_hyp_arm_smmu_v3_count);
695
696 return &kvm_hyp_arm_smmu_v3_smmus[smmu_id].iommu;
697 }
698
699 static int smmu_alloc_domain(struct kvm_hyp_iommu_domain *domain, int type)
700 {
701 struct hyp_arm_smmu_v3_domain *smmu_domain;
702
703 if (type >= KVM_ARM_SMMU_DOMAIN_MAX)
704 return -EINVAL;
705
706 smmu_domain = hyp_alloc(sizeof(*smmu_domain));
707 if (!smmu_domain)
708 return -ENOMEM;
709
710 INIT_LIST_HEAD(&smmu_domain->iommu_list);
711 hyp_rwlock_init(&smmu_domain->list_lock);
712 /*
713 * Can't do much without knowing the SMMUv3 instance.
714 * The page table will be allocated at attach_dev, but it can be
715 * freed from free_domain.
716 */
717 smmu_domain->domain = domain;
718 smmu_domain->type = type;
719 hyp_spin_lock_init(&smmu_domain->pgt_lock);
720 domain->priv = (void *)smmu_domain;
721
722 return 0;
723 }
724
725 static void smmu_free_domain(struct kvm_hyp_iommu_domain *domain)
726 {
727 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
728 struct domain_iommu_node *iommu_node, *temp;
729
730 if (smmu_domain->pgtable)
731 kvm_arm_io_pgtable_free(smmu_domain->pgtable);
732
733 /*
734 * With device assignment it is possible to free a domain that still has
735 * attached devices; they will be disabled through the dev_block_dma op.
736 * In that case free the IOMMU nodes to avoid leaking memory.
737 */
738 list_for_each_entry_safe(iommu_node, temp, &smmu_domain->iommu_list, list) {
739 list_del(&iommu_node->list);
740 hyp_free(iommu_node);
741 }
742
743 hyp_free(smmu_domain);
744 }
745
746 static void smmu_inv_domain(struct hyp_arm_smmu_v3_device *smmu,
747 struct hyp_arm_smmu_v3_domain *smmu_domain)
748 {
749 struct kvm_hyp_iommu_domain *domain = smmu_domain->domain;
750 struct arm_smmu_cmdq_ent cmd = {};
751
752 if (smmu_domain->pgtable->cfg.fmt == ARM_64_LPAE_S2) {
753 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
754 cmd.tlbi.vmid = domain->domain_id;
755 } else {
756 cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
757 cmd.tlbi.asid = domain->domain_id;
758 }
759
760 if (smmu->iommu.power_is_off)
761 return;
762
763 WARN_ON(smmu_send_cmd(smmu, &cmd));
764 }
765
766 static void smmu_tlb_flush_all(void *cookie)
767 {
768 struct kvm_hyp_iommu_domain *domain = cookie;
769 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
770 struct hyp_arm_smmu_v3_device *smmu;
771 struct domain_iommu_node *iommu_node;
772
773 hyp_read_lock(&smmu_domain->list_lock);
774 list_for_each_entry(iommu_node, &smmu_domain->iommu_list, list) {
775 smmu = to_smmu(iommu_node->iommu);
776 kvm_iommu_lock(&smmu->iommu);
777 smmu_inv_domain(smmu, smmu_domain);
778 kvm_iommu_unlock(&smmu->iommu);
779 }
780 hyp_read_unlock(&smmu_domain->list_lock);
781 }
782
783 static void smmu_cmdq_batch_add(struct hyp_arm_smmu_v3_device *smmu,
784 struct arm_smmu_cmdq_batch *cmds,
785 struct arm_smmu_cmdq_ent *cmd)
786 {
787 int index;
788
789 if (cmds->num == CMDQ_BATCH_ENTRIES) {
790 smmu_issue_cmds(smmu, cmds->cmds, cmds->num);
791 cmds->num = 0;
792 }
793
794 index = cmds->num * CMDQ_ENT_DWORDS;
795 smmu_build_cmd(&cmds->cmds[index], cmd);
796
797 cmds->num++;
798 }
799
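/*
 * Invalidate [iova, iova + size) on a single SMMU using the partially
 * filled TLBI command in @cmd, then issue a CMD_SYNC. With
 * ARM_SMMU_FEAT_RANGE_INV the range is encoded in TG/SCALE/NUM,
 * otherwise one command is emitted per @granule.
 */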
800 static int smmu_tlb_inv_range_smmu(struct hyp_arm_smmu_v3_device *smmu,
801 struct kvm_hyp_iommu_domain *domain,
802 struct arm_smmu_cmdq_ent *cmd,
803 unsigned long iova, size_t size, size_t granule)
804 {
805 int ret = 0;
806 unsigned long end = iova + size, num_pages = 0, tg = 0;
807 size_t inv_range = granule;
808 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
809 struct arm_smmu_cmdq_batch cmds;
810
811 kvm_iommu_lock(&smmu->iommu);
812 if (smmu->iommu.power_is_off)
813 goto out_ret;
814
815 /* Almost a copy-paste from the kernel driver. */
816 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
817 /* Get the leaf page size */
818 tg = __ffs(smmu_domain->pgtable->cfg.pgsize_bitmap);
819
820 num_pages = size >> tg;
821
822 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
823 cmd->tlbi.tg = (tg - 10) / 2;
824
825 /*
826 * Determine what level the granule is at. For non-leaf, both
827 * io-pgtable and SVA pass a nominal last-level granule because
828 * they don't know what level(s) actually apply, so ignore that
829 * and leave TTL=0. However for various errata reasons we still
830 * want to use a range command, so avoid the SVA corner case
831 * where both scale and num could be 0 as well.
832 */
833 if (cmd->tlbi.leaf)
834 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
835 else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
836 num_pages++;
837 }
838
839 cmds.num = 0;
840
841 while (iova < end) {
842 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
843 /*
844 * On each iteration of the loop, the range is 5 bits
845 * worth of the aligned size remaining.
846 * The range in pages is:
847 *
848 * range = (num_pages & (0x1f << __ffs(num_pages)))
849 */
850 unsigned long scale, num;
851
852 /* Determine the power of 2 multiple number of pages */
853 scale = __ffs(num_pages);
854 cmd->tlbi.scale = scale;
855
856 /* Determine how many chunks of 2^scale size we have */
857 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
858 cmd->tlbi.num = num - 1;
859
860 /* range is num * 2^scale * pgsize */
861 inv_range = num << (scale + tg);
862
863 /* Clear out the lower order bits for the next iteration */
864 num_pages -= num << scale;
865 }
866 cmd->tlbi.addr = iova;
867 smmu_cmdq_batch_add(smmu, &cmds, cmd);
868 BUG_ON(iova + inv_range < iova);
869 iova += inv_range;
870 }
871
872 WARN_ON(smmu_issue_cmds(smmu, cmds.cmds, cmds.num));
873 ret = smmu_sync_cmd(smmu);
874 out_ret:
875 kvm_iommu_unlock(&smmu->iommu);
876 return ret;
877 }
878
879 static void smmu_tlb_inv_range(struct kvm_hyp_iommu_domain *domain,
880 unsigned long iova, size_t size, size_t granule,
881 bool leaf)
882 {
883 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
884 struct hyp_arm_smmu_v3_device *smmu;
885 struct domain_iommu_node *iommu_node;
886 unsigned long end = iova + size;
887 struct arm_smmu_cmdq_ent cmd;
888
889 cmd.tlbi.leaf = leaf;
890 if (smmu_domain->pgtable->cfg.fmt == ARM_64_LPAE_S2) {
891 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
892 cmd.tlbi.vmid = domain->domain_id;
893 } else {
894 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
895 cmd.tlbi.asid = domain->domain_id;
896 cmd.tlbi.vmid = 0;
897 }
898 /*
899 * There are no mappings at high addresses since we don't use TTB1, so
900 * no overflow possible.
901 */
902 BUG_ON(end < iova);
903 hyp_read_lock(&smmu_domain->list_lock);
904 list_for_each_entry(iommu_node, &smmu_domain->iommu_list, list) {
905 smmu = to_smmu(iommu_node->iommu);
906 WARN_ON(smmu_tlb_inv_range_smmu(smmu, domain,
907 &cmd, iova, size, granule));
908 }
909 hyp_read_unlock(&smmu_domain->list_lock);
910 }
911
912 static void smmu_unmap_visit_leaf(phys_addr_t addr, size_t size,
913 struct io_pgtable_walk_common *data,
914 void *wd)
915 {
916 u64 *ptep = wd;
917 u64 pte = *ptep;
918
919 /* Might be a cleared table. */
920 if (!pte)
921 return;
922 WARN_ON(iommu_pkvm_unuse_dma(addr, size));
923 *ptep = 0;
924 }
925
926 static void smmu_tlb_flush_walk(unsigned long iova, size_t size,
927 size_t granule, void *cookie)
928 {
929 struct kvm_hyp_iommu_domain *domain = cookie;
930
931 smmu_tlb_inv_range(domain, iova, size, granule, false);
932 }
933
934 static void smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
935 unsigned long iova, size_t granule,
936 void *cookie)
937 {
938 if (gather)
939 kvm_iommu_iotlb_gather_add_page(cookie, gather, iova, granule);
940 else
941 smmu_tlb_inv_range(cookie, iova, granule, granule, true);
942 }
943
944 static void smmu_free_leaf(unsigned long phys, size_t granule, void *cookie)
945 {
946 struct kvm_hyp_iommu_domain *domain = cookie;
947
948 /* No tracking for idmap domain. */
949 if (domain->domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID)
950 return;
951
952 WARN_ON(iommu_pkvm_unuse_dma(phys, granule));
953 }
954
955 static const struct iommu_flush_ops smmu_tlb_ops = {
956 .tlb_flush_all = smmu_tlb_flush_all,
957 .tlb_flush_walk = smmu_tlb_flush_walk,
958 .tlb_add_page = smmu_tlb_add_page,
959 .free_leaf = smmu_free_leaf,
960 };
961
962 static void smmu_iotlb_sync(struct kvm_hyp_iommu_domain *domain,
963 struct iommu_iotlb_gather *gather)
964 {
965 size_t size;
966 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
967 struct io_pgtable *pgtable = smmu_domain->pgtable;
968 struct arm_lpae_io_pgtable *data = io_pgtable_to_data(pgtable);
969 struct arm_lpae_io_pgtable_walk_data wd = {
970 .cookie = data,
971 };
972 struct io_pgtable_walk_common walk_data = {
973 .visit_leaf = smmu_unmap_visit_leaf,
974 .data = &wd,
975 };
976
977 if (!gather->pgsize)
978 return;
979 size = gather->end - gather->start + 1;
980 smmu_tlb_inv_range(domain, gather->start, size, gather->pgsize, true);
981
982 /*
983 * Now decrement the refcount of unmapped pages thanks to
984 * IO_PGTABLE_QUIRK_UNMAP_INVAL
985 */
986 pgtable->ops.pgtable_walk(&pgtable->ops, gather->start, size, &walk_data);
987 }
988
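/*
 * Fill a stage-2 STE from the domain's io-pgtable configuration: the
 * VTCR fields, the VMID (taken from the hypervisor domain ID) and the
 * stage-2 translation table base. SHCFG is set to use the incoming
 * shareability attribute.
 */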
989 static int smmu_domain_config_s2(struct kvm_hyp_iommu_domain *domain,
990 struct arm_smmu_ste *ste)
991 {
992 struct io_pgtable_cfg *cfg;
993 u64 ts, sl, ic, oc, sh, tg, ps;
994 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
995
996 cfg = &smmu_domain->pgtable->cfg;
997 ps = cfg->arm_lpae_s2_cfg.vtcr.ps;
998 tg = cfg->arm_lpae_s2_cfg.vtcr.tg;
999 sh = cfg->arm_lpae_s2_cfg.vtcr.sh;
1000 oc = cfg->arm_lpae_s2_cfg.vtcr.orgn;
1001 ic = cfg->arm_lpae_s2_cfg.vtcr.irgn;
1002 sl = cfg->arm_lpae_s2_cfg.vtcr.sl;
1003 ts = cfg->arm_lpae_s2_cfg.vtcr.tsz;
1004
1005 ste->data[0] = STRTAB_STE_0_V |
1006 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1007 ste->data[1] = FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING);
1008 ste->data[2] = FIELD_PREP(STRTAB_STE_2_VTCR,
1009 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, ps) |
1010 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, tg) |
1011 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, sh) |
1012 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, oc) |
1013 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, ic) |
1014 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, sl) |
1015 FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, ts)) |
1016 FIELD_PREP(STRTAB_STE_2_S2VMID, domain->domain_id) |
1017 STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2R;
1018 ste->data[3] = cfg->arm_lpae_s2_cfg.vttbr & STRTAB_STE_3_S2TTB_MASK;
1019
1020 return 0;
1021 }
1022
1023 static u64 *smmu_domain_config_s1_ste(struct hyp_arm_smmu_v3_device *smmu,
1024 u32 pasid_bits, struct arm_smmu_ste *ste)
1025 {
1026 u64 *cd_table;
1027
1028 cd_table = smmu_alloc_cd(smmu, pasid_bits);
1029 if (!cd_table)
1030 return NULL;
1031
1032 ste->data[1] = FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1033 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1034 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1035 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
1036 ste->data[0] = ((u64)cd_table & STRTAB_STE_0_S1CTXPTR_MASK) |
1037 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1038 FIELD_PREP(STRTAB_STE_0_S1CDMAX, pasid_bits) |
1039 FIELD_PREP(STRTAB_STE_0_S1FMT, STRTAB_STE_0_S1FMT_LINEAR) |
1040 STRTAB_STE_0_V;
1041
1042 return cd_table;
1043 }
1044
1045 /*
1046 * This function handles configuration for pasid and non-pasid domains
1047 * with the following assumptions:
1048 * - pasid 0 is always attached first; this is the typical flow
1049 * for the kernel, where attach_dev is always called before set_dev_pasid.
1050 * In that case only pasid 0 is allowed to allocate memory for the CD,
1051 * and the other pasids expect to find the table.
1052 * - pasid 0 is detached last, which is also guaranteed by the kernel.
1053 */
1054 static int smmu_domain_config_s1(struct hyp_arm_smmu_v3_device *smmu,
1055 struct kvm_hyp_iommu_domain *domain,
1056 u32 sid, u32 pasid, u32 pasid_bits,
1057 struct arm_smmu_ste *ste)
1058 {
1059 struct arm_smmu_ste *dst;
1060 u64 val;
1061 u64 *cd_entry, *cd_table;
1062 struct io_pgtable_cfg *cfg;
1063 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1064
1065 cfg = &smmu_domain->pgtable->cfg;
1066 dst = smmu_get_ste_ptr(smmu, sid);
1067 val = dst->data[0];
1068
1069 if (FIELD_GET(STRTAB_STE_0_CFG, val) == STRTAB_STE_0_CFG_S2_TRANS)
1070 return -EBUSY;
1071
1072 if (pasid == 0) {
1073 cd_table = smmu_domain_config_s1_ste(smmu, pasid_bits, ste);
1074 if (!cd_table)
1075 return -ENOMEM;
1076 } else {
1077 u32 nr_entries;
1078
1079 cd_table = (u64 *)(FIELD_GET(STRTAB_STE_0_S1CTXPTR_MASK, val) << 6);
1080 if (!cd_table)
1081 return -EINVAL;
1082 nr_entries = 1 << FIELD_GET(STRTAB_STE_0_S1CDMAX, val);
1083 if (pasid >= nr_entries)
1084 return -E2BIG;
1085 }
1086
1087 /* Write CD. */
1088 cd_entry = smmu_get_cd_ptr(hyp_phys_to_virt((u64)cd_table), pasid);
1089
1090 /* CD already used by another device. */
1091 if (cd_entry[0])
1092 return -EBUSY;
1093
1094 cd_entry[1] = cpu_to_le64(cfg->arm_lpae_s1_cfg.ttbr & CTXDESC_CD_1_TTB0_MASK);
1095 cd_entry[2] = 0;
1096 cd_entry[3] = cpu_to_le64(cfg->arm_lpae_s1_cfg.mair);
1097
1098 /* STE is live. */
1099 if (pasid)
1100 smmu_sync_cd(smmu, sid, pasid);
1101 val = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz) |
1102 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) |
1103 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) |
1104 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) |
1105 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) |
1106 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, cfg->arm_lpae_s1_cfg.tcr.ips) |
1107 CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64 |
1108 CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1109 CTXDESC_CD_0_ASET |
1110 FIELD_PREP(CTXDESC_CD_0_ASID, domain->domain_id) |
1111 CTXDESC_CD_0_V;
1112 WRITE_ONCE(cd_entry[0], cpu_to_le64(val));
1113 /* STE is live. */
1114 if (pasid)
1115 smmu_sync_cd(smmu, sid, pasid);
1116 return 0;
1117 }
1118
1119 static int smmu_domain_finalise(struct hyp_arm_smmu_v3_device *smmu,
1120 struct kvm_hyp_iommu_domain *domain)
1121 {
1122 int ret;
1123 struct io_pgtable_cfg cfg;
1124 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1125 struct arm_lpae_io_pgtable *data;
1126 bool idmapped = domain->domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID;
1127 unsigned long quirks = idmapped ? 0 : IO_PGTABLE_QUIRK_UNMAP_INVAL;
1128
1129 if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_S1) {
1130 size_t ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1131
1132 cfg = (struct io_pgtable_cfg) {
1133 .fmt = ARM_64_LPAE_S1,
1134 .pgsize_bitmap = smmu->pgsize_bitmap,
1135 .ias = min_t(unsigned long, ias, VA_BITS),
1136 .oas = smmu->ias,
1137 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
1138 .tlb = &smmu_tlb_ops,
1139 .quirks = quirks,
1140 };
1141 } else {
1142 cfg = (struct io_pgtable_cfg) {
1143 .fmt = ARM_64_LPAE_S2,
1144 .pgsize_bitmap = smmu->pgsize_bitmap,
1145 .ias = smmu->ias,
1146 .oas = smmu->oas,
1147 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
1148 .tlb = &smmu_tlb_ops,
1149 .quirks = quirks,
1150 };
1151 }
1152
1153 hyp_spin_lock(&smmu_domain->pgt_lock);
1154 smmu_domain->pgtable = kvm_arm_io_pgtable_alloc(&cfg, domain, &ret);
1155 hyp_spin_unlock(&smmu_domain->pgt_lock);
1156 if (ret)
1157 return ret;
1158
1159 data = io_pgtable_to_data(smmu_domain->pgtable);
1160 data->idmapped = idmapped;
1161 return ret;
1162 }
1163
1164 static bool smmu_domain_compat(struct hyp_arm_smmu_v3_device *smmu,
1165 struct hyp_arm_smmu_v3_domain *smmu_domain)
1166 {
1167 struct io_pgtable_cfg *cfg;
1168
1169 /* Domain is empty. */
1170 if (!smmu_domain->pgtable)
1171 return true;
1172
1173 if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_S2) {
1174 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1175 return false;
1176 } else {
1177 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1178 return false;
1179 }
1180
1181 cfg = &smmu_domain->pgtable->cfg;
1182
1183 /* Best effort. */
1184 return ((smmu->pgsize_bitmap | cfg->pgsize_bitmap) == smmu->pgsize_bitmap);
1185 }
1186
1187 static bool smmu_existing_in_domain(struct hyp_arm_smmu_v3_device *smmu,
1188 struct hyp_arm_smmu_v3_domain *smmu_domain)
1189 {
1190 struct domain_iommu_node *iommu_node;
1191 struct hyp_arm_smmu_v3_device *other;
1192
1193 hyp_assert_write_lock_held(&smmu_domain->list_lock);
1194
1195 list_for_each_entry(iommu_node, &smmu_domain->iommu_list, list) {
1196 other = to_smmu(iommu_node->iommu);
1197 if (other == smmu)
1198 return true;
1199 }
1200
1201 return false;
1202 }
1203
1204 static void smmu_get_ref_domain(struct hyp_arm_smmu_v3_device *smmu,
1205 struct hyp_arm_smmu_v3_domain *smmu_domain)
1206 {
1207 struct domain_iommu_node *iommu_node;
1208 struct hyp_arm_smmu_v3_device *other;
1209
1210 hyp_assert_write_lock_held(&smmu_domain->list_lock);
1211
1212 list_for_each_entry(iommu_node, &smmu_domain->iommu_list, list) {
1213 other = to_smmu(iommu_node->iommu);
1214 if (other == smmu) {
1215 iommu_node->ref++;
1216 return;
1217 }
1218 }
1219 }
1220
1221 static void smmu_put_ref_domain(struct hyp_arm_smmu_v3_device *smmu,
1222 struct hyp_arm_smmu_v3_domain *smmu_domain)
1223 {
1224 struct domain_iommu_node *iommu_node, *temp;
1225 struct hyp_arm_smmu_v3_device *other;
1226
1227 hyp_assert_write_lock_held(&smmu_domain->list_lock);
1228
1229 list_for_each_entry_safe(iommu_node, temp, &smmu_domain->iommu_list, list) {
1230 other = to_smmu(iommu_node->iommu);
1231 if (other == smmu) {
1232 iommu_node->ref--;
1233 if (iommu_node->ref == 0) {
1234 /*
1235 * Ensure no stale tlb entries when domain_id
1236 * is re-used for this SMMU.
1237 */
1238 smmu_inv_domain(smmu, smmu_domain);
1239
1240 list_del(&iommu_node->list);
1241 hyp_free(iommu_node);
1242 }
1243 return;
1244 }
1245 }
1246 }
1247
1248 static int smmu_fix_up_domains(struct hyp_arm_smmu_v3_device *smmu,
1249 struct hyp_arm_smmu_v3_domain *smmu_domain)
1250 {
1251 /*
1252 * BYPASS domains are only supported on stage-2 instances. That is overly
1253 * restrictive, but for now stage-1 is limited to VA_BITS to match the kernel,
1254 * so it might not cover the input address bits and we don't support it.
1255 */
1256 if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_BYPASS) {
1257 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1258 smmu_domain->type = KVM_ARM_SMMU_DOMAIN_S2;
1259 else
1260 return -EINVAL;
1261 } else if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_ANY) {
1262 /* Any domain defaults to S1 as we don't know if the guest needs pasid. */
1263 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) {
1264 smmu_domain->type = KVM_ARM_SMMU_DOMAIN_S1;
1265 } else {
1266 smmu_domain->type = KVM_ARM_SMMU_DOMAIN_S2;
1267 }
1268 }
1269
1270 return 0;
1271 }
1272
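/*
 * Attach (@sid, @pasid) to @domain on this SMMU:
 * - allocate the STE (and the L2 stream table) if needed,
 * - map the generic domain type onto a stage and lazily finalise the
 *   io-pgtable,
 * - program the stage-2 STE or the stage-1 CD/STE, writing STE word 0
 *   last with a sync in between so the SMMU never sees a partially
 *   initialized entry.
 * Each SMMU attached to the domain is refcounted in iommu_list so that
 * TLB invalidations only target SMMUs that use the domain.
 */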
1273 static int smmu_attach_dev(struct kvm_hyp_iommu *iommu, struct kvm_hyp_iommu_domain *domain,
1274 u32 sid, u32 pasid, u32 pasid_bits, unsigned long flags)
1275 {
1276 int i;
1277 int ret;
1278 struct arm_smmu_ste *dst;
1279 struct arm_smmu_ste ste = {};
1280 struct hyp_arm_smmu_v3_device *smmu = to_smmu(iommu);
1281 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1282 struct domain_iommu_node *iommu_node = NULL;
1283 bool init_idmap = false;
1284
1285 hyp_write_lock(&smmu_domain->list_lock);
1286 kvm_iommu_lock(iommu);
1287 dst = smmu_get_alloc_ste_ptr(smmu, sid);
1288 if (!dst) {
1289 ret = -ENOMEM;
1290 goto out_unlock;
1291 }
1292
1293 /* Map domain type to an SMMUv3 stage. */
1294 ret = smmu_fix_up_domains(smmu, smmu_domain);
1295 if (ret)
1296 goto out_unlock;
1297
1298 if (!smmu_existing_in_domain(smmu, smmu_domain)) {
1299 if (!smmu_domain_compat(smmu, smmu_domain)) {
1300 ret = -EBUSY;
1301 goto out_unlock;
1302 }
1303 iommu_node = hyp_alloc(sizeof(struct domain_iommu_node));
1304 if (!iommu_node) {
1305 ret = -ENOMEM;
1306 goto out_unlock;
1307 }
1308 iommu_node->iommu = iommu;
1309 iommu_node->ref = 1;
1310 } else {
1311 smmu_get_ref_domain(smmu, smmu_domain);
1312 }
1313
1314 if (!smmu_domain->pgtable) {
1315 ret = smmu_domain_finalise(smmu, domain);
1316 if (ret)
1317 goto out_unlock_ref;
1318 if (domain->domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID)
1319 init_idmap = true;
1320 }
1321
1322 if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_S2) {
1323 /* Device already attached or pasid for s2. */
1324 if (dst->data[0] || pasid) {
1325 ret = -EBUSY;
1326 goto out_unlock_ref;
1327 }
1328 ret = smmu_domain_config_s2(domain, &ste);
1329 } else {
1330 /*
1331 * Allocate and config CD, and update CD if possible.
1332 */
1333 pasid_bits = min(pasid_bits, smmu->ssid_bits);
1334 ret = smmu_domain_config_s1(smmu, domain, sid, pasid,
1335 pasid_bits, &ste);
1336 }
1337 /* We don't update STEs for pasid domains. */
1338 if (ret || pasid)
1339 goto out_unlock_ref;
1340
1341 /*
1342 * The SMMU may cache a disabled STE.
1343 * Initialize all fields, sync, then enable it.
1344 */
1345 for (i = 1; i < STRTAB_STE_DWORDS; i++)
1346 dst->data[i] = ste.data[i];
1347
1348 ret = smmu_sync_ste(smmu, sid);
1349 if (ret)
1350 goto out_unlock_ref;
1351
1352 WRITE_ONCE(dst->data[0], ste.data[0]);
1353 ret = smmu_sync_ste(smmu, sid);
1354 WARN_ON(ret);
1355
1356 out_unlock_ref:
1357 if (iommu_node && ret)
1358 hyp_free(iommu_node);
1359 else if (iommu_node)
1360 list_add_tail(&iommu_node->list, &smmu_domain->iommu_list);
1361 else if (ret)
1362 smmu_put_ref_domain(smmu, smmu_domain);
1363 out_unlock:
1364 kvm_iommu_unlock(iommu);
1365 hyp_write_unlock(&smmu_domain->list_lock);
1366
1367 if (init_idmap)
1368 ret = kvm_iommu_snapshot_host_stage2(domain);
1369
1370 return ret;
1371 }
1372
1373 static int smmu_detach_dev(struct kvm_hyp_iommu *iommu, struct kvm_hyp_iommu_domain *domain,
1374 u32 sid, u32 pasid)
1375 {
1376 struct arm_smmu_ste *dst;
1377 int i, ret;
1378 struct hyp_arm_smmu_v3_device *smmu = to_smmu(iommu);
1379 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1380 u32 pasid_bits = 0;
1381 u64 *cd_table, *cd;
1382
1383 hyp_write_lock(&smmu_domain->list_lock);
1384 kvm_iommu_lock(iommu);
1385 dst = smmu_get_ste_ptr(smmu, sid);
1386 if (!dst) {
1387 ret = -ENODEV;
1388 goto out_unlock;
1389 }
1390
1391 /*
1392 * For stage-1:
1393 * - The kernel has to detach pasid = 0 last.
1394 * - This will free the CD.
1395 */
1396 if (smmu_domain->type == KVM_ARM_SMMU_DOMAIN_S1) {
1397 pasid_bits = FIELD_GET(STRTAB_STE_0_S1CDMAX, dst->data[0]);
1398 if (pasid >= (1 << pasid_bits)) {
1399 ret = -E2BIG;
1400 goto out_unlock;
1401 }
1402 cd_table = (u64 *)(dst->data[0] & STRTAB_STE_0_S1CTXPTR_MASK);
1403 if (WARN_ON(!cd_table)) {
1404 ret = -ENODEV;
1405 goto out_unlock;
1406 }
1407
1408 cd_table = hyp_phys_to_virt((phys_addr_t)cd_table);
1409 if (pasid == 0) {
1410 int j;
1411
1412 /* Ensure other pasids are detached. */
1413 for (j = 1 ; j < (1 << pasid_bits) ; ++j) {
1414 cd = smmu_get_cd_ptr(cd_table, j);
1415 if (cd[0] & CTXDESC_CD_0_V) {
1416 ret = -EINVAL;
1417 goto out_unlock;
1418 }
1419 }
1420
1421 smmu_free_cd(cd_table, pasid_bits);
1422 } else {
1423 cd = smmu_get_cd_ptr(cd_table, pasid);
1424 if (!(cd[0] & CTXDESC_CD_0_V)) {
1425 /* The device is not actually attached! */
1426 ret = -ENOENT;
1427 goto out_unlock;
1428 }
1429 cd[0] = 0;
1430 smmu_sync_cd(smmu, sid, pasid);
1431 cd[1] = 0;
1432 cd[2] = 0;
1433 cd[3] = 0;
1434 ret = smmu_sync_cd(smmu, sid, pasid);
1435 smmu_put_ref_domain(smmu, smmu_domain);
1436 goto out_unlock;
1437 }
1438 }
1439 /* For stage-2 and pasid = 0 */
1440 dst->data[0] = 0;
1441 ret = smmu_sync_ste(smmu, sid);
1442 if (ret)
1443 goto out_unlock;
1444 for (i = 1; i < STRTAB_STE_DWORDS; i++)
1445 dst->data[i] = 0;
1446
1447 ret = smmu_sync_ste(smmu, sid);
1448
1449 smmu_put_ref_domain(smmu, smmu_domain);
1450 out_unlock:
1451 kvm_iommu_unlock(iommu);
1452 hyp_write_unlock(&smmu_domain->list_lock);
1453 return ret;
1454 }
1455
1456 static int smmu_map_pages(struct kvm_hyp_iommu_domain *domain, unsigned long iova,
1457 phys_addr_t paddr, size_t pgsize,
1458 size_t pgcount, int prot, size_t *total_mapped)
1459 {
1460 size_t mapped;
1461 size_t granule;
1462 int ret = 0;
1463 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1464 struct io_pgtable *pgtable = smmu_domain->pgtable;
1465
1466 if (!pgtable)
1467 return -EINVAL;
1468
1469 granule = 1UL << __ffs(smmu_domain->pgtable->cfg.pgsize_bitmap);
1470 if (!IS_ALIGNED(iova | paddr | pgsize, granule))
1471 return -EINVAL;
1472
1473 hyp_spin_lock(&smmu_domain->pgt_lock);
1474 while (pgcount) {
1475 mapped = 0;
1476 ret = pgtable->ops.map_pages(&pgtable->ops, iova, paddr,
1477 pgsize, pgcount, prot, 0, &mapped);
1478 if (ret)
1479 break;
1480 WARN_ON(!IS_ALIGNED(mapped, pgsize));
1481 WARN_ON(mapped > pgcount * pgsize);
1482
1483 pgcount -= mapped / pgsize;
1484 *total_mapped += mapped;
1485 iova += mapped;
1486 paddr += mapped;
1487 }
1488 hyp_spin_unlock(&smmu_domain->pgt_lock);
1489
1490 return ret;
1491 }
1492
1493 static size_t smmu_unmap_pages(struct kvm_hyp_iommu_domain *domain, unsigned long iova,
1494 size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *gather)
1495 {
1496 size_t granule, unmapped, total_unmapped = 0;
1497 size_t size = pgsize * pgcount;
1498 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1499 struct io_pgtable *pgtable = smmu_domain->pgtable;
1500
1501 if (!pgtable)
1502 return -EINVAL;
1503
1504 granule = 1UL << __ffs(smmu_domain->pgtable->cfg.pgsize_bitmap);
1505 if (!IS_ALIGNED(iova | pgsize, granule))
1506 return 0;
1507
1508 hyp_spin_lock(&smmu_domain->pgt_lock);
1509 while (total_unmapped < size) {
1510 unmapped = pgtable->ops.unmap_pages(&pgtable->ops, iova, pgsize,
1511 pgcount, gather);
1512 if (!unmapped)
1513 break;
1514 iova += unmapped;
1515 total_unmapped += unmapped;
1516 pgcount -= unmapped / pgsize;
1517 }
1518 hyp_spin_unlock(&smmu_domain->pgt_lock);
1519 return total_unmapped;
1520 }
1521
1522 static phys_addr_t smmu_iova_to_phys(struct kvm_hyp_iommu_domain *domain,
1523 unsigned long iova)
1524 {
1525 phys_addr_t paddr;
1526 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1527 struct io_pgtable *pgtable = smmu_domain->pgtable;
1528
1529 if (!pgtable)
1530 return -EINVAL;
1531
1532 hyp_spin_lock(&smmu_domain->pgt_lock);
1533 paddr = pgtable->ops.iova_to_phys(&pgtable->ops, iova);
1534 hyp_spin_unlock(&smmu_domain->pgt_lock);
1535
1536 return paddr;
1537 }
1538
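/*
 * Emulate the host MMIO accesses that are still allowed after the MMIO
 * region was donated to the hypervisor: the event queue PROD/CONS
 * registers (in page 1, hence the SZ_64K offsets), GERROR (read-only)
 * and GERRORN. Only aligned 32-bit accesses are emulated; anything else
 * is refused and left to the generic fault path.
 */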
1539 static bool smmu_dabt_device(struct hyp_arm_smmu_v3_device *smmu,
1540 struct user_pt_regs *regs,
1541 u64 esr, u32 off)
1542 {
1543 bool is_write = esr & ESR_ELx_WNR;
1544 unsigned int len = BIT((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
1545 int rd = (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
1546 const u32 no_access = 0;
1547 const u32 read_write = (u32)(-1);
1548 const u32 read_only = is_write ? no_access : read_write;
1549 u32 mask = no_access;
1550
1551 /*
1552 * Only handle MMIO access with u32 size and alignment.
1553 * We don't need to change 64-bit registers for now.
1554 */
1555 if ((len != sizeof(u32)) || (off & (sizeof(u32) - 1)))
1556 return false;
1557
1558 switch (off) {
1559 case ARM_SMMU_EVTQ_PROD + SZ_64K:
1560 mask = read_write;
1561 break;
1562 case ARM_SMMU_EVTQ_CONS + SZ_64K:
1563 mask = read_write;
1564 break;
1565 case ARM_SMMU_GERROR:
1566 mask = read_only;
1567 break;
1568 case ARM_SMMU_GERRORN:
1569 mask = read_write;
1570 break;
1571 };
1572
1573 if (!mask)
1574 return false;
1575 if (is_write)
1576 writel_relaxed(regs->regs[rd] & mask, smmu->base + off);
1577 else
1578 regs->regs[rd] = readl_relaxed(smmu->base + off);
1579
1580 return true;
1581 }
1582
1583 static int smmu_id_to_token(pkvm_handle_t smmu_id, u64 *out_token)
1584 {
1585 if (smmu_id >= kvm_hyp_arm_smmu_v3_count)
1586 return -EINVAL;
1587
1588 smmu_id = array_index_nospec(smmu_id, kvm_hyp_arm_smmu_v3_count);
1589 *out_token = kvm_hyp_arm_smmu_v3_smmus[smmu_id].mmio_addr;
1590 return 0;
1591 }
1592
1593 static int smmu_dev_block_dma(struct kvm_hyp_iommu *iommu, u32 sid, bool is_host2guest)
1594 {
1595 struct hyp_arm_smmu_v3_device *smmu = to_smmu(iommu);
1596 static struct arm_smmu_ste *dst;
1597 int ret = 0;
1598
1599 kvm_iommu_lock(iommu);
1600 dst = smmu_get_ste_ptr(smmu, sid);
1601
1602 /*
1603 * VFIO will attach the device to a blocking domain; this will make the
1604 * kernel driver detach the device, which should then have a zeroed STE.
1605 * So, if this is not the current state of the device, something
1606 * went wrong.
1607 * For guests, we need to do more as guests might not exit cleanly
1608 * and the device might be translating, so we have to actually block
1609 * the device and clean the STE/CD.
1610 */
1611 if (dst->data[0]) {
1612 if (is_host2guest) {
1613 ret = -EINVAL;
1614 } else {
1615 int i = 0;
1616 u32 cfg = FIELD_GET(STRTAB_STE_0_CFG, dst->data[0]);
1617
1618 if (cfg == STRTAB_STE_0_CFG_S1_TRANS) {
1619 size_t nr_entries, cd_sz;
1620 u64 cd_table;
1621
1622 cd_table = (dst->data[0] & STRTAB_STE_0_S1CTXPTR_MASK);
1623 nr_entries = 1 << FIELD_GET(STRTAB_STE_0_S1CDMAX, dst->data[0]);
1624 cd_sz = nr_entries * (CTXDESC_CD_DWORDS << 3); /* nr_entries is already a count */
1625 kvm_iommu_reclaim_pages(hyp_phys_to_virt(cd_table), get_order(cd_sz));
1626 }
1627 /* Clear the whole STE so the device is blocked. */
1628 for (i = 0; i < STRTAB_STE_DWORDS; i++)
1629 dst->data[i] = 0;
1630 ret = smmu_sync_ste(smmu, sid);
1631 }
1632 }
1633
1634 kvm_iommu_unlock(iommu);
1635 return ret;
1636 }
1637
1638 static bool smmu_dabt_handler(struct user_pt_regs *regs, u64 esr, u64 addr)
1639 {
1640 struct hyp_arm_smmu_v3_device *smmu;
1641
1642 for_each_smmu(smmu) {
1643 if (addr < smmu->mmio_addr || addr >= smmu->mmio_addr + smmu->mmio_size)
1644 continue;
1645 return smmu_dabt_device(smmu, regs, esr, addr - smmu->mmio_addr);
1646 }
1647 return false;
1648 }
1649
1650 int smmu_suspend(struct kvm_hyp_iommu *iommu)
1651 {
1652 struct hyp_arm_smmu_v3_device *smmu = to_smmu(iommu);
1653
1654 /*
1655 * Disable translation. GBPA is validated at probe to be set to abort, so all
1656 * translation is aborted while the SMMU is disabled.
1657 */
1658 if (iommu->power_domain.type == KVM_POWER_DOMAIN_HOST_HVC)
1659 return smmu_write_cr0(smmu, 0);
1660 return 0;
1661 }
1662
1663 int smmu_resume(struct kvm_hyp_iommu *iommu)
1664 {
1665 struct hyp_arm_smmu_v3_device *smmu = to_smmu(iommu);
1666
1667 /*
1668 * Re-enable and clean all caches.
1669 */
1670 if (iommu->power_domain.type == KVM_POWER_DOMAIN_HOST_HVC)
1671 return smmu_reset_device(smmu);
1672 return 0;
1673 }
1674
1675 /*
1676 * Although the SMMU can support multiple granules, it must at least support the
1677 * same PAGE_SIZE as the CPU, and for the IDMAP domains we only use this granule.
1678 * As we optimize for memory usage and performance, we try to use block mappings
1679 * when possible.
1680 */
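/*
 * For example, with 4K pages a 3MB region starting on a 2MB boundary is
 * mapped with one 2MB block first; the remaining 1MB is then mapped
 * with 4K pages on the next iterations of the caller's loop.
 */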
1681 static size_t smmu_pgsize_idmap(size_t size, u64 paddr)
1682 {
1683 size_t pgsizes;
1684 size_t pgsize_bitmask = 0;
1685
1686 if (PAGE_SIZE == SZ_4K) {
1687 pgsize_bitmask = SZ_4K | SZ_2M | SZ_1G;
1688 } else if (PAGE_SIZE == SZ_16K) {
1689 pgsize_bitmask = SZ_16K | SZ_32M;
1690 } else if (PAGE_SIZE == SZ_64K) {
1691 pgsize_bitmask = SZ_64K | SZ_512M;
1692 }
1693
1694 /* All page sizes that fit the size */
1695 pgsizes = pgsize_bitmask & GENMASK_ULL(__fls(size), 0);
1696
1697 /* Address must be aligned to page size */
1698 if (likely(paddr))
1699 pgsizes &= GENMASK_ULL(__ffs(paddr), 0);
1700
1701 WARN_ON(!pgsizes);
1702
1703 return BIT(__fls(pgsizes));
1704 }
1705
1706 static void smmu_host_stage2_idmap(struct kvm_hyp_iommu_domain *domain,
1707 phys_addr_t start, phys_addr_t end, int prot)
1708 {
1709 size_t size = end - start;
1710 size_t pgsize, pgcount;
1711 size_t mapped, unmapped;
1712 int ret;
1713 struct hyp_arm_smmu_v3_domain *smmu_domain = domain->priv;
1714 struct io_pgtable *pgtable = smmu_domain->pgtable;
1715
1716 end = min(end, BIT(pgtable->cfg.oas));
1717 if (start >= end)
1718 return;
1719
1720 if (prot) {
1721 if (!(prot & IOMMU_MMIO))
1722 prot |= IOMMU_CACHE;
1723
1724 while (size) {
1725 mapped = 0;
1726 pgsize = smmu_pgsize_idmap(size, start);
1727 pgcount = size / pgsize;
1728 ret = pgtable->ops.map_pages(&pgtable->ops, start, start,
1729 pgsize, pgcount, prot, 0, &mapped);
1730 size -= mapped;
1731 start += mapped;
1732 if (!mapped || ret)
1733 return;
1734 }
1735 } else {
1736 while (size) {
1737 pgsize = smmu_pgsize_idmap(size, start);
1738 pgcount = size / pgsize;
1739 unmapped = pgtable->ops.unmap_pages(&pgtable->ops, start,
1740 pgsize, pgcount, NULL);
1741 size -= unmapped;
1742 start += unmapped;
1743 if (!unmapped)
1744 return;
1745 }
1746 }
1747 }
1748
1749 #ifdef MODULE
1750 int smmu_init_hyp_module(const struct pkvm_module_ops *ops)
1751 {
1752 if (!ops)
1753 return -EINVAL;
1754
1755 mod_ops = ops;
1756 return 0;
1757 }
1758 #endif
1759
1760 /* Shared with the kernel driver in EL1 */
1761 struct kvm_iommu_ops smmu_ops = {
1762 .init = smmu_init,
1763 .get_iommu_by_id = smmu_id_to_iommu,
1764 .alloc_domain = smmu_alloc_domain,
1765 .free_domain = smmu_free_domain,
1766 .iotlb_sync = smmu_iotlb_sync,
1767 .attach_dev = smmu_attach_dev,
1768 .detach_dev = smmu_detach_dev,
1769 .map_pages = smmu_map_pages,
1770 .unmap_pages = smmu_unmap_pages,
1771 .iova_to_phys = smmu_iova_to_phys,
1772 .dabt_handler = smmu_dabt_handler,
1773 .suspend = smmu_suspend,
1774 .resume = smmu_resume,
1775 .host_stage2_idmap = smmu_host_stage2_idmap,
1776 .dev_block_dma = smmu_dev_block_dma,
1777 .get_iommu_token_by_id = smmu_id_to_token,
1778 };
1779