1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_iommu.h>
27 #include <linux/of_platform.h>
28 #include <linux/pci.h>
29 #include <linux/pci-ats.h>
30 #include <linux/platform_device.h>
31 
32 #include <linux/amba/bus.h>
33 
34 #include "arm-smmu-v3.h"
35 
36 static bool disable_bypass = 1;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40 
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 	"Disable MSI-based polling for CMD_SYNC completion.");
45 
46 enum arm_smmu_msi_index {
47 	EVTQ_MSI_INDEX,
48 	GERROR_MSI_INDEX,
49 	PRIQ_MSI_INDEX,
50 	ARM_SMMU_MAX_MSIS,
51 };
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 static struct arm_smmu_option_prop arm_smmu_options[] = {
80 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
81 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
82 	{ 0, NULL},
83 };
84 
85 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
86 						 struct arm_smmu_device *smmu)
87 {
88 	if (offset > SZ_64K)
89 		return smmu->page1 + offset - SZ_64K;
90 
91 	return smmu->base + offset;
92 }
93 
94 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
95 {
96 	return container_of(dom, struct arm_smmu_domain, domain);
97 }
98 
99 static void parse_driver_options(struct arm_smmu_device *smmu)
100 {
101 	int i = 0;
102 
103 	do {
104 		if (of_property_read_bool(smmu->dev->of_node,
105 						arm_smmu_options[i].prop)) {
106 			smmu->options |= arm_smmu_options[i].opt;
107 			dev_notice(smmu->dev, "option %s\n",
108 				arm_smmu_options[i].prop);
109 		}
110 	} while (arm_smmu_options[++i].opt);
111 }
112 
113 /* Low-level queue manipulation functions */
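/*
 * Note: the prod and cons values manipulated below pack three fields (see
 * Q_OVF, Q_WRP and Q_IDX in arm-smmu-v3.h): an overflow flag, a wrap bit and
 * the queue index proper. The wrap bit is what lets queue_full() and
 * queue_empty() tell a full queue apart from an empty one when the indices
 * are equal.
 */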
114 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
115 {
116 	u32 space, prod, cons;
117 
118 	prod = Q_IDX(q, q->prod);
119 	cons = Q_IDX(q, q->cons);
120 
121 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
122 		space = (1 << q->max_n_shift) - (prod - cons);
123 	else
124 		space = cons - prod;
125 
126 	return space >= n;
127 }
128 
129 static bool queue_full(struct arm_smmu_ll_queue *q)
130 {
131 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
132 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
133 }
134 
135 static bool queue_empty(struct arm_smmu_ll_queue *q)
136 {
137 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
138 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
139 }
140 
141 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
142 {
143 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
144 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
145 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
146 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
147 }
148 
149 static void queue_sync_cons_out(struct arm_smmu_queue *q)
150 {
151 	/*
152 	 * Ensure that all CPU accesses (reads and writes) to the queue
153 	 * are complete before we update the cons pointer.
154 	 */
155 	__iomb();
156 	writel_relaxed(q->llq.cons, q->cons_reg);
157 }
158 
159 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
160 {
161 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
162 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
163 }
164 
165 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
166 {
167 	struct arm_smmu_ll_queue *llq = &q->llq;
168 
169 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
170 		return;
171 
172 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
173 		      Q_IDX(llq, llq->cons);
174 	queue_sync_cons_out(q);
175 }
176 
177 static int queue_sync_prod_in(struct arm_smmu_queue *q)
178 {
179 	u32 prod;
180 	int ret = 0;
181 
182 	/*
183 	 * We can't use the _relaxed() variant here, as we must prevent
184 	 * speculative reads of the queue before we have determined that
185 	 * prod has indeed moved.
186 	 */
187 	prod = readl(q->prod_reg);
188 
189 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
190 		ret = -EOVERFLOW;
191 
192 	q->llq.prod = prod;
193 	return ret;
194 }
195 
196 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
197 {
198 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
199 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
200 }
201 
202 static void queue_poll_init(struct arm_smmu_device *smmu,
203 			    struct arm_smmu_queue_poll *qp)
204 {
205 	qp->delay = 1;
206 	qp->spin_cnt = 0;
207 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
208 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
209 }
210 
211 static int queue_poll(struct arm_smmu_queue_poll *qp)
212 {
213 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
214 		return -ETIMEDOUT;
215 
216 	if (qp->wfe) {
217 		wfe();
218 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
219 		cpu_relax();
220 	} else {
221 		udelay(qp->delay);
222 		qp->delay *= 2;
223 		qp->spin_cnt = 0;
224 	}
225 
226 	return 0;
227 }
228 
229 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
230 {
231 	int i;
232 
233 	for (i = 0; i < n_dwords; ++i)
234 		*dst++ = cpu_to_le64(*src++);
235 }
236 
237 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
238 {
239 	int i;
240 
241 	for (i = 0; i < n_dwords; ++i)
242 		*dst++ = le64_to_cpu(*src++);
243 }
244 
245 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
246 {
247 	if (queue_empty(&q->llq))
248 		return -EAGAIN;
249 
250 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
251 	queue_inc_cons(&q->llq);
252 	queue_sync_cons_out(q);
253 	return 0;
254 }
255 
256 /* High-level queue accessors */
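/*
 * arm_smmu_cmdq_build_cmd() packs an arm_smmu_cmdq_ent into the 16-byte
 * (CMDQ_ENT_DWORDS dwords) command layout consumed by the hardware command
 * queue.
 */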
257 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
258 {
259 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
260 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
261 
262 	switch (ent->opcode) {
263 	case CMDQ_OP_TLBI_EL2_ALL:
264 	case CMDQ_OP_TLBI_NSNH_ALL:
265 		break;
266 	case CMDQ_OP_PREFETCH_CFG:
267 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
268 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
269 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
270 		break;
271 	case CMDQ_OP_CFGI_CD:
272 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
273 		fallthrough;
274 	case CMDQ_OP_CFGI_STE:
275 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
276 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
277 		break;
278 	case CMDQ_OP_CFGI_CD_ALL:
279 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
280 		break;
281 	case CMDQ_OP_CFGI_ALL:
282 		/* Cover the entire SID range */
283 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
284 		break;
285 	case CMDQ_OP_TLBI_NH_VA:
286 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
287 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
290 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
291 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
292 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
293 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
294 		break;
295 	case CMDQ_OP_TLBI_S2_IPA:
296 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
297 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
298 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
299 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
300 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
301 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
302 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
303 		break;
304 	case CMDQ_OP_TLBI_NH_ASID:
305 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
306 		fallthrough;
307 	case CMDQ_OP_TLBI_S12_VMALL:
308 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
309 		break;
310 	case CMDQ_OP_ATC_INV:
311 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
312 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
313 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
314 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
315 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
316 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
317 		break;
318 	case CMDQ_OP_PRI_RESP:
319 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
320 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
321 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
322 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
323 		switch (ent->pri.resp) {
324 		case PRI_RESP_DENY:
325 		case PRI_RESP_FAIL:
326 		case PRI_RESP_SUCC:
327 			break;
328 		default:
329 			return -EINVAL;
330 		}
331 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
332 		break;
333 	case CMDQ_OP_CMD_SYNC:
334 		if (ent->sync.msiaddr) {
335 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
336 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
337 		} else {
338 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
339 		}
340 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
341 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
342 		break;
343 	default:
344 		return -ENOENT;
345 	}
346 
347 	return 0;
348 }
349 
350 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
351 					 u32 prod)
352 {
353 	struct arm_smmu_queue *q = &smmu->cmdq.q;
354 	struct arm_smmu_cmdq_ent ent = {
355 		.opcode = CMDQ_OP_CMD_SYNC,
356 	};
357 
358 	/*
359 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
360 	 * payload, so the write will zero the entire command on that platform.
361 	 */
362 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
363 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
364 				   q->ent_dwords * 8;
365 	}
366 
367 	arm_smmu_cmdq_build_cmd(cmd, &ent);
368 }
369 
370 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
371 {
372 	static const char *cerror_str[] = {
373 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
374 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
375 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
376 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
377 	};
378 
379 	int i;
380 	u64 cmd[CMDQ_ENT_DWORDS];
381 	struct arm_smmu_queue *q = &smmu->cmdq.q;
382 	u32 cons = readl_relaxed(q->cons_reg);
383 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
384 	struct arm_smmu_cmdq_ent cmd_sync = {
385 		.opcode = CMDQ_OP_CMD_SYNC,
386 	};
387 
388 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
389 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
390 
391 	switch (idx) {
392 	case CMDQ_ERR_CERROR_ABT_IDX:
393 		dev_err(smmu->dev, "retrying command fetch\n");
394 	case CMDQ_ERR_CERROR_NONE_IDX:
395 		return;
396 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
397 		/*
398 		 * ATC Invalidation Completion timeout. CONS is still pointing
399 		 * at the CMD_SYNC. Attempt to complete other pending commands
400 		 * by repeating the CMD_SYNC, though we might well end up back
401 		 * here since the ATC invalidation may still be pending.
402 		 */
403 		return;
404 	case CMDQ_ERR_CERROR_ILL_IDX:
405 	default:
406 		break;
407 	}
408 
409 	/*
410 	 * We may have concurrent producers, so we need to be careful
411 	 * not to touch any of the shadow cmdq state.
412 	 */
413 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
414 	dev_err(smmu->dev, "skipping command in error state:\n");
415 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
416 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
417 
418 	/* Convert the erroneous command into a CMD_SYNC */
419 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
420 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
421 		return;
422 	}
423 
424 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
425 }
426 
427 /*
428  * Command queue locking.
429  * This is a form of bastardised rwlock with the following major changes:
430  *
431  * - The only LOCK routines are exclusive_trylock() and shared_lock().
432  *   Neither have barrier semantics, and instead provide only a control
433  *   dependency.
434  *
435  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
436  *   fails if the caller appears to be the last lock holder (yes, this is
437  *   racy). All successful UNLOCK routines have RELEASE semantics.
438  */
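/*
 * Roughly: lock == 0 means unlocked, lock > 0 counts shared holders, and an
 * exclusive holder swaps 0 for INT_MIN so that concurrent shared increments
 * leave the value negative until the exclusive unlock resets it to zero.
 */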
439 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
440 {
441 	int val;
442 
443 	/*
444 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
445 	 * lock counter. When held in exclusive state, the lock counter is set
446 	 * to INT_MIN so these increments won't hurt as the value will remain
447 	 * negative.
448 	 */
449 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
450 		return;
451 
452 	do {
453 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
454 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
455 }
456 
457 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
458 {
459 	(void)atomic_dec_return_release(&cmdq->lock);
460 }
461 
462 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
463 {
464 	if (atomic_read(&cmdq->lock) == 1)
465 		return false;
466 
467 	arm_smmu_cmdq_shared_unlock(cmdq);
468 	return true;
469 }
470 
471 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
472 ({									\
473 	bool __ret;							\
474 	local_irq_save(flags);						\
475 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
476 	if (!__ret)							\
477 		local_irq_restore(flags);				\
478 	__ret;								\
479 })
480 
481 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
482 ({									\
483 	atomic_set_release(&cmdq->lock, 0);				\
484 	local_irq_restore(flags);					\
485 })
486 
487 
488 /*
489  * Command queue insertion.
490  * This is made fiddly by our attempts to achieve some sort of scalability
491  * since there is one queue shared amongst all of the CPUs in the system.  If
492  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
493  * then you'll *love* this monstrosity.
494  *
495  * The basic idea is to split the queue up into ranges of commands that are
496  * owned by a given CPU; the owner may not have written all of the commands
497  * itself, but is responsible for advancing the hardware prod pointer when
498  * the time comes. The algorithm is roughly:
499  *
500  * 	1. Allocate some space in the queue. At this point we also discover
501  *	   whether the head of the queue is currently owned by another CPU,
502  *	   or whether we are the owner.
503  *
504  *	2. Write our commands into our allocated slots in the queue.
505  *
506  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
507  *
508  *	4. If we are an owner:
509  *		a. Wait for the previous owner to finish.
510  *		b. Mark the queue head as unowned, which tells us the range
511  *		   that we are responsible for publishing.
512  *		c. Wait for all commands in our owned range to become valid.
513  *		d. Advance the hardware prod pointer.
514  *		e. Tell the next owner we've finished.
515  *
516  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
517  *	   owner), then we need to stick around until it has completed:
518  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
519  *		   to clear the first 4 bytes.
520  *		b. Otherwise, we spin waiting for the hardware cons pointer to
521  *		   advance past our command.
522  *
523  * The devil is in the details, particularly the use of locking for handling
524  * SYNC completion and freeing up space in the queue before we think that it is
525  * full.
526  */
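/*
 * For example, the first CPU whose cmpxchg() in step 1 observes
 * CMDQ_PROD_OWNED_FLAG clear becomes the owner and re-publishes prod with the
 * flag set; later CPUs see the flag already set, so they only write and
 * validate their own slots (steps 2-3) and leave step 4 to the owner.
 */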
527 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
528 					       u32 sprod, u32 eprod, bool set)
529 {
530 	u32 swidx, sbidx, ewidx, ebidx;
531 	struct arm_smmu_ll_queue llq = {
532 		.max_n_shift	= cmdq->q.llq.max_n_shift,
533 		.prod		= sprod,
534 	};
535 
536 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
537 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
538 
539 	while (llq.prod != eprod) {
540 		unsigned long mask;
541 		atomic_long_t *ptr;
542 		u32 limit = BITS_PER_LONG;
543 
544 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
545 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
546 
547 		ptr = &cmdq->valid_map[swidx];
548 
549 		if ((swidx == ewidx) && (sbidx < ebidx))
550 			limit = ebidx;
551 
552 		mask = GENMASK(limit - 1, sbidx);
553 
554 		/*
555 		 * The valid bit is the inverse of the wrap bit. This means
556 		 * that a zero-initialised queue is invalid and, after marking
557 		 * all entries as valid, they become invalid again when we
558 		 * wrap.
559 		 */
560 		if (set) {
561 			atomic_long_xor(mask, ptr);
562 		} else { /* Poll */
563 			unsigned long valid;
564 
565 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
566 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
567 		}
568 
569 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
570 	}
571 }
572 
573 /* Mark all entries in the range [sprod, eprod) as valid */
574 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
575 					u32 sprod, u32 eprod)
576 {
577 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
578 }
579 
580 /* Wait for all entries in the range [sprod, eprod) to become valid */
581 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
582 					 u32 sprod, u32 eprod)
583 {
584 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
585 }
586 
587 /* Wait for the command queue to become non-full */
588 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
589 					     struct arm_smmu_ll_queue *llq)
590 {
591 	unsigned long flags;
592 	struct arm_smmu_queue_poll qp;
593 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
594 	int ret = 0;
595 
596 	/*
597 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
598 	 * that fails, spin until somebody else updates it for us.
599 	 */
600 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
601 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
602 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
603 		llq->val = READ_ONCE(cmdq->q.llq.val);
604 		return 0;
605 	}
606 
607 	queue_poll_init(smmu, &qp);
608 	do {
609 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
610 		if (!queue_full(llq))
611 			break;
612 
613 		ret = queue_poll(&qp);
614 	} while (!ret);
615 
616 	return ret;
617 }
618 
619 /*
620  * Wait until the SMMU signals a CMD_SYNC completion MSI.
621  * Must be called with the cmdq lock held in some capacity.
622  */
623 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
624 					  struct arm_smmu_ll_queue *llq)
625 {
626 	int ret = 0;
627 	struct arm_smmu_queue_poll qp;
628 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
629 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
630 
631 	queue_poll_init(smmu, &qp);
632 
633 	/*
634 	 * The MSI won't generate an event, since it's being written back
635 	 * into the command queue.
636 	 */
637 	qp.wfe = false;
638 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
639 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
640 	return ret;
641 }
642 
643 /*
644  * Wait until the SMMU cons index passes llq->prod.
645  * Must be called with the cmdq lock held in some capacity.
646  */
647 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
648 					       struct arm_smmu_ll_queue *llq)
649 {
650 	struct arm_smmu_queue_poll qp;
651 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
652 	u32 prod = llq->prod;
653 	int ret = 0;
654 
655 	queue_poll_init(smmu, &qp);
656 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
657 	do {
658 		if (queue_consumed(llq, prod))
659 			break;
660 
661 		ret = queue_poll(&qp);
662 
663 		/*
664 		 * This needs to be a readl() so that our subsequent call
665 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
666 		 *
667 		 * Specifically, we need to ensure that we observe all
668 		 * shared_lock()s by other CMD_SYNCs that share our owner,
669 		 * so that a failing call to tryunlock() means that we're
670 		 * the last one out and therefore we can safely advance
671 		 * cmdq->q.llq.cons. Roughly speaking:
672 		 *
673 		 * CPU 0		CPU1			CPU2 (us)
674 		 *
675 		 * if (sync)
676 		 * 	shared_lock();
677 		 *
678 		 * dma_wmb();
679 		 * set_valid_map();
680 		 *
681 		 * 			if (owner) {
682 		 *				poll_valid_map();
683 		 *				<control dependency>
684 		 *				writel(prod_reg);
685 		 *
686 		 *						readl(cons_reg);
687 		 *						tryunlock();
688 		 *
689 		 * Requires us to see CPU 0's shared_lock() acquisition.
690 		 */
691 		llq->cons = readl(cmdq->q.cons_reg);
692 	} while (!ret);
693 
694 	return ret;
695 }
696 
697 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
698 					 struct arm_smmu_ll_queue *llq)
699 {
700 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
701 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
702 
703 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
704 }
705 
706 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
707 					u32 prod, int n)
708 {
709 	int i;
710 	struct arm_smmu_ll_queue llq = {
711 		.max_n_shift	= cmdq->q.llq.max_n_shift,
712 		.prod		= prod,
713 	};
714 
715 	for (i = 0; i < n; ++i) {
716 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
717 
718 		prod = queue_inc_prod_n(&llq, i);
719 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
720 	}
721 }
722 
723 /*
724  * This is the actual insertion function, and provides the following
725  * ordering guarantees to callers:
726  *
727  * - There is a dma_wmb() before publishing any commands to the queue.
728  *   This can be relied upon to order prior writes to data structures
729  *   in memory (such as a CD or an STE) before the command.
730  *
731  * - On completion of a CMD_SYNC, there is a control dependency.
732  *   This can be relied upon to order subsequent writes to memory (e.g.
733  *   freeing an IOVA) after completion of the CMD_SYNC.
734  *
735  * - Command insertion is totally ordered, so if two CPUs each race to
736  *   insert their own list of commands then all of the commands from one
737  *   CPU will appear before any of the commands from the other CPU.
738  */
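/*
 * For example, arm_smmu_write_ctx_desc() and arm_smmu_write_strtab_ent() rely
 * on the first guarantee: the CD/STE dwords they write before issuing
 * CFGI_CD/CFGI_STE are ordered before the command by the dma_wmb() below.
 */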
739 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
740 				       u64 *cmds, int n, bool sync)
741 {
742 	u64 cmd_sync[CMDQ_ENT_DWORDS];
743 	u32 prod;
744 	unsigned long flags;
745 	bool owner;
746 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
747 	struct arm_smmu_ll_queue llq = {
748 		.max_n_shift = cmdq->q.llq.max_n_shift,
749 	}, head = llq;
750 	int ret = 0;
751 
752 	/* 1. Allocate some space in the queue */
753 	local_irq_save(flags);
754 	llq.val = READ_ONCE(cmdq->q.llq.val);
755 	do {
756 		u64 old;
757 
758 		while (!queue_has_space(&llq, n + sync)) {
759 			local_irq_restore(flags);
760 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
761 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
762 			local_irq_save(flags);
763 		}
764 
765 		head.cons = llq.cons;
766 		head.prod = queue_inc_prod_n(&llq, n + sync) |
767 					     CMDQ_PROD_OWNED_FLAG;
768 
769 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
770 		if (old == llq.val)
771 			break;
772 
773 		llq.val = old;
774 	} while (1);
775 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
776 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
777 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
778 
779 	/*
780 	 * 2. Write our commands into the queue
781 	 * Dependency ordering from the cmpxchg() loop above.
782 	 */
783 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
784 	if (sync) {
785 		prod = queue_inc_prod_n(&llq, n);
786 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
787 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
788 
789 		/*
790 		 * In order to determine completion of our CMD_SYNC, we must
791 		 * ensure that the queue can't wrap twice without us noticing.
792 		 * We achieve that by taking the cmdq lock as shared before
793 		 * marking our slot as valid.
794 		 */
795 		arm_smmu_cmdq_shared_lock(cmdq);
796 	}
797 
798 	/* 3. Mark our slots as valid, ensuring commands are visible first */
799 	dma_wmb();
800 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
801 
802 	/* 4. If we are the owner, take control of the SMMU hardware */
803 	if (owner) {
804 		/* a. Wait for previous owner to finish */
805 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
806 
807 		/* b. Stop gathering work by clearing the owned flag */
808 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
809 						   &cmdq->q.llq.atomic.prod);
810 		prod &= ~CMDQ_PROD_OWNED_FLAG;
811 
812 		/*
813 		 * c. Wait for any gathered work to be written to the queue.
814 		 * Note that we read our own entries so that we have the control
815 		 * dependency required by (d).
816 		 */
817 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
818 
819 		/*
820 		 * d. Advance the hardware prod pointer
821 		 * Control dependency ordering from the entries becoming valid.
822 		 */
823 		writel_relaxed(prod, cmdq->q.prod_reg);
824 
825 		/*
826 		 * e. Tell the next owner we're done
827 		 * Make sure we've updated the hardware first, so that we don't
828 		 * race to update prod and potentially move it backwards.
829 		 */
830 		atomic_set_release(&cmdq->owner_prod, prod);
831 	}
832 
833 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
834 	if (sync) {
835 		llq.prod = queue_inc_prod_n(&llq, n);
836 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
837 		if (ret) {
838 			dev_err_ratelimited(smmu->dev,
839 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
840 					    llq.prod,
841 					    readl_relaxed(cmdq->q.prod_reg),
842 					    readl_relaxed(cmdq->q.cons_reg));
843 		}
844 
845 		/*
846 		 * Try to unlock the cmdq lock. This will fail if we're the last
847 		 * reader, in which case we can safely update cmdq->q.llq.cons
848 		 */
849 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
850 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
851 			arm_smmu_cmdq_shared_unlock(cmdq);
852 		}
853 	}
854 
855 	local_irq_restore(flags);
856 	return ret;
857 }
858 
859 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
860 				   struct arm_smmu_cmdq_ent *ent)
861 {
862 	u64 cmd[CMDQ_ENT_DWORDS];
863 
864 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
865 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
866 			 ent->opcode);
867 		return -EINVAL;
868 	}
869 
870 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
871 }
872 
873 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
874 {
875 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
876 }
877 
878 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
879 				    struct arm_smmu_cmdq_batch *cmds,
880 				    struct arm_smmu_cmdq_ent *cmd)
881 {
882 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
883 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
884 		cmds->num = 0;
885 	}
886 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
887 	cmds->num++;
888 }
889 
890 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
891 				      struct arm_smmu_cmdq_batch *cmds)
892 {
893 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
894 }
895 
896 /* Context descriptor manipulation functions */
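/*
 * A domain's CD table is either linear (one CD per SSID) or two-level:
 * ssid >> CTXDESC_SPLIT selects an L1 descriptor pointing at a leaf table of
 * CTXDESC_L2_ENTRIES CDs, and the low SSID bits index a CD within that leaf
 * (see arm_smmu_get_cd_ptr()).
 */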
897 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
898 {
899 	struct arm_smmu_cmdq_ent cmd = {
900 		.opcode = CMDQ_OP_TLBI_NH_ASID,
901 		.tlbi.asid = asid,
902 	};
903 
904 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
905 	arm_smmu_cmdq_issue_sync(smmu);
906 }
907 
908 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
909 			     int ssid, bool leaf)
910 {
911 	size_t i;
912 	unsigned long flags;
913 	struct arm_smmu_master *master;
914 	struct arm_smmu_cmdq_batch cmds = {};
915 	struct arm_smmu_device *smmu = smmu_domain->smmu;
916 	struct arm_smmu_cmdq_ent cmd = {
917 		.opcode	= CMDQ_OP_CFGI_CD,
918 		.cfgi	= {
919 			.ssid	= ssid,
920 			.leaf	= leaf,
921 		},
922 	};
923 
924 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
925 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
926 		for (i = 0; i < master->num_sids; i++) {
927 			cmd.cfgi.sid = master->sids[i];
928 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
929 		}
930 	}
931 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
932 
933 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
934 }
935 
936 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
937 					struct arm_smmu_l1_ctx_desc *l1_desc)
938 {
939 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
940 
941 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
942 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
943 	if (!l1_desc->l2ptr) {
944 		dev_warn(smmu->dev,
945 			 "failed to allocate context descriptor table\n");
946 		return -ENOMEM;
947 	}
948 	return 0;
949 }
950 
951 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
952 				      struct arm_smmu_l1_ctx_desc *l1_desc)
953 {
954 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
955 		  CTXDESC_L1_DESC_V;
956 
957 	/* See comment in arm_smmu_write_ctx_desc() */
958 	WRITE_ONCE(*dst, cpu_to_le64(val));
959 }
960 
961 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
962 				   u32 ssid)
963 {
964 	__le64 *l1ptr;
965 	unsigned int idx;
966 	struct arm_smmu_l1_ctx_desc *l1_desc;
967 	struct arm_smmu_device *smmu = smmu_domain->smmu;
968 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
969 
970 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
971 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
972 
973 	idx = ssid >> CTXDESC_SPLIT;
974 	l1_desc = &cdcfg->l1_desc[idx];
975 	if (!l1_desc->l2ptr) {
976 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
977 			return NULL;
978 
979 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
980 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
981 		/* An invalid L1CD can be cached */
982 		arm_smmu_sync_cd(smmu_domain, ssid, false);
983 	}
984 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
985 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
986 }
987 
988 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
989 			    struct arm_smmu_ctx_desc *cd)
990 {
991 	/*
992 	 * This function handles the following cases:
993 	 *
994 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
995 	 * (2) Install a secondary CD, for SID+SSID traffic.
996 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
997 	 *     CD, then invalidate the old entry and mappings.
998 	 * (4) Remove a secondary CD.
999 	 */
1000 	u64 val;
1001 	bool cd_live;
1002 	__le64 *cdptr;
1003 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1004 
1005 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1006 		return -E2BIG;
1007 
1008 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1009 	if (!cdptr)
1010 		return -ENOMEM;
1011 
1012 	val = le64_to_cpu(cdptr[0]);
1013 	cd_live = !!(val & CTXDESC_CD_0_V);
1014 
1015 	if (!cd) { /* (4) */
1016 		val = 0;
1017 	} else if (cd_live) { /* (3) */
1018 		val &= ~CTXDESC_CD_0_ASID;
1019 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1020 		/*
1021 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1022 		 * this substream's traffic
1023 		 */
1024 	} else { /* (1) and (2) */
1025 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1026 		cdptr[2] = 0;
1027 		cdptr[3] = cpu_to_le64(cd->mair);
1028 
1029 		/*
1030 		 * STE is live, and the SMMU might read dwords of this CD in any
1031 		 * order. Ensure that it observes valid values before reading
1032 		 * V=1.
1033 		 */
1034 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1035 
1036 		val = cd->tcr |
1037 #ifdef __BIG_ENDIAN
1038 			CTXDESC_CD_0_ENDI |
1039 #endif
1040 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1041 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1042 			CTXDESC_CD_0_AA64 |
1043 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1044 			CTXDESC_CD_0_V;
1045 
1046 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1047 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1048 			val |= CTXDESC_CD_0_S;
1049 	}
1050 
1051 	/*
1052 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1053 	 * "Configuration structures and configuration invalidation completion"
1054 	 *
1055 	 *   The size of single-copy atomic reads made by the SMMU is
1056 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1057 	 *   field within an aligned 64-bit span of a structure can be altered
1058 	 *   without first making the structure invalid.
1059 	 */
1060 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1061 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1062 	return 0;
1063 }
1064 
1065 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1066 {
1067 	int ret;
1068 	size_t l1size;
1069 	size_t max_contexts;
1070 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1071 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1072 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1073 
1074 	max_contexts = 1 << cfg->s1cdmax;
1075 
1076 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1077 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1078 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1079 		cdcfg->num_l1_ents = max_contexts;
1080 
1081 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1082 	} else {
1083 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1084 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1085 						  CTXDESC_L2_ENTRIES);
1086 
1087 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1088 					      sizeof(*cdcfg->l1_desc),
1089 					      GFP_KERNEL);
1090 		if (!cdcfg->l1_desc)
1091 			return -ENOMEM;
1092 
1093 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1094 	}
1095 
1096 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1097 					   GFP_KERNEL);
1098 	if (!cdcfg->cdtab) {
1099 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1100 		ret = -ENOMEM;
1101 		goto err_free_l1;
1102 	}
1103 
1104 	return 0;
1105 
1106 err_free_l1:
1107 	if (cdcfg->l1_desc) {
1108 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1109 		cdcfg->l1_desc = NULL;
1110 	}
1111 	return ret;
1112 }
1113 
1114 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1115 {
1116 	int i;
1117 	size_t size, l1size;
1118 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1119 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1120 
1121 	if (cdcfg->l1_desc) {
1122 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1123 
1124 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1125 			if (!cdcfg->l1_desc[i].l2ptr)
1126 				continue;
1127 
1128 			dmam_free_coherent(smmu->dev, size,
1129 					   cdcfg->l1_desc[i].l2ptr,
1130 					   cdcfg->l1_desc[i].l2ptr_dma);
1131 		}
1132 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1133 		cdcfg->l1_desc = NULL;
1134 
1135 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1136 	} else {
1137 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1138 	}
1139 
1140 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1141 	cdcfg->cdtab_dma = 0;
1142 	cdcfg->cdtab = NULL;
1143 }
1144 
1145 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1146 {
1147 	bool free;
1148 	struct arm_smmu_ctx_desc *old_cd;
1149 
1150 	if (!cd->asid)
1151 		return false;
1152 
1153 	free = refcount_dec_and_test(&cd->refs);
1154 	if (free) {
1155 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1156 		WARN_ON(old_cd != cd);
1157 	}
1158 	return free;
1159 }
1160 
1161 /* Stream table manipulation functions */
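/*
 * For a two-level stream table, sid >> STRTAB_SPLIT selects an L1 descriptor
 * and each leaf table holds 1 << STRTAB_SPLIT STEs, installed lazily by
 * arm_smmu_init_l2_strtab().
 */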
1162 static void
1163 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1164 {
1165 	u64 val = 0;
1166 
1167 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1168 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1169 
1170 	/* See comment in arm_smmu_write_ctx_desc() */
1171 	WRITE_ONCE(*dst, cpu_to_le64(val));
1172 }
1173 
1174 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1175 {
1176 	struct arm_smmu_cmdq_ent cmd = {
1177 		.opcode	= CMDQ_OP_CFGI_STE,
1178 		.cfgi	= {
1179 			.sid	= sid,
1180 			.leaf	= true,
1181 		},
1182 	};
1183 
1184 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1185 	arm_smmu_cmdq_issue_sync(smmu);
1186 }
1187 
1188 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1189 				      __le64 *dst)
1190 {
1191 	/*
1192 	 * This is hideously complicated, but we only really care about
1193 	 * three cases at the moment:
1194 	 *
1195 	 * 1. Invalid (all zero) -> bypass/fault (init)
1196 	 * 2. Bypass/fault -> translation/bypass (attach)
1197 	 * 3. Translation/bypass -> bypass/fault (detach)
1198 	 *
1199 	 * Given that we can't update the STE atomically and the SMMU
1200 	 * doesn't read the thing in a defined order, that leaves us
1201 	 * with the following maintenance requirements:
1202 	 *
1203 	 * 1. Update Config, return (init time STEs aren't live)
1204 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1205 	 * 3. Update Config, sync
1206 	 */
1207 	u64 val = le64_to_cpu(dst[0]);
1208 	bool ste_live = false;
1209 	struct arm_smmu_device *smmu = NULL;
1210 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1211 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1212 	struct arm_smmu_domain *smmu_domain = NULL;
1213 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1214 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1215 		.prefetch	= {
1216 			.sid	= sid,
1217 		},
1218 	};
1219 
1220 	if (master) {
1221 		smmu_domain = master->domain;
1222 		smmu = master->smmu;
1223 	}
1224 
1225 	if (smmu_domain) {
1226 		switch (smmu_domain->stage) {
1227 		case ARM_SMMU_DOMAIN_S1:
1228 			s1_cfg = &smmu_domain->s1_cfg;
1229 			break;
1230 		case ARM_SMMU_DOMAIN_S2:
1231 		case ARM_SMMU_DOMAIN_NESTED:
1232 			s2_cfg = &smmu_domain->s2_cfg;
1233 			break;
1234 		default:
1235 			break;
1236 		}
1237 	}
1238 
1239 	if (val & STRTAB_STE_0_V) {
1240 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1241 		case STRTAB_STE_0_CFG_BYPASS:
1242 			break;
1243 		case STRTAB_STE_0_CFG_S1_TRANS:
1244 		case STRTAB_STE_0_CFG_S2_TRANS:
1245 			ste_live = true;
1246 			break;
1247 		case STRTAB_STE_0_CFG_ABORT:
1248 			BUG_ON(!disable_bypass);
1249 			break;
1250 		default:
1251 			BUG(); /* STE corruption */
1252 		}
1253 	}
1254 
1255 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1256 	val = STRTAB_STE_0_V;
1257 
1258 	/* Bypass/fault */
1259 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1260 		if (!smmu_domain && disable_bypass)
1261 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1262 		else
1263 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1264 
1265 		dst[0] = cpu_to_le64(val);
1266 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1267 						STRTAB_STE_1_SHCFG_INCOMING));
1268 		dst[2] = 0; /* Nuke the VMID */
1269 		/*
1270 		 * The SMMU can perform negative caching, so we must sync
1271 		 * the STE regardless of whether the old value was live.
1272 		 */
1273 		if (smmu)
1274 			arm_smmu_sync_ste_for_sid(smmu, sid);
1275 		return;
1276 	}
1277 
1278 	if (s1_cfg) {
1279 		BUG_ON(ste_live);
1280 		dst[1] = cpu_to_le64(
1281 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1282 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1283 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1284 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1285 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1286 
1287 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1288 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1289 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1290 
1291 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1292 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1293 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1294 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1295 	}
1296 
1297 	if (s2_cfg) {
1298 		BUG_ON(ste_live);
1299 		dst[2] = cpu_to_le64(
1300 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1301 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1302 #ifdef __BIG_ENDIAN
1303 			 STRTAB_STE_2_S2ENDI |
1304 #endif
1305 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1306 			 STRTAB_STE_2_S2R);
1307 
1308 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1309 
1310 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1311 	}
1312 
1313 	if (master->ats_enabled)
1314 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1315 						 STRTAB_STE_1_EATS_TRANS));
1316 
1317 	arm_smmu_sync_ste_for_sid(smmu, sid);
1318 	/* See comment in arm_smmu_write_ctx_desc() */
1319 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1320 	arm_smmu_sync_ste_for_sid(smmu, sid);
1321 
1322 	/* It's likely that we'll want to use the new STE soon */
1323 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1324 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1325 }
1326 
1327 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1328 {
1329 	unsigned int i;
1330 
1331 	for (i = 0; i < nent; ++i) {
1332 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1333 		strtab += STRTAB_STE_DWORDS;
1334 	}
1335 }
1336 
1337 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1338 {
1339 	size_t size;
1340 	void *strtab;
1341 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1342 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1343 
1344 	if (desc->l2ptr)
1345 		return 0;
1346 
1347 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1348 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1349 
1350 	desc->span = STRTAB_SPLIT + 1;
1351 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1352 					  GFP_KERNEL);
1353 	if (!desc->l2ptr) {
1354 		dev_err(smmu->dev,
1355 			"failed to allocate l2 stream table for SID %u\n",
1356 			sid);
1357 		return -ENOMEM;
1358 	}
1359 
1360 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1361 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1362 	return 0;
1363 }
1364 
1365 /* IRQ and event handlers */
1366 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1367 {
1368 	int i;
1369 	struct arm_smmu_device *smmu = dev;
1370 	struct arm_smmu_queue *q = &smmu->evtq.q;
1371 	struct arm_smmu_ll_queue *llq = &q->llq;
1372 	u64 evt[EVTQ_ENT_DWORDS];
1373 
1374 	do {
1375 		while (!queue_remove_raw(q, evt)) {
1376 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1377 
1378 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1379 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1380 				dev_info(smmu->dev, "\t0x%016llx\n",
1381 					 (unsigned long long)evt[i]);
1382 
1383 			cond_resched();
1384 		}
1385 
1386 		/*
1387 		 * Not much we can do on overflow, so scream and pretend we're
1388 		 * trying harder.
1389 		 */
1390 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1391 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1392 	} while (!queue_empty(llq));
1393 
1394 	/* Sync our overflow flag, as we believe we're up to speed */
1395 	queue_sync_cons_ovf(q);
1396 	return IRQ_HANDLED;
1397 }
1398 
1399 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1400 {
1401 	u32 sid, ssid;
1402 	u16 grpid;
1403 	bool ssv, last;
1404 
1405 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1406 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1407 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1408 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1409 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1410 
1411 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1412 	dev_info(smmu->dev,
1413 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1414 		 sid, ssid, grpid, last ? "L" : "",
1415 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1416 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1417 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1418 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1419 		 evt[1] & PRIQ_1_ADDR_MASK);
1420 
1421 	if (last) {
1422 		struct arm_smmu_cmdq_ent cmd = {
1423 			.opcode			= CMDQ_OP_PRI_RESP,
1424 			.substream_valid	= ssv,
1425 			.pri			= {
1426 				.sid	= sid,
1427 				.ssid	= ssid,
1428 				.grpid	= grpid,
1429 				.resp	= PRI_RESP_DENY,
1430 			},
1431 		};
1432 
1433 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1434 	}
1435 }
1436 
1437 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1438 {
1439 	struct arm_smmu_device *smmu = dev;
1440 	struct arm_smmu_queue *q = &smmu->priq.q;
1441 	struct arm_smmu_ll_queue *llq = &q->llq;
1442 	u64 evt[PRIQ_ENT_DWORDS];
1443 
1444 	do {
1445 		while (!queue_remove_raw(q, evt))
1446 			arm_smmu_handle_ppr(smmu, evt);
1447 
1448 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1449 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1450 	} while (!queue_empty(llq));
1451 
1452 	/* Sync our overflow flag, as we believe we're up to speed */
1453 	queue_sync_cons_ovf(q);
1454 	return IRQ_HANDLED;
1455 }
1456 
1457 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1458 
1459 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1460 {
1461 	u32 gerror, gerrorn, active;
1462 	struct arm_smmu_device *smmu = dev;
1463 
1464 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1465 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1466 
1467 	active = gerror ^ gerrorn;
1468 	if (!(active & GERROR_ERR_MASK))
1469 		return IRQ_NONE; /* No errors pending */
1470 
1471 	dev_warn(smmu->dev,
1472 		 "unexpected global error reported (0x%08x), this could be serious\n",
1473 		 active);
1474 
1475 	if (active & GERROR_SFM_ERR) {
1476 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1477 		arm_smmu_device_disable(smmu);
1478 	}
1479 
1480 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1481 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1482 
1483 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1484 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1485 
1486 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1487 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1488 
1489 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1490 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1491 
1492 	if (active & GERROR_PRIQ_ABT_ERR)
1493 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1494 
1495 	if (active & GERROR_EVTQ_ABT_ERR)
1496 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1497 
1498 	if (active & GERROR_CMDQ_ERR)
1499 		arm_smmu_cmdq_skip_err(smmu);
1500 
1501 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1502 	return IRQ_HANDLED;
1503 }
1504 
1505 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1506 {
1507 	struct arm_smmu_device *smmu = dev;
1508 
1509 	arm_smmu_evtq_thread(irq, dev);
1510 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1511 		arm_smmu_priq_thread(irq, dev);
1512 
1513 	return IRQ_HANDLED;
1514 }
1515 
1516 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1517 {
1518 	arm_smmu_gerror_handler(irq, dev);
1519 	return IRQ_WAKE_THREAD;
1520 }
1521 
1522 static void
1523 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1524 			struct arm_smmu_cmdq_ent *cmd)
1525 {
1526 	size_t log2_span;
1527 	size_t span_mask;
1528 	/* ATC invalidates are always on 4096-bytes pages */
1529 	size_t inval_grain_shift = 12;
1530 	unsigned long page_start, page_end;
1531 
1532 	*cmd = (struct arm_smmu_cmdq_ent) {
1533 		.opcode			= CMDQ_OP_ATC_INV,
1534 		.substream_valid	= !!ssid,
1535 		.atc.ssid		= ssid,
1536 	};
1537 
1538 	if (!size) {
1539 		cmd->atc.size = ATC_INV_SIZE_ALL;
1540 		return;
1541 	}
1542 
1543 	page_start	= iova >> inval_grain_shift;
1544 	page_end	= (iova + size - 1) >> inval_grain_shift;
1545 
1546 	/*
1547 	 * In an ATS Invalidate Request, the address must be aligned on the
1548 	 * range size, which must be a power of two number of page sizes. We
1549 	 * thus have to choose between grossly over-invalidating the region, or
1550 	 * splitting the invalidation into multiple commands. For simplicity
1551 	 * we'll go with the first solution, but should refine it in the future
1552 	 * if multiple commands are shown to be more efficient.
1553 	 *
1554 	 * Find the smallest power of two that covers the range. The most
1555 	 * significant differing bit between the start and end addresses,
1556 	 * fls(start ^ end), indicates the required span. For example:
1557 	 *
1558 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1559 	 *		x = 0b1000 ^ 0b1011 = 0b11
1560 	 *		span = 1 << fls(x) = 4
1561 	 *
1562 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1563 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1564 	 *		span = 1 << fls(x) = 16
1565 	 */
1566 	log2_span	= fls_long(page_start ^ page_end);
1567 	span_mask	= (1ULL << log2_span) - 1;
1568 
1569 	page_start	&= ~span_mask;
1570 
1571 	cmd->atc.addr	= page_start << inval_grain_shift;
1572 	cmd->atc.size	= log2_span;
1573 }
1574 
1575 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1576 {
1577 	int i;
1578 	struct arm_smmu_cmdq_ent cmd;
1579 
1580 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1581 
1582 	for (i = 0; i < master->num_sids; i++) {
1583 		cmd.atc.sid = master->sids[i];
1584 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1585 	}
1586 
1587 	return arm_smmu_cmdq_issue_sync(master->smmu);
1588 }
1589 
1590 static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1591 				   int ssid, unsigned long iova, size_t size)
1592 {
1593 	int i;
1594 	unsigned long flags;
1595 	struct arm_smmu_cmdq_ent cmd;
1596 	struct arm_smmu_master *master;
1597 	struct arm_smmu_cmdq_batch cmds = {};
1598 
1599 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1600 		return 0;
1601 
1602 	/*
1603 	 * Ensure that we've completed prior invalidation of the main TLBs
1604 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1605 	 * arm_smmu_enable_ats():
1606 	 *
1607 	 *	// unmap()			// arm_smmu_enable_ats()
1608 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1609 	 *	smp_mb();			[...]
1610 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1611 	 *
1612 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1613 	 * ATS was enabled at the PCI device before completion of the TLBI.
1614 	 */
1615 	smp_mb();
1616 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1617 		return 0;
1618 
1619 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1620 
1621 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1622 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1623 		if (!master->ats_enabled)
1624 			continue;
1625 
1626 		for (i = 0; i < master->num_sids; i++) {
1627 			cmd.atc.sid = master->sids[i];
1628 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1629 		}
1630 	}
1631 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1632 
1633 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1634 }
1635 
1636 /* IO_PGTABLE API */
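/*
 * The functions below back the iommu_flush_ops callbacks handed to io-pgtable,
 * i.e. they perform the TLB (and ATC) invalidation required when mappings are
 * changed or torn down.
 */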
1637 static void arm_smmu_tlb_inv_context(void *cookie)
1638 {
1639 	struct arm_smmu_domain *smmu_domain = cookie;
1640 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1641 	struct arm_smmu_cmdq_ent cmd;
1642 
1643 	/*
1644 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1645 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1646 	 * to the SMMU. We rely on the dma_wmb() implicit in cmd
1647 	 * insertion to guarantee those are observed before the TLBI. Do be
1648 	 * careful, 007.
1649 	 */
1650 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1651 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1652 	} else {
1653 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1654 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1655 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1656 		arm_smmu_cmdq_issue_sync(smmu);
1657 	}
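	/* TLBI commands don't reach PCIe ATCs; invalidate those separately. */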
1658 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1659 }
1660 
1661 static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1662 				   size_t granule, bool leaf,
1663 				   struct arm_smmu_domain *smmu_domain)
1664 {
1665 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1666 	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1667 	size_t inv_range = granule;
1668 	struct arm_smmu_cmdq_batch cmds = {};
1669 	struct arm_smmu_cmdq_ent cmd = {
1670 		.tlbi = {
1671 			.leaf	= leaf,
1672 		},
1673 	};
1674 
1675 	if (!size)
1676 		return;
1677 
1678 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1679 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1680 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1681 	} else {
1682 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1683 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1684 	}
1685 
1686 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1687 		/* Get the leaf page size */
1688 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1689 
1690 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1691 		cmd.tlbi.tg = (tg - 10) / 2;
1692 
1693 		/* Determine what level the granule is at */
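		/*
		 * Each table level resolves (tg - 3) bits, so e.g. a 2MB
		 * block with 4KB pages (tg = 12) gives ttl = 4 - 18/9 = 2.
		 */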
1694 		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1695 
1696 		num_pages = size >> tg;
1697 	}
1698 
1699 	while (iova < end) {
1700 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1701 			/*
1702 			 * On each iteration of the loop, the range covers 5 bits'
1703 			 * worth of the remaining aligned size.
1704 			 * The range in pages is:
1705 			 *
1706 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
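			 *
			 * e.g. with 4KB pages (tg = 12), num_pages = 48 gives
			 * scale = 4 and num = 3, so a single command covers
			 * 3 << (4 + 12) = 192KB and num_pages drops to zero.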
1707 			 */
1708 			unsigned long scale, num;
1709 
1710 			/* Determine the power of 2 multiple number of pages */
1711 			scale = __ffs(num_pages);
1712 			cmd.tlbi.scale = scale;
1713 
1714 			/* Determine how many chunks of 2^scale size we have */
1715 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1716 			cmd.tlbi.num = num - 1;
1717 
1718 			/* range is num * 2^scale * pgsize */
1719 			inv_range = num << (scale + tg);
1720 
1721 			/* Clear out the lower order bits for the next iteration */
1722 			num_pages -= num << scale;
1723 		}
1724 
1725 		cmd.tlbi.addr = iova;
1726 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1727 		iova += inv_range;
1728 	}
1729 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1730 
1731 	/*
1732 	 * Unfortunately, this can't be leaf-only since we may have
1733 	 * zapped an entire table.
1734 	 */
1735 	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1736 }
1737 
1738 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1739 					 unsigned long iova, size_t granule,
1740 					 void *cookie)
1741 {
1742 	struct arm_smmu_domain *smmu_domain = cookie;
1743 	struct iommu_domain *domain = &smmu_domain->domain;
1744 
1745 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1746 }
1747 
1748 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1749 				  size_t granule, void *cookie)
1750 {
1751 	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1752 }
1753 
1754 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1755 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1756 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1757 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1758 };
1759 
1760 /* IOMMU API */
1761 static bool arm_smmu_capable(enum iommu_cap cap)
1762 {
1763 	switch (cap) {
1764 	case IOMMU_CAP_CACHE_COHERENCY:
1765 		return true;
1766 	case IOMMU_CAP_NOEXEC:
1767 		return true;
1768 	default:
1769 		return false;
1770 	}
1771 }
1772 
1773 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1774 {
1775 	struct arm_smmu_domain *smmu_domain;
1776 
1777 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1778 	    type != IOMMU_DOMAIN_DMA &&
1779 	    type != IOMMU_DOMAIN_IDENTITY)
1780 		return NULL;
1781 
1782 	/*
1783 	 * Allocate the domain and initialise some of its data structures.
1784 	 * We can't really do anything meaningful until we've added a
1785 	 * master.
1786 	 */
1787 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1788 	if (!smmu_domain)
1789 		return NULL;
1790 
1791 	if (type == IOMMU_DOMAIN_DMA &&
1792 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1793 		kfree(smmu_domain);
1794 		return NULL;
1795 	}
1796 
1797 	mutex_init(&smmu_domain->init_mutex);
1798 	INIT_LIST_HEAD(&smmu_domain->devices);
1799 	spin_lock_init(&smmu_domain->devices_lock);
1800 
1801 	return &smmu_domain->domain;
1802 }
1803 
1804 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1805 {
1806 	int idx, size = 1 << span;
1807 
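	/*
	 * find_first_zero_bit() isn't atomic, so if another caller claims the
	 * same bit first, test_and_set_bit() fails and we rescan the map.
	 */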
1808 	do {
1809 		idx = find_first_zero_bit(map, size);
1810 		if (idx == size)
1811 			return -ENOSPC;
1812 	} while (test_and_set_bit(idx, map));
1813 
1814 	return idx;
1815 }
1816 
1817 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1818 {
1819 	clear_bit(idx, map);
1820 }
1821 
1822 static void arm_smmu_domain_free(struct iommu_domain *domain)
1823 {
1824 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1825 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1826 
1827 	iommu_put_dma_cookie(domain);
1828 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1829 
1830 	/* Free the CD and ASID, if we allocated them */
1831 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1832 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1833 
1834 		/* Prevent SVA from touching the CD while we're freeing it */
1835 		mutex_lock(&arm_smmu_asid_lock);
1836 		if (cfg->cdcfg.cdtab)
1837 			arm_smmu_free_cd_tables(smmu_domain);
1838 		arm_smmu_free_asid(&cfg->cd);
1839 		mutex_unlock(&arm_smmu_asid_lock);
1840 	} else {
1841 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1842 		if (cfg->vmid)
1843 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1844 	}
1845 
1846 	kfree(smmu_domain);
1847 }
1848 
1849 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1850 				       struct arm_smmu_master *master,
1851 				       struct io_pgtable_cfg *pgtbl_cfg)
1852 {
1853 	int ret;
1854 	u32 asid;
1855 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1856 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1857 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1858 
1859 	refcount_set(&cfg->cd.refs, 1);
1860 
1861 	/* Prevent SVA from modifying the ASID until it is written to the CD */
1862 	mutex_lock(&arm_smmu_asid_lock);
1863 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1864 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1865 	if (ret)
1866 		goto out_unlock;
1867 
1868 	cfg->s1cdmax = master->ssid_bits;
1869 
1870 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
1871 	if (ret)
1872 		goto out_free_asid;
1873 
1874 	cfg->cd.asid	= (u16)asid;
1875 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1876 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1877 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1878 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1879 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1880 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1881 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1882 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1883 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
1884 
1885 	/*
1886 	 * Note that this will end up calling arm_smmu_sync_cd() before
1887 	 * the master has been added to the devices list for this domain.
1888 	 * This isn't an issue because the STE hasn't been installed yet.
1889 	 */
1890 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1891 	if (ret)
1892 		goto out_free_cd_tables;
1893 
1894 	mutex_unlock(&arm_smmu_asid_lock);
1895 	return 0;
1896 
1897 out_free_cd_tables:
1898 	arm_smmu_free_cd_tables(smmu_domain);
1899 out_free_asid:
1900 	arm_smmu_free_asid(&cfg->cd);
1901 out_unlock:
1902 	mutex_unlock(&arm_smmu_asid_lock);
1903 	return ret;
1904 }
1905 
1906 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1907 				       struct arm_smmu_master *master,
1908 				       struct io_pgtable_cfg *pgtbl_cfg)
1909 {
1910 	int vmid;
1911 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1912 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1913 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1914 
1915 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1916 	if (vmid < 0)
1917 		return vmid;
1918 
1919 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1920 	cfg->vmid	= (u16)vmid;
1921 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1922 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1923 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1924 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1925 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1926 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1927 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1928 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1929 	return 0;
1930 }
1931 
1932 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1933 				    struct arm_smmu_master *master)
1934 {
1935 	int ret;
1936 	unsigned long ias, oas;
1937 	enum io_pgtable_fmt fmt;
1938 	struct io_pgtable_cfg pgtbl_cfg;
1939 	struct io_pgtable_ops *pgtbl_ops;
1940 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1941 				 struct arm_smmu_master *,
1942 				 struct io_pgtable_cfg *);
1943 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1944 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1945 
1946 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1947 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1948 		return 0;
1949 	}
1950 
1951 	/* Restrict the stage to what we can actually support */
1952 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1953 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1954 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1955 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1956 
1957 	switch (smmu_domain->stage) {
1958 	case ARM_SMMU_DOMAIN_S1:
1959 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1960 		ias = min_t(unsigned long, ias, VA_BITS);
1961 		oas = smmu->ias;
1962 		fmt = ARM_64_LPAE_S1;
1963 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1964 		break;
1965 	case ARM_SMMU_DOMAIN_NESTED:
1966 	case ARM_SMMU_DOMAIN_S2:
1967 		ias = smmu->ias;
1968 		oas = smmu->oas;
1969 		fmt = ARM_64_LPAE_S2;
1970 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1971 		break;
1972 	default:
1973 		return -EINVAL;
1974 	}
1975 
1976 	pgtbl_cfg = (struct io_pgtable_cfg) {
1977 		.pgsize_bitmap	= smmu->pgsize_bitmap,
1978 		.ias		= ias,
1979 		.oas		= oas,
1980 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
1981 		.tlb		= &arm_smmu_flush_ops,
1982 		.iommu_dev	= smmu->dev,
1983 	};
1984 
1985 	if (smmu_domain->non_strict)
1986 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1987 
1988 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1989 	if (!pgtbl_ops)
1990 		return -ENOMEM;
1991 
1992 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1993 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1994 	domain->geometry.force_aperture = true;
1995 
1996 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
1997 	if (ret < 0) {
1998 		free_io_pgtable_ops(pgtbl_ops);
1999 		return ret;
2000 	}
2001 
2002 	smmu_domain->pgtbl_ops = pgtbl_ops;
2003 	return 0;
2004 }
2005 
2006 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2007 {
2008 	__le64 *step;
2009 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2010 
2011 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2012 		struct arm_smmu_strtab_l1_desc *l1_desc;
2013 		int idx;
2014 
2015 		/* Two-level walk */
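		/*
		 * The upper SID bits select the L1 descriptor; the low
		 * STRTAB_SPLIT bits select the STE within its L2 table.
		 */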
2016 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2017 		l1_desc = &cfg->l1_desc[idx];
2018 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2019 		step = &l1_desc->l2ptr[idx];
2020 	} else {
2021 		/* Simple linear lookup */
2022 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2023 	}
2024 
2025 	return step;
2026 }
2027 
2028 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2029 {
2030 	int i, j;
2031 	struct arm_smmu_device *smmu = master->smmu;
2032 
2033 	for (i = 0; i < master->num_sids; ++i) {
2034 		u32 sid = master->sids[i];
2035 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2036 
2037 		/* Bridged PCI devices may end up with duplicated IDs */
2038 		for (j = 0; j < i; j++)
2039 			if (master->sids[j] == sid)
2040 				break;
2041 		if (j < i)
2042 			continue;
2043 
2044 		arm_smmu_write_strtab_ent(master, sid, step);
2045 	}
2046 }
2047 
2048 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2049 {
2050 	struct device *dev = master->dev;
2051 	struct arm_smmu_device *smmu = master->smmu;
2052 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2053 
2054 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2055 		return false;
2056 
2057 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2058 		return false;
2059 
2060 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2061 }
2062 
2063 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2064 {
2065 	size_t stu;
2066 	struct pci_dev *pdev;
2067 	struct arm_smmu_device *smmu = master->smmu;
2068 	struct arm_smmu_domain *smmu_domain = master->domain;
2069 
2070 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2071 	if (!master->ats_enabled)
2072 		return;
2073 
2074 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2075 	stu = __ffs(smmu->pgsize_bitmap);
2076 	pdev = to_pci_dev(master->dev);
2077 
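	/*
	 * Publish the new ATS master and flush the ATC before enabling ATS
	 * at the endpoint; this pairs with the smp_mb() in
	 * arm_smmu_atc_inv_domain() so concurrent unmaps can't miss us.
	 */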
2078 	atomic_inc(&smmu_domain->nr_ats_masters);
2079 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2080 	if (pci_enable_ats(pdev, stu))
2081 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2082 }
2083 
2084 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2085 {
2086 	struct arm_smmu_domain *smmu_domain = master->domain;
2087 
2088 	if (!master->ats_enabled)
2089 		return;
2090 
2091 	pci_disable_ats(to_pci_dev(master->dev));
2092 	/*
2093 	 * Ensure ATS is disabled at the endpoint before we issue the
2094 	 * ATC invalidation via the SMMU.
2095 	 */
2096 	wmb();
2097 	arm_smmu_atc_inv_master(master);
2098 	atomic_dec(&smmu_domain->nr_ats_masters);
2099 }
2100 
2101 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2102 {
2103 	int ret;
2104 	int features;
2105 	int num_pasids;
2106 	struct pci_dev *pdev;
2107 
2108 	if (!dev_is_pci(master->dev))
2109 		return -ENODEV;
2110 
2111 	pdev = to_pci_dev(master->dev);
2112 
2113 	features = pci_pasid_features(pdev);
2114 	if (features < 0)
2115 		return features;
2116 
2117 	num_pasids = pci_max_pasids(pdev);
2118 	if (num_pasids <= 0)
2119 		return num_pasids;
2120 
2121 	ret = pci_enable_pasid(pdev, features);
2122 	if (ret) {
2123 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2124 		return ret;
2125 	}
2126 
2127 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2128 				  master->smmu->ssid_bits);
2129 	return 0;
2130 }
2131 
2132 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2133 {
2134 	struct pci_dev *pdev;
2135 
2136 	if (!dev_is_pci(master->dev))
2137 		return;
2138 
2139 	pdev = to_pci_dev(master->dev);
2140 
2141 	if (!pdev->pasid_enabled)
2142 		return;
2143 
2144 	master->ssid_bits = 0;
2145 	pci_disable_pasid(pdev);
2146 }
2147 
2148 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2149 {
2150 	unsigned long flags;
2151 	struct arm_smmu_domain *smmu_domain = master->domain;
2152 
2153 	if (!smmu_domain)
2154 		return;
2155 
2156 	arm_smmu_disable_ats(master);
2157 
2158 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2159 	list_del(&master->domain_head);
2160 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2161 
2162 	master->domain = NULL;
2163 	master->ats_enabled = false;
2164 	arm_smmu_install_ste_for_dev(master);
2165 }
2166 
2167 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2168 {
2169 	int ret = 0;
2170 	unsigned long flags;
2171 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2172 	struct arm_smmu_device *smmu;
2173 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2174 	struct arm_smmu_master *master;
2175 
2176 	if (!fwspec)
2177 		return -ENOENT;
2178 
2179 	master = dev_iommu_priv_get(dev);
2180 	smmu = master->smmu;
2181 
2182 	/*
2183 	 * Checking that SVA is disabled ensures that this device isn't bound to
2184 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2185 	 * be removed concurrently since we're holding the group mutex.
2186 	 */
2187 	if (arm_smmu_master_sva_enabled(master)) {
2188 		dev_err(dev, "cannot attach - SVA enabled\n");
2189 		return -EBUSY;
2190 	}
2191 
2192 	arm_smmu_detach_dev(master);
2193 
2194 	mutex_lock(&smmu_domain->init_mutex);
2195 
2196 	if (!smmu_domain->smmu) {
2197 		smmu_domain->smmu = smmu;
2198 		ret = arm_smmu_domain_finalise(domain, master);
2199 		if (ret) {
2200 			smmu_domain->smmu = NULL;
2201 			goto out_unlock;
2202 		}
2203 	} else if (smmu_domain->smmu != smmu) {
2204 		dev_err(dev,
2205 			"cannot attach to SMMU %s (upstream of %s)\n",
2206 			dev_name(smmu_domain->smmu->dev),
2207 			dev_name(smmu->dev));
2208 		ret = -ENXIO;
2209 		goto out_unlock;
2210 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2211 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2212 		dev_err(dev,
2213 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2214 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2215 		ret = -EINVAL;
2216 		goto out_unlock;
2217 	}
2218 
2219 	master->domain = smmu_domain;
2220 
2221 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2222 		master->ats_enabled = arm_smmu_ats_supported(master);
2223 
2224 	arm_smmu_install_ste_for_dev(master);
2225 
2226 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2227 	list_add(&master->domain_head, &smmu_domain->devices);
2228 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2229 
2230 	arm_smmu_enable_ats(master);
2231 
2232 out_unlock:
2233 	mutex_unlock(&smmu_domain->init_mutex);
2234 	return ret;
2235 }
2236 
2237 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2238 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2239 {
2240 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2241 
2242 	if (!ops)
2243 		return -ENODEV;
2244 
2245 	return ops->map(ops, iova, paddr, size, prot, gfp);
2246 }
2247 
2248 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2249 			     size_t size, struct iommu_iotlb_gather *gather)
2250 {
2251 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2252 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2253 
2254 	if (!ops)
2255 		return 0;
2256 
2257 	return ops->unmap(ops, iova, size, gather);
2258 }
2259 
2260 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2261 {
2262 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2263 
2264 	if (smmu_domain->smmu)
2265 		arm_smmu_tlb_inv_context(smmu_domain);
2266 }
2267 
2268 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2269 				struct iommu_iotlb_gather *gather)
2270 {
2271 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2272 
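	/* gather->end is inclusive, hence the +1 when computing the size. */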
2273 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start + 1,
2274 			       gather->pgsize, true, smmu_domain);
2275 }
2276 
2277 static phys_addr_t
2278 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2279 {
2280 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2281 
2282 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2283 		return iova;
2284 
2285 	if (!ops)
2286 		return 0;
2287 
2288 	return ops->iova_to_phys(ops, iova);
2289 }
2290 
2291 static struct platform_driver arm_smmu_driver;
2292 
2293 static
2294 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2295 {
2296 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2297 							  fwnode);
2298 	put_device(dev);
2299 	return dev ? dev_get_drvdata(dev) : NULL;
2300 }
2301 
2302 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2303 {
2304 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2305 
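	/* With a 2-level table, each L1 descriptor covers 2^STRTAB_SPLIT STEs. */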
2306 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2307 		limit *= 1UL << STRTAB_SPLIT;
2308 
2309 	return sid < limit;
2310 }
2311 
2312 static struct iommu_ops arm_smmu_ops;
2313 
2314 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2315 {
2316 	int i, ret;
2317 	struct arm_smmu_device *smmu;
2318 	struct arm_smmu_master *master;
2319 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2320 
2321 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2322 		return ERR_PTR(-ENODEV);
2323 
2324 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2325 		return ERR_PTR(-EBUSY);
2326 
2327 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2328 	if (!smmu)
2329 		return ERR_PTR(-ENODEV);
2330 
2331 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2332 	if (!master)
2333 		return ERR_PTR(-ENOMEM);
2334 
2335 	master->dev = dev;
2336 	master->smmu = smmu;
2337 	master->sids = fwspec->ids;
2338 	master->num_sids = fwspec->num_ids;
2339 	INIT_LIST_HEAD(&master->bonds);
2340 	dev_iommu_priv_set(dev, master);
2341 
2342 	/* Check the SIDs are in range of the SMMU and our stream table */
2343 	for (i = 0; i < master->num_sids; i++) {
2344 		u32 sid = master->sids[i];
2345 
2346 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2347 			ret = -ERANGE;
2348 			goto err_free_master;
2349 		}
2350 
2351 		/* Ensure l2 strtab is initialised */
2352 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2353 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2354 			if (ret)
2355 				goto err_free_master;
2356 		}
2357 	}
2358 
2359 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2360 
2361 	/*
2362 	 * Note that PASID must be enabled before, and disabled after ATS:
2363 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2364 	 *
2365 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2366 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2367 	 *   are changed.
2368 	 */
2369 	arm_smmu_enable_pasid(master);
2370 
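	/*
	 * Without 2-level CD tables, cap the SSID count so the linear
	 * context-descriptor table stays a reasonable size.
	 */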
2371 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2372 		master->ssid_bits = min_t(u8, master->ssid_bits,
2373 					  CTXDESC_LINEAR_CDMAX);
2374 
2375 	return &smmu->iommu;
2376 
2377 err_free_master:
2378 	kfree(master);
2379 	dev_iommu_priv_set(dev, NULL);
2380 	return ERR_PTR(ret);
2381 }
2382 
2383 static void arm_smmu_release_device(struct device *dev)
2384 {
2385 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2386 	struct arm_smmu_master *master;
2387 
2388 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2389 		return;
2390 
2391 	master = dev_iommu_priv_get(dev);
2392 	WARN_ON(arm_smmu_master_sva_enabled(master));
2393 	arm_smmu_detach_dev(master);
2394 	arm_smmu_disable_pasid(master);
2395 	kfree(master);
2396 	iommu_fwspec_free(dev);
2397 }
2398 
2399 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2400 {
2401 	struct iommu_group *group;
2402 
2403 	/*
2404 	 * We don't support devices sharing stream IDs other than PCI RID
2405 	 * aliases, since the necessary ID-to-device lookup becomes rather
2406 	 * impractical given a potential sparse 32-bit stream ID space.
2407 	 */
2408 	if (dev_is_pci(dev))
2409 		group = pci_device_group(dev);
2410 	else
2411 		group = generic_device_group(dev);
2412 
2413 	return group;
2414 }
2415 
2416 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2417 				    enum iommu_attr attr, void *data)
2418 {
2419 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2420 
2421 	switch (domain->type) {
2422 	case IOMMU_DOMAIN_UNMANAGED:
2423 		switch (attr) {
2424 		case DOMAIN_ATTR_NESTING:
2425 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2426 			return 0;
2427 		default:
2428 			return -ENODEV;
2429 		}
2430 		break;
2431 	case IOMMU_DOMAIN_DMA:
2432 		switch (attr) {
2433 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2434 			*(int *)data = smmu_domain->non_strict;
2435 			return 0;
2436 		default:
2437 			return -ENODEV;
2438 		}
2439 		break;
2440 	default:
2441 		return -EINVAL;
2442 	}
2443 }
2444 
2445 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2446 				    enum iommu_attr attr, void *data)
2447 {
2448 	int ret = 0;
2449 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2450 
2451 	mutex_lock(&smmu_domain->init_mutex);
2452 
2453 	switch (domain->type) {
2454 	case IOMMU_DOMAIN_UNMANAGED:
2455 		switch (attr) {
2456 		case DOMAIN_ATTR_NESTING:
2457 			if (smmu_domain->smmu) {
2458 				ret = -EPERM;
2459 				goto out_unlock;
2460 			}
2461 
2462 			if (*(int *)data)
2463 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2464 			else
2465 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2466 			break;
2467 		default:
2468 			ret = -ENODEV;
2469 		}
2470 		break;
2471 	case IOMMU_DOMAIN_DMA:
2472 		switch(attr) {
2473 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2474 			smmu_domain->non_strict = *(int *)data;
2475 			break;
2476 		default:
2477 			ret = -ENODEV;
2478 		}
2479 		break;
2480 	default:
2481 		ret = -EINVAL;
2482 	}
2483 
2484 out_unlock:
2485 	mutex_unlock(&smmu_domain->init_mutex);
2486 	return ret;
2487 }
2488 
2489 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2490 {
2491 	return iommu_fwspec_add_ids(dev, args->args, 1);
2492 }
2493 
2494 static void arm_smmu_get_resv_regions(struct device *dev,
2495 				      struct list_head *head)
2496 {
2497 	struct iommu_resv_region *region;
2498 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2499 
2500 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2501 					 prot, IOMMU_RESV_SW_MSI);
2502 	if (!region)
2503 		return;
2504 
2505 	list_add_tail(&region->list, head);
2506 
2507 	iommu_dma_get_resv_regions(dev, head);
2508 }
2509 
2510 static bool arm_smmu_dev_has_feature(struct device *dev,
2511 				     enum iommu_dev_features feat)
2512 {
2513 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2514 
2515 	if (!master)
2516 		return false;
2517 
2518 	switch (feat) {
2519 	case IOMMU_DEV_FEAT_SVA:
2520 		return arm_smmu_master_sva_supported(master);
2521 	default:
2522 		return false;
2523 	}
2524 }
2525 
2526 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2527 					 enum iommu_dev_features feat)
2528 {
2529 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2530 
2531 	if (!master)
2532 		return false;
2533 
2534 	switch (feat) {
2535 	case IOMMU_DEV_FEAT_SVA:
2536 		return arm_smmu_master_sva_enabled(master);
2537 	default:
2538 		return false;
2539 	}
2540 }
2541 
2542 static int arm_smmu_dev_enable_feature(struct device *dev,
2543 				       enum iommu_dev_features feat)
2544 {
2545 	if (!arm_smmu_dev_has_feature(dev, feat))
2546 		return -ENODEV;
2547 
2548 	if (arm_smmu_dev_feature_enabled(dev, feat))
2549 		return -EBUSY;
2550 
2551 	switch (feat) {
2552 	case IOMMU_DEV_FEAT_SVA:
2553 		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2554 	default:
2555 		return -EINVAL;
2556 	}
2557 }
2558 
2559 static int arm_smmu_dev_disable_feature(struct device *dev,
2560 					enum iommu_dev_features feat)
2561 {
2562 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2563 		return -EINVAL;
2564 
2565 	switch (feat) {
2566 	case IOMMU_DEV_FEAT_SVA:
2567 		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2568 	default:
2569 		return -EINVAL;
2570 	}
2571 }
2572 
2573 static struct iommu_ops arm_smmu_ops = {
2574 	.capable		= arm_smmu_capable,
2575 	.domain_alloc		= arm_smmu_domain_alloc,
2576 	.domain_free		= arm_smmu_domain_free,
2577 	.attach_dev		= arm_smmu_attach_dev,
2578 	.map			= arm_smmu_map,
2579 	.unmap			= arm_smmu_unmap,
2580 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2581 	.iotlb_sync		= arm_smmu_iotlb_sync,
2582 	.iova_to_phys		= arm_smmu_iova_to_phys,
2583 	.probe_device		= arm_smmu_probe_device,
2584 	.release_device		= arm_smmu_release_device,
2585 	.device_group		= arm_smmu_device_group,
2586 	.domain_get_attr	= arm_smmu_domain_get_attr,
2587 	.domain_set_attr	= arm_smmu_domain_set_attr,
2588 	.of_xlate		= arm_smmu_of_xlate,
2589 	.get_resv_regions	= arm_smmu_get_resv_regions,
2590 	.put_resv_regions	= generic_iommu_put_resv_regions,
2591 	.dev_has_feat		= arm_smmu_dev_has_feature,
2592 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2593 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2594 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2595 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2596 };
2597 
2598 /* Probing and initialisation functions */
2599 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2600 				   struct arm_smmu_queue *q,
2601 				   unsigned long prod_off,
2602 				   unsigned long cons_off,
2603 				   size_t dwords, const char *name)
2604 {
2605 	size_t qsz;
2606 
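	/*
	 * Start from the advertised maximum queue size and halve it on
	 * allocation failure, but never shrink below a single page.
	 */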
2607 	do {
2608 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2609 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2610 					      GFP_KERNEL);
2611 		if (q->base || qsz < PAGE_SIZE)
2612 			break;
2613 
2614 		q->llq.max_n_shift--;
2615 	} while (1);
2616 
2617 	if (!q->base) {
2618 		dev_err(smmu->dev,
2619 			"failed to allocate queue (0x%zx bytes) for %s\n",
2620 			qsz, name);
2621 		return -ENOMEM;
2622 	}
2623 
2624 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2625 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2626 			 1 << q->llq.max_n_shift, name);
2627 	}
2628 
2629 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
2630 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
2631 	q->ent_dwords	= dwords;
2632 
2633 	q->q_base  = Q_BASE_RWA;
2634 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2635 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2636 
2637 	q->llq.prod = q->llq.cons = 0;
2638 	return 0;
2639 }
2640 
2641 static void arm_smmu_cmdq_free_bitmap(void *data)
2642 {
2643 	unsigned long *bitmap = data;
2644 	bitmap_free(bitmap);
2645 }
2646 
2647 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2648 {
2649 	int ret = 0;
2650 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2651 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2652 	atomic_long_t *bitmap;
2653 
2654 	atomic_set(&cmdq->owner_prod, 0);
2655 	atomic_set(&cmdq->lock, 0);
2656 
2657 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2658 	if (!bitmap) {
2659 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2660 		ret = -ENOMEM;
2661 	} else {
2662 		cmdq->valid_map = bitmap;
2663 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2664 	}
2665 
2666 	return ret;
2667 }
2668 
2669 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2670 {
2671 	int ret;
2672 
2673 	/* cmdq */
2674 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2675 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2676 				      "cmdq");
2677 	if (ret)
2678 		return ret;
2679 
2680 	ret = arm_smmu_cmdq_init(smmu);
2681 	if (ret)
2682 		return ret;
2683 
2684 	/* evtq */
2685 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2686 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2687 				      "evtq");
2688 	if (ret)
2689 		return ret;
2690 
2691 	/* priq */
2692 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2693 		return 0;
2694 
2695 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2696 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2697 				       "priq");
2698 }
2699 
2700 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2701 {
2702 	unsigned int i;
2703 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2704 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2705 	void *strtab = smmu->strtab_cfg.strtab;
2706 
2707 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2708 	if (!cfg->l1_desc) {
2709 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2710 		return -ENOMEM;
2711 	}
2712 
2713 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2714 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2715 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2716 	}
2717 
2718 	return 0;
2719 }
2720 
2721 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2722 {
2723 	void *strtab;
2724 	u64 reg;
2725 	u32 size, l1size;
2726 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2727 
2728 	/* Calculate the L1 size, capped to the SIDSIZE. */
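	/*
	 * STRTAB_L1_SZ_SHIFT bounds the L1 table to 2^STRTAB_L1_SZ_SHIFT
	 * bytes; each L1 descriptor then covers 2^STRTAB_SPLIT STEs.
	 */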
2729 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2730 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2731 	cfg->num_l1_ents = 1 << size;
2732 
2733 	size += STRTAB_SPLIT;
2734 	if (size < smmu->sid_bits)
2735 		dev_warn(smmu->dev,
2736 			 "2-level strtab only covers %u/%u bits of SID\n",
2737 			 size, smmu->sid_bits);
2738 
2739 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2740 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2741 				     GFP_KERNEL);
2742 	if (!strtab) {
2743 		dev_err(smmu->dev,
2744 			"failed to allocate l1 stream table (%u bytes)\n",
2745 			l1size);
2746 		return -ENOMEM;
2747 	}
2748 	cfg->strtab = strtab;
2749 
2750 	/* Configure strtab_base_cfg for 2 levels */
2751 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2752 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2753 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2754 	cfg->strtab_base_cfg = reg;
2755 
2756 	return arm_smmu_init_l1_strtab(smmu);
2757 }
2758 
2759 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2760 {
2761 	void *strtab;
2762 	u64 reg;
2763 	u32 size;
2764 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2765 
2766 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2767 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2768 				     GFP_KERNEL);
2769 	if (!strtab) {
2770 		dev_err(smmu->dev,
2771 			"failed to allocate linear stream table (%u bytes)\n",
2772 			size);
2773 		return -ENOMEM;
2774 	}
2775 	cfg->strtab = strtab;
2776 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2777 
2778 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2779 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2780 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2781 	cfg->strtab_base_cfg = reg;
2782 
2783 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2784 	return 0;
2785 }
2786 
2787 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2788 {
2789 	u64 reg;
2790 	int ret;
2791 
2792 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2793 		ret = arm_smmu_init_strtab_2lvl(smmu);
2794 	else
2795 		ret = arm_smmu_init_strtab_linear(smmu);
2796 
2797 	if (ret)
2798 		return ret;
2799 
2800 	/* Set the strtab base address */
2801 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2802 	reg |= STRTAB_BASE_RA;
2803 	smmu->strtab_cfg.strtab_base = reg;
2804 
2805 	/* Allocate the first VMID for stage-2 bypass STEs */
2806 	set_bit(0, smmu->vmid_map);
2807 	return 0;
2808 }
2809 
2810 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2811 {
2812 	int ret;
2813 
2814 	ret = arm_smmu_init_queues(smmu);
2815 	if (ret)
2816 		return ret;
2817 
2818 	return arm_smmu_init_strtab(smmu);
2819 }
2820 
2821 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2822 				   unsigned int reg_off, unsigned int ack_off)
2823 {
2824 	u32 reg;
2825 
2826 	writel_relaxed(val, smmu->base + reg_off);
2827 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2828 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2829 }
2830 
2831 /* GBPA is "special" */
2832 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2833 {
2834 	int ret;
2835 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2836 
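	/*
	 * Update protocol: wait for any previous update to complete, write
	 * the new value with GBPA_UPDATE set, then poll until the SMMU
	 * clears the UPDATE bit again.
	 */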
2837 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2838 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2839 	if (ret)
2840 		return ret;
2841 
2842 	reg &= ~clr;
2843 	reg |= set;
2844 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
2845 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2846 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2847 
2848 	if (ret)
2849 		dev_err(smmu->dev, "GBPA not responding to update\n");
2850 	return ret;
2851 }
2852 
2853 static void arm_smmu_free_msis(void *data)
2854 {
2855 	struct device *dev = data;
2856 	platform_msi_domain_free_irqs(dev);
2857 }
2858 
2859 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2860 {
2861 	phys_addr_t doorbell;
2862 	struct device *dev = msi_desc_to_dev(desc);
2863 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2864 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2865 
2866 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2867 	doorbell &= MSI_CFG0_ADDR_MASK;
2868 
2869 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2870 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2871 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2872 }
2873 
2874 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2875 {
2876 	struct msi_desc *desc;
2877 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2878 	struct device *dev = smmu->dev;
2879 
2880 	/* Clear the MSI address regs */
2881 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2882 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2883 
2884 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2885 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2886 	else
2887 		nvec--;
2888 
2889 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2890 		return;
2891 
2892 	if (!dev->msi_domain) {
2893 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2894 		return;
2895 	}
2896 
2897 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2898 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2899 	if (ret) {
2900 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2901 		return;
2902 	}
2903 
2904 	for_each_msi_entry(desc, dev) {
2905 		switch (desc->platform.msi_index) {
2906 		case EVTQ_MSI_INDEX:
2907 			smmu->evtq.q.irq = desc->irq;
2908 			break;
2909 		case GERROR_MSI_INDEX:
2910 			smmu->gerr_irq = desc->irq;
2911 			break;
2912 		case PRIQ_MSI_INDEX:
2913 			smmu->priq.q.irq = desc->irq;
2914 			break;
2915 		default:	/* Unknown */
2916 			continue;
2917 		}
2918 	}
2919 
2920 	/* Add callback to free MSIs on teardown */
2921 	devm_add_action(dev, arm_smmu_free_msis, dev);
2922 }
2923 
2924 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2925 {
2926 	int irq, ret;
2927 
2928 	arm_smmu_setup_msis(smmu);
2929 
2930 	/* Request interrupt lines */
2931 	irq = smmu->evtq.q.irq;
2932 	if (irq) {
2933 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2934 						arm_smmu_evtq_thread,
2935 						IRQF_ONESHOT,
2936 						"arm-smmu-v3-evtq", smmu);
2937 		if (ret < 0)
2938 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2939 	} else {
2940 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2941 	}
2942 
2943 	irq = smmu->gerr_irq;
2944 	if (irq) {
2945 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2946 				       0, "arm-smmu-v3-gerror", smmu);
2947 		if (ret < 0)
2948 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2949 	} else {
2950 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2951 	}
2952 
2953 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2954 		irq = smmu->priq.q.irq;
2955 		if (irq) {
2956 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2957 							arm_smmu_priq_thread,
2958 							IRQF_ONESHOT,
2959 							"arm-smmu-v3-priq",
2960 							smmu);
2961 			if (ret < 0)
2962 				dev_warn(smmu->dev,
2963 					 "failed to enable priq irq\n");
2964 		} else {
2965 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2966 		}
2967 	}
2968 }
2969 
2970 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2971 {
2972 	int ret, irq;
2973 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2974 
2975 	/* Disable IRQs first */
2976 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2977 				      ARM_SMMU_IRQ_CTRLACK);
2978 	if (ret) {
2979 		dev_err(smmu->dev, "failed to disable irqs\n");
2980 		return ret;
2981 	}
2982 
2983 	irq = smmu->combined_irq;
2984 	if (irq) {
2985 		/*
2986 		 * Cavium ThunderX2 implementation doesn't support unique irq
2987 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
2988 		 */
2989 		ret = devm_request_threaded_irq(smmu->dev, irq,
2990 					arm_smmu_combined_irq_handler,
2991 					arm_smmu_combined_irq_thread,
2992 					IRQF_ONESHOT,
2993 					"arm-smmu-v3-combined-irq", smmu);
2994 		if (ret < 0)
2995 			dev_warn(smmu->dev, "failed to enable combined irq\n");
2996 	} else
2997 		arm_smmu_setup_unique_irqs(smmu);
2998 
2999 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3000 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3001 
3002 	/* Enable interrupt generation on the SMMU */
3003 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3004 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3005 	if (ret)
3006 		dev_warn(smmu->dev, "failed to enable irqs\n");
3007 
3008 	return 0;
3009 }
3010 
3011 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3012 {
3013 	int ret;
3014 
3015 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3016 	if (ret)
3017 		dev_err(smmu->dev, "failed to clear cr0\n");
3018 
3019 	return ret;
3020 }
3021 
3022 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3023 {
3024 	int ret;
3025 	u32 reg, enables;
3026 	struct arm_smmu_cmdq_ent cmd;
3027 
3028 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3029 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3030 	if (reg & CR0_SMMUEN) {
3031 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3032 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3033 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3034 	}
3035 
3036 	ret = arm_smmu_device_disable(smmu);
3037 	if (ret)
3038 		return ret;
3039 
3040 	/* CR1 (table and queue memory attributes) */
3041 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3042 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3043 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3044 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3045 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3046 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3047 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3048 
3049 	/* CR2 (random crap) */
3050 	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3051 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3052 
3053 	/* Stream table */
3054 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3055 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3056 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3057 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3058 
3059 	/* Command queue */
3060 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3061 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3062 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3063 
3064 	enables = CR0_CMDQEN;
3065 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3066 				      ARM_SMMU_CR0ACK);
3067 	if (ret) {
3068 		dev_err(smmu->dev, "failed to enable command queue\n");
3069 		return ret;
3070 	}
3071 
3072 	/* Invalidate any cached configuration */
3073 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3074 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3075 	arm_smmu_cmdq_issue_sync(smmu);
3076 
3077 	/* Invalidate any stale TLB entries */
3078 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3079 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3080 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3081 	}
3082 
3083 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3084 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3085 	arm_smmu_cmdq_issue_sync(smmu);
3086 
3087 	/* Event queue */
3088 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3089 	writel_relaxed(smmu->evtq.q.llq.prod,
3090 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3091 	writel_relaxed(smmu->evtq.q.llq.cons,
3092 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3093 
3094 	enables |= CR0_EVTQEN;
3095 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3096 				      ARM_SMMU_CR0ACK);
3097 	if (ret) {
3098 		dev_err(smmu->dev, "failed to enable event queue\n");
3099 		return ret;
3100 	}
3101 
3102 	/* PRI queue */
3103 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3104 		writeq_relaxed(smmu->priq.q.q_base,
3105 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3106 		writel_relaxed(smmu->priq.q.llq.prod,
3107 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3108 		writel_relaxed(smmu->priq.q.llq.cons,
3109 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3110 
3111 		enables |= CR0_PRIQEN;
3112 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3113 					      ARM_SMMU_CR0ACK);
3114 		if (ret) {
3115 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3116 			return ret;
3117 		}
3118 	}
3119 
3120 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3121 		enables |= CR0_ATSCHK;
3122 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3123 					      ARM_SMMU_CR0ACK);
3124 		if (ret) {
3125 			dev_err(smmu->dev, "failed to enable ATS check\n");
3126 			return ret;
3127 		}
3128 	}
3129 
3130 	ret = arm_smmu_setup_irqs(smmu);
3131 	if (ret) {
3132 		dev_err(smmu->dev, "failed to setup irqs\n");
3133 		return ret;
3134 	}
3135 
3136 	if (is_kdump_kernel())
3137 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3138 
3139 	/* Enable the SMMU interface, or ensure bypass */
3140 	if (!bypass || disable_bypass) {
3141 		enables |= CR0_SMMUEN;
3142 	} else {
3143 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3144 		if (ret)
3145 			return ret;
3146 	}
3147 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3148 				      ARM_SMMU_CR0ACK);
3149 	if (ret) {
3150 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3151 		return ret;
3152 	}
3153 
3154 	return 0;
3155 }
3156 
3157 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3158 {
3159 	u32 reg;
3160 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3161 
3162 	/* IDR0 */
3163 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3164 
3165 	/* 2-level structures */
3166 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3167 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3168 
3169 	if (reg & IDR0_CD2L)
3170 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3171 
3172 	/*
3173 	 * Translation table endianness.
3174 	 * We currently require the same endianness as the CPU, but this
3175 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3176 	 */
3177 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3178 	case IDR0_TTENDIAN_MIXED:
3179 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3180 		break;
3181 #ifdef __BIG_ENDIAN
3182 	case IDR0_TTENDIAN_BE:
3183 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3184 		break;
3185 #else
3186 	case IDR0_TTENDIAN_LE:
3187 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3188 		break;
3189 #endif
3190 	default:
3191 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3192 		return -ENXIO;
3193 	}
3194 
3195 	/* Boolean feature flags */
3196 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3197 		smmu->features |= ARM_SMMU_FEAT_PRI;
3198 
3199 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3200 		smmu->features |= ARM_SMMU_FEAT_ATS;
3201 
3202 	if (reg & IDR0_SEV)
3203 		smmu->features |= ARM_SMMU_FEAT_SEV;
3204 
3205 	if (reg & IDR0_MSI) {
3206 		smmu->features |= ARM_SMMU_FEAT_MSI;
3207 		if (coherent && !disable_msipolling)
3208 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3209 	}
3210 
3211 	if (reg & IDR0_HYP)
3212 		smmu->features |= ARM_SMMU_FEAT_HYP;
3213 
3214 	/*
3215 	 * The coherency feature as set by FW is used in preference to the ID
3216 	 * register, but warn on mismatch.
3217 	 */
3218 	if (!!(reg & IDR0_COHACC) != coherent)
3219 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3220 			 coherent ? "true" : "false");
3221 
3222 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3223 	case IDR0_STALL_MODEL_FORCE:
3224 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3225 		fallthrough;
3226 	case IDR0_STALL_MODEL_STALL:
3227 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3228 	}
3229 
3230 	if (reg & IDR0_S1P)
3231 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3232 
3233 	if (reg & IDR0_S2P)
3234 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3235 
3236 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3237 		dev_err(smmu->dev, "no translation support!\n");
3238 		return -ENXIO;
3239 	}
3240 
3241 	/* We only support the AArch64 table format at present */
3242 	switch (FIELD_GET(IDR0_TTF, reg)) {
3243 	case IDR0_TTF_AARCH32_64:
3244 		smmu->ias = 40;
3245 		fallthrough;
3246 	case IDR0_TTF_AARCH64:
3247 		break;
3248 	default:
3249 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3250 		return -ENXIO;
3251 	}
3252 
3253 	/* ASID/VMID sizes */
3254 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3255 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3256 
3257 	/* IDR1 */
3258 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3259 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3260 		dev_err(smmu->dev, "embedded implementation not supported\n");
3261 		return -ENXIO;
3262 	}
3263 
3264 	/* Queue sizes, capped to ensure natural alignment */
3265 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3266 					     FIELD_GET(IDR1_CMDQS, reg));
3267 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3268 		/*
3269 		 * We don't support splitting up batches, so one batch of
3270 		 * commands plus an extra sync needs to fit inside the command
3271 		 * queue. There's also no way we can handle the weird alignment
3272 		 * restrictions on the base pointer for a unit-length queue.
3273 		 */
3274 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3275 			CMDQ_BATCH_ENTRIES);
3276 		return -ENXIO;
3277 	}
3278 
3279 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3280 					     FIELD_GET(IDR1_EVTQS, reg));
3281 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3282 					     FIELD_GET(IDR1_PRIQS, reg));
3283 
3284 	/* SID/SSID sizes */
3285 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3286 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3287 
3288 	/*
3289 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3290 	 * table, use a linear table instead.
3291 	 */
3292 	if (smmu->sid_bits <= STRTAB_SPLIT)
3293 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3294 
3295 	/* IDR3 */
3296 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3297 	if (FIELD_GET(IDR3_RIL, reg))
3298 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3299 
3300 	/* IDR5 */
3301 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3302 
3303 	/* Maximum number of outstanding stalls */
3304 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3305 
3306 	/* Page sizes */
	if (reg & IDR5_GRAN64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (reg & IDR5_GRAN16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (reg & IDR5_GRAN4K)
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;

	/* Input address size */
	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
		smmu->features |= ARM_SMMU_FEAT_VAX;

	/* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		dev_info(smmu->dev,
			"unknown output address size. Truncating to 48-bit\n");
		fallthrough;
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;

	/* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

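	/*
	 * Stage-2 input addresses are IPAs, which may be as wide as the
	 * output address range, so make sure the IAS covers the OAS too.
	 */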
	smmu->ias = max(smmu->ias, smmu->oas);

	if (arm_smmu_sva_supported(smmu))
		smmu->features |= ARM_SMMU_FEAT_SVA;

	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
		 smmu->ias, smmu->oas, smmu->features);
	return 0;
}

#ifdef CONFIG_ACPI
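/*
 * Apply implementation-specific workarounds advertised by the model field
 * of the IORT SMMUv3 node, mirroring the DT quirk properties handled by
 * parse_driver_options().
 */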
static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
{
	switch (model) {
	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
		break;
	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
		break;
	}

	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
}

static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct acpi_iort_smmu_v3 *iort_smmu;
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node;

	node = *(struct acpi_iort_node **)dev_get_platdata(dev);

	/* Retrieve SMMUv3 specific data */
	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;

	acpi_smmu_get_options(iort_smmu->model, smmu);

	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif

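/*
 * DT probe: the binding requires "#iommu-cells" to be 1 (the single cell
 * being the device's StreamID); this is also where quirk options and bus
 * coherency are discovered.
 */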
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	struct device *dev = &pdev->dev;
	u32 cells;
	int ret = -EINVAL;

	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
		dev_err(dev, "missing #iommu-cells property\n");
	else if (cells != 1)
		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
	else
		ret = 0;

	parse_driver_options(smmu);

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return ret;
}

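/*
 * The SMMUv3 register map normally spans two 64K pages. Implementations
 * with the PAGE0_REGS_ONLY quirk (e.g. Cavium CN99xx) expose the page 1
 * registers at the corresponding page 0 offsets, so only 64K is claimed.
 */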
static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
{
	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
		return SZ_64K;
	else
		return SZ_128K;
}

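/*
 * Register (or, with ops == NULL, unregister) our iommu_ops with every bus
 * type we can master through: PCI, AMBA and platform. Earlier registrations
 * are unwound if a later one fails.
 */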
static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
{
	int err;

#ifdef CONFIG_PCI
	if (pci_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			return err;
	}
#endif
#ifdef CONFIG_ARM_AMBA
	if (amba_bustype.iommu_ops != ops) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	if (platform_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}

	return 0;

err_reset_amba_ops:
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
	return err;
}

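/*
 * Map an arbitrary sub-region of the MMIO window, so that we can avoid
 * claiming the IMPLEMENTATION DEFINED space in the middle of it (see the
 * comment at the call site in arm_smmu_device_probe()).
 */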
static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
				      resource_size_t size)
{
	struct resource res = {
		.flags = IORESOURCE_MEM,
		.start = start,
		.end = start + size - 1,
	};

	return devm_ioremap_resource(dev, &res);
}

static int arm_smmu_device_probe(struct platform_device *pdev)
{
	int irq, ret;
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	bool bypass;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node) {
		ret = arm_smmu_device_dt_probe(pdev, smmu);
	} else {
		ret = arm_smmu_device_acpi_probe(pdev, smmu);
		if (ret == -ENODEV)
			return ret;
	}

	/* Set bypass mode according to firmware probing result */
	bypass = !!ret;
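	/*
	 * A failed firmware probe (other than the ACPI -ENODEV case above) is
	 * not fatal: the SMMU is still brought up, and whether unattached
	 * traffic then bypasses translation or aborts is ultimately governed
	 * by the disable_bypass parameter (see arm_smmu_device_reset()).
	 */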

	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
		dev_err(dev, "MMIO region too small (%pr)\n", res);
		return -EINVAL;
	}
	ioaddr = res->start;

	/*
	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
	 * the PMCG registers which are reserved by the PMU driver.
	 */
	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);

	if (arm_smmu_resource_size(smmu) > SZ_64K) {
		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
					       ARM_SMMU_REG_SZ);
		if (IS_ERR(smmu->page1))
			return PTR_ERR(smmu->page1);
	} else {
		smmu->page1 = smmu->base;
	}

	/* Interrupt lines */

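	/* A single "combined" IRQ, if provided, supersedes the per-queue lines */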
	irq = platform_get_irq_byname_optional(pdev, "combined");
	if (irq > 0)
		smmu->combined_irq = irq;
	else {
		irq = platform_get_irq_byname_optional(pdev, "eventq");
		if (irq > 0)
			smmu->evtq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "priq");
		if (irq > 0)
			smmu->priq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "gerror");
		if (irq > 0)
			smmu->gerr_irq = irq;
	}
	/* Probe the h/w */
	ret = arm_smmu_device_hw_probe(smmu);
	if (ret)
		return ret;

	/* Initialise in-memory data structures */
	ret = arm_smmu_init_structures(smmu);
	if (ret)
		return ret;

	/* Record our private device structure */
	platform_set_drvdata(pdev, smmu);

	/* Reset the device */
	ret = arm_smmu_device_reset(smmu, bypass);
	if (ret)
		return ret;

	/* And we're up. Go go go! */
	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
				     "smmu3.%pa", &ioaddr);
	if (ret)
		return ret;

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	ret = iommu_device_register(&smmu->iommu);
	if (ret) {
		dev_err(dev, "Failed to register iommu\n");
		return ret;
	}

	return arm_smmu_set_bus_ops(&arm_smmu_ops);
}

static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_set_bus_ops(NULL);
	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);
	arm_smmu_device_disable(smmu);

	return 0;
}

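/*
 * Shutdown reuses the remove path so that translation is torn down and the
 * SMMU disabled before a reboot or kexec.
 */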
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");