1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_iommu.h>
27 #include <linux/of_platform.h>
28 #include <linux/pci.h>
29 #include <linux/pci-ats.h>
30 #include <linux/platform_device.h>
31 
32 #include <linux/amba/bus.h>
33 
34 #include "arm-smmu-v3.h"
35 
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40 
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 	"Disable MSI-based polling for CMD_SYNC completion.");
45 
46 enum arm_smmu_msi_index {
47 	EVTQ_MSI_INDEX,
48 	GERROR_MSI_INDEX,
49 	PRIQ_MSI_INDEX,
50 	ARM_SMMU_MAX_MSIS,
51 };
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 static struct arm_smmu_option_prop arm_smmu_options[] = {
80 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
81 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
82 	{ 0, NULL},
83 };
84 
85 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
86 						 struct arm_smmu_device *smmu)
87 {
88 	if (offset > SZ_64K)
89 		return smmu->page1 + offset - SZ_64K;
90 
91 	return smmu->base + offset;
92 }
93 
94 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
95 {
96 	return container_of(dom, struct arm_smmu_domain, domain);
97 }
98 
99 static void parse_driver_options(struct arm_smmu_device *smmu)
100 {
101 	int i = 0;
102 
103 	do {
104 		if (of_property_read_bool(smmu->dev->of_node,
105 						arm_smmu_options[i].prop)) {
106 			smmu->options |= arm_smmu_options[i].opt;
107 			dev_notice(smmu->dev, "option %s\n",
108 				arm_smmu_options[i].prop);
109 		}
110 	} while (arm_smmu_options[++i].opt);
111 }
112 
113 /* Low-level queue manipulation functions */
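/*
 * For orientation, prod and cons are each kept as a single word: roughly,
 * the low max_n_shift bits are the queue index, the next bit up is the wrap
 * flag, and the top bit records overflow (see the Q_IDX/Q_WRP/Q_OVF helpers
 * in arm-smmu-v3.h). A worked example, assuming a toy queue with
 * max_n_shift == 2 (four slots):
 *
 *	prod = 0b110 (wrap = 1, idx = 2), cons = 0b010 (wrap = 0, idx = 2)
 *		-> indices equal, wrap bits differ: queue_full()
 *	prod = 0b110, cons = 0b110
 *		-> indices and wrap bits equal: queue_empty()
 */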
114 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
115 {
116 	u32 space, prod, cons;
117 
118 	prod = Q_IDX(q, q->prod);
119 	cons = Q_IDX(q, q->cons);
120 
121 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
122 		space = (1 << q->max_n_shift) - (prod - cons);
123 	else
124 		space = cons - prod;
125 
126 	return space >= n;
127 }
128 
129 static bool queue_full(struct arm_smmu_ll_queue *q)
130 {
131 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
132 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
133 }
134 
135 static bool queue_empty(struct arm_smmu_ll_queue *q)
136 {
137 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
138 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
139 }
140 
141 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
142 {
143 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
144 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
145 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
146 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
147 }
148 
149 static void queue_sync_cons_out(struct arm_smmu_queue *q)
150 {
151 	/*
152 	 * Ensure that all CPU accesses (reads and writes) to the queue
153 	 * are complete before we update the cons pointer.
154 	 */
155 	__iomb();
156 	writel_relaxed(q->llq.cons, q->cons_reg);
157 }
158 
159 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
160 {
161 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
162 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
163 }
164 
165 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
166 {
167 	struct arm_smmu_ll_queue *llq = &q->llq;
168 
169 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
170 		return;
171 
172 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
173 		      Q_IDX(llq, llq->cons);
174 	queue_sync_cons_out(q);
175 }
176 
177 static int queue_sync_prod_in(struct arm_smmu_queue *q)
178 {
179 	u32 prod;
180 	int ret = 0;
181 
182 	/*
183 	 * We can't use the _relaxed() variant here, as we must prevent
184 	 * speculative reads of the queue before we have determined that
185 	 * prod has indeed moved.
186 	 */
187 	prod = readl(q->prod_reg);
188 
189 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
190 		ret = -EOVERFLOW;
191 
192 	q->llq.prod = prod;
193 	return ret;
194 }
195 
196 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
197 {
198 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
199 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
200 }
201 
202 static void queue_poll_init(struct arm_smmu_device *smmu,
203 			    struct arm_smmu_queue_poll *qp)
204 {
205 	qp->delay = 1;
206 	qp->spin_cnt = 0;
207 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
208 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
209 }
210 
211 static int queue_poll(struct arm_smmu_queue_poll *qp)
212 {
213 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
214 		return -ETIMEDOUT;
215 
216 	if (qp->wfe) {
217 		wfe();
218 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
219 		cpu_relax();
220 	} else {
221 		udelay(qp->delay);
222 		qp->delay *= 2;
223 		qp->spin_cnt = 0;
224 	}
225 
226 	return 0;
227 }
228 
229 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
230 {
231 	int i;
232 
233 	for (i = 0; i < n_dwords; ++i)
234 		*dst++ = cpu_to_le64(*src++);
235 }
236 
237 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
238 {
239 	int i;
240 
241 	for (i = 0; i < n_dwords; ++i)
242 		*dst++ = le64_to_cpu(*src++);
243 }
244 
245 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
246 {
247 	if (queue_empty(&q->llq))
248 		return -EAGAIN;
249 
250 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
251 	queue_inc_cons(&q->llq);
252 	queue_sync_cons_out(q);
253 	return 0;
254 }
255 
256 /* High-level queue accessors */
257 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
258 {
259 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
260 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
261 
262 	switch (ent->opcode) {
263 	case CMDQ_OP_TLBI_EL2_ALL:
264 	case CMDQ_OP_TLBI_NSNH_ALL:
265 		break;
266 	case CMDQ_OP_PREFETCH_CFG:
267 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
268 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
269 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
270 		break;
271 	case CMDQ_OP_CFGI_CD:
272 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
273 		fallthrough;
274 	case CMDQ_OP_CFGI_STE:
275 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
276 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
277 		break;
278 	case CMDQ_OP_CFGI_CD_ALL:
279 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
280 		break;
281 	case CMDQ_OP_CFGI_ALL:
282 		/* Cover the entire SID range */
283 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
284 		break;
285 	case CMDQ_OP_TLBI_NH_VA:
286 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
287 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
290 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
291 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
292 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
293 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
294 		break;
295 	case CMDQ_OP_TLBI_S2_IPA:
296 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
297 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
298 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
299 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
300 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
301 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
302 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
303 		break;
304 	case CMDQ_OP_TLBI_NH_ASID:
305 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
306 		fallthrough;
307 	case CMDQ_OP_TLBI_S12_VMALL:
308 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
309 		break;
310 	case CMDQ_OP_ATC_INV:
311 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
312 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
313 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
314 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
315 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
316 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
317 		break;
318 	case CMDQ_OP_PRI_RESP:
319 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
320 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
321 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
322 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
323 		switch (ent->pri.resp) {
324 		case PRI_RESP_DENY:
325 		case PRI_RESP_FAIL:
326 		case PRI_RESP_SUCC:
327 			break;
328 		default:
329 			return -EINVAL;
330 		}
331 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
332 		break;
333 	case CMDQ_OP_CMD_SYNC:
334 		if (ent->sync.msiaddr) {
335 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
336 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
337 		} else {
338 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
339 		}
340 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
341 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
342 		break;
343 	default:
344 		return -ENOENT;
345 	}
346 
347 	return 0;
348 }
349 
350 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
351 					 u32 prod)
352 {
353 	struct arm_smmu_queue *q = &smmu->cmdq.q;
354 	struct arm_smmu_cmdq_ent ent = {
355 		.opcode = CMDQ_OP_CMD_SYNC,
356 	};
357 
358 	/*
359 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
360 	 * payload, so the write will zero the entire command on that platform.
361 	 */
362 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
363 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
364 				   q->ent_dwords * 8;
365 	}
366 
367 	arm_smmu_cmdq_build_cmd(cmd, &ent);
368 }
369 
370 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
371 {
372 	static const char *cerror_str[] = {
373 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
374 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
375 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
376 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
377 	};
378 
379 	int i;
380 	u64 cmd[CMDQ_ENT_DWORDS];
381 	struct arm_smmu_queue *q = &smmu->cmdq.q;
382 	u32 cons = readl_relaxed(q->cons_reg);
383 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
384 	struct arm_smmu_cmdq_ent cmd_sync = {
385 		.opcode = CMDQ_OP_CMD_SYNC,
386 	};
387 
388 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
389 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
390 
391 	switch (idx) {
392 	case CMDQ_ERR_CERROR_ABT_IDX:
393 		dev_err(smmu->dev, "retrying command fetch\n");
		fallthrough;
394 	case CMDQ_ERR_CERROR_NONE_IDX:
395 		return;
396 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
397 		/*
398 		 * ATC Invalidation Completion timeout. CONS is still pointing
399 		 * at the CMD_SYNC. Attempt to complete other pending commands
400 		 * by repeating the CMD_SYNC, though we might well end up back
401 		 * here since the ATC invalidation may still be pending.
402 		 */
403 		return;
404 	case CMDQ_ERR_CERROR_ILL_IDX:
405 	default:
406 		break;
407 	}
408 
409 	/*
410 	 * We may have concurrent producers, so we need to be careful
411 	 * not to touch any of the shadow cmdq state.
412 	 */
413 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
414 	dev_err(smmu->dev, "skipping command in error state:\n");
415 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
416 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
417 
418 	/* Convert the erroneous command into a CMD_SYNC */
419 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
420 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
421 		return;
422 	}
423 
424 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
425 }
426 
427 /*
428  * Command queue locking.
429  * This is a form of bastardised rwlock with the following major changes:
430  *
431  * - The only LOCK routines are exclusive_trylock() and shared_lock().
432  *   Neither have barrier semantics, and instead provide only a control
433  *   dependency.
434  *
435  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
436  *   fails if the caller appears to be the last lock holder (yes, this is
437  *   racy). All successful UNLOCK routines have RELEASE semantics.
438  */
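/*
 * A rough sketch of how the routines below use this lock: CMD_SYNC waiters
 * take it shared before marking their slots valid, and whoever wants to
 * refresh the shadow cons pointer takes it exclusive:
 *
 *	arm_smmu_cmdq_shared_lock(cmdq);
 *	...wait for our CMD_SYNC to complete...
 *	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);	(last one out)
 *		arm_smmu_cmdq_shared_unlock(cmdq);
 *	}
 *
 *	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
 *		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
 *		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
 *	}
 */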
439 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
440 {
441 	int val;
442 
443 	/*
444 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
445 	 * lock counter. When held in exclusive state, the lock counter is set
446 	 * to INT_MIN so these increments won't hurt as the value will remain
447 	 * negative.
448 	 */
449 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
450 		return;
451 
452 	do {
453 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
454 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
455 }
456 
457 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
458 {
459 	(void)atomic_dec_return_release(&cmdq->lock);
460 }
461 
462 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
463 {
464 	if (atomic_read(&cmdq->lock) == 1)
465 		return false;
466 
467 	arm_smmu_cmdq_shared_unlock(cmdq);
468 	return true;
469 }
470 
471 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
472 ({									\
473 	bool __ret;							\
474 	local_irq_save(flags);						\
475 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
476 	if (!__ret)							\
477 		local_irq_restore(flags);				\
478 	__ret;								\
479 })
480 
481 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
482 ({									\
483 	atomic_set_release(&cmdq->lock, 0);				\
484 	local_irq_restore(flags);					\
485 })
486 
487 
488 /*
489  * Command queue insertion.
490  * This is made fiddly by our attempts to achieve some sort of scalability
491  * since there is one queue shared amongst all of the CPUs in the system.  If
492  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
493  * then you'll *love* this monstrosity.
494  *
495  * The basic idea is to split the queue up into ranges of commands that are
496  * owned by a given CPU; the owner may not have written all of the commands
497  * itself, but is responsible for advancing the hardware prod pointer when
498  * the time comes. The algorithm is roughly:
499  *
500  * 	1. Allocate some space in the queue. At this point we also discover
501  *	   whether the head of the queue is currently owned by another CPU,
502  *	   or whether we are the owner.
503  *
504  *	2. Write our commands into our allocated slots in the queue.
505  *
506  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
507  *
508  *	4. If we are an owner:
509  *		a. Wait for the previous owner to finish.
510  *		b. Mark the queue head as unowned, which tells us the range
511  *		   that we are responsible for publishing.
512  *		c. Wait for all commands in our owned range to become valid.
513  *		d. Advance the hardware prod pointer.
514  *		e. Tell the next owner we've finished.
515  *
516  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
517  *	   owner), then we need to stick around until it has completed:
518  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
519  *		   to clear the first 4 bytes.
520  *		b. Otherwise, we spin waiting for the hardware cons pointer to
521  *		   advance past our command.
522  *
523  * The devil is in the details, particularly the use of locking for handling
524  * SYNC completion and freeing up space in the queue before we think that it is
525  * full.
526  */
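/*
 * As a condensed sketch, arm_smmu_cmdq_issue_cmdlist() below maps onto the
 * steps above roughly as:
 *
 *	cmpxchg() loop on cmdq->q.llq.val		(1. allocate space)
 *	arm_smmu_cmdq_write_entries()			(2. write commands)
 *	dma_wmb(); arm_smmu_cmdq_set_valid_map()	(3. mark valid)
 *	if (owner)
 *		wait for owner_prod, clear owned flag,
 *		poll_valid_map(), writel(prod),
 *		release owner_prod			(4. publish)
 *	if (sync)
 *		poll for MSI write-back or cons advance	(5. wait for CMD_SYNC)
 */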
527 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
528 					       u32 sprod, u32 eprod, bool set)
529 {
530 	u32 swidx, sbidx, ewidx, ebidx;
531 	struct arm_smmu_ll_queue llq = {
532 		.max_n_shift	= cmdq->q.llq.max_n_shift,
533 		.prod		= sprod,
534 	};
535 
536 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
537 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
538 
539 	while (llq.prod != eprod) {
540 		unsigned long mask;
541 		atomic_long_t *ptr;
542 		u32 limit = BITS_PER_LONG;
543 
544 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
545 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
546 
547 		ptr = &cmdq->valid_map[swidx];
548 
549 		if ((swidx == ewidx) && (sbidx < ebidx))
550 			limit = ebidx;
551 
552 		mask = GENMASK(limit - 1, sbidx);
553 
554 		/*
555 		 * The valid bit is the inverse of the wrap bit. This means
556 		 * that a zero-initialised queue is invalid and, after marking
557 		 * all entries as valid, they become invalid again when we
558 		 * wrap.
559 		 */
560 		if (set) {
561 			atomic_long_xor(mask, ptr);
562 		} else { /* Poll */
563 			unsigned long valid;
564 
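			/*
			 * ULONG_MAX + 1 wraps to 0, so 'valid' is mask when
			 * the wrap bit is clear and 0 when it is set: the
			 * pattern the bits take once written on that lap,
			 * given that the valid bit is the inverse of the
			 * wrap bit (see the comment above).
			 */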
565 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
566 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
567 		}
568 
569 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
570 	}
571 }
572 
573 /* Mark all entries in the range [sprod, eprod) as valid */
574 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
575 					u32 sprod, u32 eprod)
576 {
577 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
578 }
579 
580 /* Wait for all entries in the range [sprod, eprod) to become valid */
581 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
582 					 u32 sprod, u32 eprod)
583 {
584 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
585 }
586 
587 /* Wait for the command queue to become non-full */
588 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
589 					     struct arm_smmu_ll_queue *llq)
590 {
591 	unsigned long flags;
592 	struct arm_smmu_queue_poll qp;
593 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
594 	int ret = 0;
595 
596 	/*
597 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
598 	 * that fails, spin until somebody else updates it for us.
599 	 */
600 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
601 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
602 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
603 		llq->val = READ_ONCE(cmdq->q.llq.val);
604 		return 0;
605 	}
606 
607 	queue_poll_init(smmu, &qp);
608 	do {
609 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
610 		if (!queue_full(llq))
611 			break;
612 
613 		ret = queue_poll(&qp);
614 	} while (!ret);
615 
616 	return ret;
617 }
618 
619 /*
620  * Wait until the SMMU signals a CMD_SYNC completion MSI.
621  * Must be called with the cmdq lock held in some capacity.
622  */
623 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
624 					  struct arm_smmu_ll_queue *llq)
625 {
626 	int ret = 0;
627 	struct arm_smmu_queue_poll qp;
628 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
629 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
630 
631 	queue_poll_init(smmu, &qp);
632 
633 	/*
634 	 * The MSI won't generate an event, since it's being written back
635 	 * into the command queue.
636 	 */
637 	qp.wfe = false;
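	/*
	 * Completion is detected by the SMMU's MSI write landing on the
	 * CMD_SYNC itself: the payload (MSIData, left as zero when the
	 * command was built) overwrites the first word, so spin until
	 * *cmd reads back as zero.
	 */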
638 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
639 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
640 	return ret;
641 }
642 
643 /*
644  * Wait until the SMMU cons index passes llq->prod.
645  * Must be called with the cmdq lock held in some capacity.
646  */
647 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
648 					       struct arm_smmu_ll_queue *llq)
649 {
650 	struct arm_smmu_queue_poll qp;
651 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
652 	u32 prod = llq->prod;
653 	int ret = 0;
654 
655 	queue_poll_init(smmu, &qp);
656 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
657 	do {
658 		if (queue_consumed(llq, prod))
659 			break;
660 
661 		ret = queue_poll(&qp);
662 
663 		/*
664 		 * This needs to be a readl() so that our subsequent call
665 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
666 		 *
667 		 * Specifically, we need to ensure that we observe all
668 		 * shared_lock()s by other CMD_SYNCs that share our owner,
669 		 * so that a failing call to tryunlock() means that we're
670 		 * the last one out and therefore we can safely advance
671 		 * cmdq->q.llq.cons. Roughly speaking:
672 		 *
673 		 * CPU 0		CPU1			CPU2 (us)
674 		 *
675 		 * if (sync)
676 		 * 	shared_lock();
677 		 *
678 		 * dma_wmb();
679 		 * set_valid_map();
680 		 *
681 		 * 			if (owner) {
682 		 *				poll_valid_map();
683 		 *				<control dependency>
684 		 *				writel(prod_reg);
685 		 *
686 		 *						readl(cons_reg);
687 		 *						tryunlock();
688 		 *
689 		 * Requires us to see CPU 0's shared_lock() acquisition.
690 		 */
691 		llq->cons = readl(cmdq->q.cons_reg);
692 	} while (!ret);
693 
694 	return ret;
695 }
696 
697 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
698 					 struct arm_smmu_ll_queue *llq)
699 {
700 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
701 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
702 
703 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
704 }
705 
706 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
707 					u32 prod, int n)
708 {
709 	int i;
710 	struct arm_smmu_ll_queue llq = {
711 		.max_n_shift	= cmdq->q.llq.max_n_shift,
712 		.prod		= prod,
713 	};
714 
715 	for (i = 0; i < n; ++i) {
716 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
717 
718 		prod = queue_inc_prod_n(&llq, i);
719 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
720 	}
721 }
722 
723 /*
724  * This is the actual insertion function, and provides the following
725  * ordering guarantees to callers:
726  *
727  * - There is a dma_wmb() before publishing any commands to the queue.
728  *   This can be relied upon to order prior writes to data structures
729  *   in memory (such as a CD or an STE) before the command.
730  *
731  * - On completion of a CMD_SYNC, there is a control dependency.
732  *   This can be relied upon to order subsequent writes to memory (e.g.
733  *   freeing an IOVA) after completion of the CMD_SYNC.
734  *
735  * - Command insertion is totally ordered, so if two CPUs each race to
736  *   insert their own list of commands then all of the commands from one
737  *   CPU will appear before any of the commands from the other CPU.
738  */
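/*
 * For example, arm_smmu_write_ctx_desc() below leans on the first guarantee:
 * it fills in the CD in memory and then queues CFGI_CD + CMD_SYNC via
 * arm_smmu_sync_cd(), relying on the dma_wmb() here to make the CD contents
 * observable to the SMMU before the invalidation command is published.
 */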
739 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
740 				       u64 *cmds, int n, bool sync)
741 {
742 	u64 cmd_sync[CMDQ_ENT_DWORDS];
743 	u32 prod;
744 	unsigned long flags;
745 	bool owner;
746 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
747 	struct arm_smmu_ll_queue llq = {
748 		.max_n_shift = cmdq->q.llq.max_n_shift,
749 	}, head = llq;
750 	int ret = 0;
751 
752 	/* 1. Allocate some space in the queue */
753 	local_irq_save(flags);
754 	llq.val = READ_ONCE(cmdq->q.llq.val);
755 	do {
756 		u64 old;
757 
758 		while (!queue_has_space(&llq, n + sync)) {
759 			local_irq_restore(flags);
760 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
761 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
762 			local_irq_save(flags);
763 		}
764 
765 		head.cons = llq.cons;
766 		head.prod = queue_inc_prod_n(&llq, n + sync) |
767 					     CMDQ_PROD_OWNED_FLAG;
768 
769 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
770 		if (old == llq.val)
771 			break;
772 
773 		llq.val = old;
774 	} while (1);
775 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
776 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
777 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
778 
779 	/*
780 	 * 2. Write our commands into the queue
781 	 * Dependency ordering from the cmpxchg() loop above.
782 	 */
783 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
784 	if (sync) {
785 		prod = queue_inc_prod_n(&llq, n);
786 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
787 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
788 
789 		/*
790 		 * In order to determine completion of our CMD_SYNC, we must
791 		 * ensure that the queue can't wrap twice without us noticing.
792 		 * We achieve that by taking the cmdq lock as shared before
793 		 * marking our slot as valid.
794 		 */
795 		arm_smmu_cmdq_shared_lock(cmdq);
796 	}
797 
798 	/* 3. Mark our slots as valid, ensuring commands are visible first */
799 	dma_wmb();
800 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
801 
802 	/* 4. If we are the owner, take control of the SMMU hardware */
803 	if (owner) {
804 		/* a. Wait for previous owner to finish */
805 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
806 
807 		/* b. Stop gathering work by clearing the owned flag */
808 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
809 						   &cmdq->q.llq.atomic.prod);
810 		prod &= ~CMDQ_PROD_OWNED_FLAG;
811 
812 		/*
813 		 * c. Wait for any gathered work to be written to the queue.
814 		 * Note that we read our own entries so that we have the control
815 		 * dependency required by (d).
816 		 */
817 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
818 
819 		/*
820 		 * d. Advance the hardware prod pointer
821 		 * Control dependency ordering from the entries becoming valid.
822 		 */
823 		writel_relaxed(prod, cmdq->q.prod_reg);
824 
825 		/*
826 		 * e. Tell the next owner we're done
827 		 * Make sure we've updated the hardware first, so that we don't
828 		 * race to update prod and potentially move it backwards.
829 		 */
830 		atomic_set_release(&cmdq->owner_prod, prod);
831 	}
832 
833 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
834 	if (sync) {
835 		llq.prod = queue_inc_prod_n(&llq, n);
836 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
837 		if (ret) {
838 			dev_err_ratelimited(smmu->dev,
839 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
840 					    llq.prod,
841 					    readl_relaxed(cmdq->q.prod_reg),
842 					    readl_relaxed(cmdq->q.cons_reg));
843 		}
844 
845 		/*
846 		 * Try to unlock the cmdq lock. This will fail if we're the last
847 		 * reader, in which case we can safely update cmdq->q.llq.cons
848 		 */
849 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
850 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
851 			arm_smmu_cmdq_shared_unlock(cmdq);
852 		}
853 	}
854 
855 	local_irq_restore(flags);
856 	return ret;
857 }
858 
859 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
860 				   struct arm_smmu_cmdq_ent *ent)
861 {
862 	u64 cmd[CMDQ_ENT_DWORDS];
863 
864 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
865 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
866 			 ent->opcode);
867 		return -EINVAL;
868 	}
869 
870 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
871 }
872 
873 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
874 {
875 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
876 }
877 
878 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
879 				    struct arm_smmu_cmdq_batch *cmds,
880 				    struct arm_smmu_cmdq_ent *cmd)
881 {
882 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
883 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
884 		cmds->num = 0;
885 	}
886 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
887 	cmds->num++;
888 }
889 
890 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
891 				      struct arm_smmu_cmdq_batch *cmds)
892 {
893 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
894 }
895 
896 /* Context descriptor manipulation functions */
897 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
898 {
899 	struct arm_smmu_cmdq_ent cmd = {
900 		.opcode = CMDQ_OP_TLBI_NH_ASID,
901 		.tlbi.asid = asid,
902 	};
903 
904 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
905 	arm_smmu_cmdq_issue_sync(smmu);
906 }
907 
908 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
909 			     int ssid, bool leaf)
910 {
911 	size_t i;
912 	unsigned long flags;
913 	struct arm_smmu_master *master;
914 	struct arm_smmu_cmdq_batch cmds = {};
915 	struct arm_smmu_device *smmu = smmu_domain->smmu;
916 	struct arm_smmu_cmdq_ent cmd = {
917 		.opcode	= CMDQ_OP_CFGI_CD,
918 		.cfgi	= {
919 			.ssid	= ssid,
920 			.leaf	= leaf,
921 		},
922 	};
923 
924 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
925 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
926 		for (i = 0; i < master->num_sids; i++) {
927 			cmd.cfgi.sid = master->sids[i];
928 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
929 		}
930 	}
931 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
932 
933 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
934 }
935 
936 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
937 					struct arm_smmu_l1_ctx_desc *l1_desc)
938 {
939 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
940 
941 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
942 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
943 	if (!l1_desc->l2ptr) {
944 		dev_warn(smmu->dev,
945 			 "failed to allocate context descriptor table\n");
946 		return -ENOMEM;
947 	}
948 	return 0;
949 }
950 
951 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
952 				      struct arm_smmu_l1_ctx_desc *l1_desc)
953 {
954 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
955 		  CTXDESC_L1_DESC_V;
956 
957 	/* See comment in arm_smmu_write_ctx_desc() */
958 	WRITE_ONCE(*dst, cpu_to_le64(val));
959 }
960 
961 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
962 				   u32 ssid)
963 {
964 	__le64 *l1ptr;
965 	unsigned int idx;
966 	struct arm_smmu_l1_ctx_desc *l1_desc;
967 	struct arm_smmu_device *smmu = smmu_domain->smmu;
968 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
969 
970 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
971 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
972 
973 	idx = ssid >> CTXDESC_SPLIT;
974 	l1_desc = &cdcfg->l1_desc[idx];
975 	if (!l1_desc->l2ptr) {
976 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
977 			return NULL;
978 
979 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
980 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
981 		/* An invalid L1CD can be cached */
982 		arm_smmu_sync_cd(smmu_domain, ssid, false);
983 	}
984 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
985 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
986 }
987 
988 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
989 			    struct arm_smmu_ctx_desc *cd)
990 {
991 	/*
992 	 * This function handles the following cases:
993 	 *
994 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
995 	 * (2) Install a secondary CD, for SID+SSID traffic.
996 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
997 	 *     CD, then invalidate the old entry and mappings.
998 	 * (4) Remove a secondary CD.
999 	 */
1000 	u64 val;
1001 	bool cd_live;
1002 	__le64 *cdptr;
1003 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1004 
1005 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1006 		return -E2BIG;
1007 
1008 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1009 	if (!cdptr)
1010 		return -ENOMEM;
1011 
1012 	val = le64_to_cpu(cdptr[0]);
1013 	cd_live = !!(val & CTXDESC_CD_0_V);
1014 
1015 	if (!cd) { /* (4) */
1016 		val = 0;
1017 	} else if (cd_live) { /* (3) */
1018 		val &= ~CTXDESC_CD_0_ASID;
1019 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1020 		/*
1021 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1022 		 * this substream's traffic
1023 		 */
1024 	} else { /* (1) and (2) */
1025 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1026 		cdptr[2] = 0;
1027 		cdptr[3] = cpu_to_le64(cd->mair);
1028 
1029 		/*
1030 		 * STE is live, and the SMMU might read dwords of this CD in any
1031 		 * order. Ensure that it observes valid values before reading
1032 		 * V=1.
1033 		 */
1034 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1035 
1036 		val = cd->tcr |
1037 #ifdef __BIG_ENDIAN
1038 			CTXDESC_CD_0_ENDI |
1039 #endif
1040 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1041 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1042 			CTXDESC_CD_0_AA64 |
1043 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1044 			CTXDESC_CD_0_V;
1045 
1046 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1047 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1048 			val |= CTXDESC_CD_0_S;
1049 	}
1050 
1051 	/*
1052 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1053 	 * "Configuration structures and configuration invalidation completion"
1054 	 *
1055 	 *   The size of single-copy atomic reads made by the SMMU is
1056 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1057 	 *   field within an aligned 64-bit span of a structure can be altered
1058 	 *   without first making the structure invalid.
1059 	 */
1060 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1061 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1062 	return 0;
1063 }
1064 
1065 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1066 {
1067 	int ret;
1068 	size_t l1size;
1069 	size_t max_contexts;
1070 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1071 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1072 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1073 
1074 	max_contexts = 1 << cfg->s1cdmax;
1075 
1076 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1077 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1078 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1079 		cdcfg->num_l1_ents = max_contexts;
1080 
1081 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1082 	} else {
1083 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1084 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1085 						  CTXDESC_L2_ENTRIES);
1086 
1087 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1088 					      sizeof(*cdcfg->l1_desc),
1089 					      GFP_KERNEL);
1090 		if (!cdcfg->l1_desc)
1091 			return -ENOMEM;
1092 
1093 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1094 	}
1095 
1096 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1097 					   GFP_KERNEL);
1098 	if (!cdcfg->cdtab) {
1099 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1100 		ret = -ENOMEM;
1101 		goto err_free_l1;
1102 	}
1103 
1104 	return 0;
1105 
1106 err_free_l1:
1107 	if (cdcfg->l1_desc) {
1108 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1109 		cdcfg->l1_desc = NULL;
1110 	}
1111 	return ret;
1112 }
1113 
1114 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1115 {
1116 	int i;
1117 	size_t size, l1size;
1118 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1119 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1120 
1121 	if (cdcfg->l1_desc) {
1122 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1123 
1124 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1125 			if (!cdcfg->l1_desc[i].l2ptr)
1126 				continue;
1127 
1128 			dmam_free_coherent(smmu->dev, size,
1129 					   cdcfg->l1_desc[i].l2ptr,
1130 					   cdcfg->l1_desc[i].l2ptr_dma);
1131 		}
1132 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1133 		cdcfg->l1_desc = NULL;
1134 
1135 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1136 	} else {
1137 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1138 	}
1139 
1140 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1141 	cdcfg->cdtab_dma = 0;
1142 	cdcfg->cdtab = NULL;
1143 }
1144 
1145 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1146 {
1147 	bool free;
1148 	struct arm_smmu_ctx_desc *old_cd;
1149 
1150 	if (!cd->asid)
1151 		return false;
1152 
1153 	free = refcount_dec_and_test(&cd->refs);
1154 	if (free) {
1155 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1156 		WARN_ON(old_cd != cd);
1157 	}
1158 	return free;
1159 }
1160 
1161 /* Stream table manipulation functions */
1162 static void
1163 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1164 {
1165 	u64 val = 0;
1166 
1167 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1168 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1169 
1170 	/* See comment in arm_smmu_write_ctx_desc() */
1171 	WRITE_ONCE(*dst, cpu_to_le64(val));
1172 }
1173 
1174 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1175 {
1176 	struct arm_smmu_cmdq_ent cmd = {
1177 		.opcode	= CMDQ_OP_CFGI_STE,
1178 		.cfgi	= {
1179 			.sid	= sid,
1180 			.leaf	= true,
1181 		},
1182 	};
1183 
1184 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1185 	arm_smmu_cmdq_issue_sync(smmu);
1186 }
1187 
1188 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1189 				      __le64 *dst)
1190 {
1191 	/*
1192 	 * This is hideously complicated, but we only really care about
1193 	 * three cases at the moment:
1194 	 *
1195 	 * 1. Invalid (all zero) -> bypass/fault (init)
1196 	 * 2. Bypass/fault -> translation/bypass (attach)
1197 	 * 3. Translation/bypass -> bypass/fault (detach)
1198 	 *
1199 	 * Given that we can't update the STE atomically and the SMMU
1200 	 * doesn't read the thing in a defined order, that leaves us
1201 	 * with the following maintenance requirements:
1202 	 *
1203 	 * 1. Update Config, return (init time STEs aren't live)
1204 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1205 	 * 3. Update Config, sync
1206 	 */
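	/*
	 * Requirement 2 is what the tail of this function implements: dwords
	 * 1-3 are written first, the STE is synced, and only then is dword 0
	 * published with WRITE_ONCE() and synced again.
	 */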
1207 	u64 val = le64_to_cpu(dst[0]);
1208 	bool ste_live = false;
1209 	struct arm_smmu_device *smmu = NULL;
1210 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1211 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1212 	struct arm_smmu_domain *smmu_domain = NULL;
1213 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1214 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1215 		.prefetch	= {
1216 			.sid	= sid,
1217 		},
1218 	};
1219 
1220 	if (master) {
1221 		smmu_domain = master->domain;
1222 		smmu = master->smmu;
1223 	}
1224 
1225 	if (smmu_domain) {
1226 		switch (smmu_domain->stage) {
1227 		case ARM_SMMU_DOMAIN_S1:
1228 			s1_cfg = &smmu_domain->s1_cfg;
1229 			break;
1230 		case ARM_SMMU_DOMAIN_S2:
1231 		case ARM_SMMU_DOMAIN_NESTED:
1232 			s2_cfg = &smmu_domain->s2_cfg;
1233 			break;
1234 		default:
1235 			break;
1236 		}
1237 	}
1238 
1239 	if (val & STRTAB_STE_0_V) {
1240 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1241 		case STRTAB_STE_0_CFG_BYPASS:
1242 			break;
1243 		case STRTAB_STE_0_CFG_S1_TRANS:
1244 		case STRTAB_STE_0_CFG_S2_TRANS:
1245 			ste_live = true;
1246 			break;
1247 		case STRTAB_STE_0_CFG_ABORT:
1248 			BUG_ON(!disable_bypass);
1249 			break;
1250 		default:
1251 			BUG(); /* STE corruption */
1252 		}
1253 	}
1254 
1255 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1256 	val = STRTAB_STE_0_V;
1257 
1258 	/* Bypass/fault */
1259 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1260 		if (!smmu_domain && disable_bypass)
1261 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1262 		else
1263 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1264 
1265 		dst[0] = cpu_to_le64(val);
1266 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1267 						STRTAB_STE_1_SHCFG_INCOMING));
1268 		dst[2] = 0; /* Nuke the VMID */
1269 		/*
1270 		 * The SMMU can perform negative caching, so we must sync
1271 		 * the STE regardless of whether the old value was live.
1272 		 */
1273 		if (smmu)
1274 			arm_smmu_sync_ste_for_sid(smmu, sid);
1275 		return;
1276 	}
1277 
1278 	if (s1_cfg) {
1279 		BUG_ON(ste_live);
1280 		dst[1] = cpu_to_le64(
1281 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1282 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1283 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1284 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1285 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1286 
1287 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1288 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1289 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1290 
1291 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1292 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1293 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1294 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1295 	}
1296 
1297 	if (s2_cfg) {
1298 		BUG_ON(ste_live);
1299 		dst[2] = cpu_to_le64(
1300 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1301 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1302 #ifdef __BIG_ENDIAN
1303 			 STRTAB_STE_2_S2ENDI |
1304 #endif
1305 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1306 			 STRTAB_STE_2_S2R);
1307 
1308 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1309 
1310 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1311 	}
1312 
1313 	if (master->ats_enabled)
1314 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1315 						 STRTAB_STE_1_EATS_TRANS));
1316 
1317 	arm_smmu_sync_ste_for_sid(smmu, sid);
1318 	/* See comment in arm_smmu_write_ctx_desc() */
1319 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1320 	arm_smmu_sync_ste_for_sid(smmu, sid);
1321 
1322 	/* It's likely that we'll want to use the new STE soon */
1323 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1324 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1325 }
1326 
1327 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1328 {
1329 	unsigned int i;
1330 
1331 	for (i = 0; i < nent; ++i) {
1332 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1333 		strtab += STRTAB_STE_DWORDS;
1334 	}
1335 }
1336 
1337 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1338 {
1339 	size_t size;
1340 	void *strtab;
1341 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1342 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1343 
1344 	if (desc->l2ptr)
1345 		return 0;
1346 
1347 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1348 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1349 
1350 	desc->span = STRTAB_SPLIT + 1;
1351 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1352 					  GFP_KERNEL);
1353 	if (!desc->l2ptr) {
1354 		dev_err(smmu->dev,
1355 			"failed to allocate l2 stream table for SID %u\n",
1356 			sid);
1357 		return -ENOMEM;
1358 	}
1359 
1360 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1361 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1362 	return 0;
1363 }
1364 
1365 /* IRQ and event handlers */
1366 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1367 {
1368 	int i;
1369 	struct arm_smmu_device *smmu = dev;
1370 	struct arm_smmu_queue *q = &smmu->evtq.q;
1371 	struct arm_smmu_ll_queue *llq = &q->llq;
1372 	u64 evt[EVTQ_ENT_DWORDS];
1373 
1374 	do {
1375 		while (!queue_remove_raw(q, evt)) {
1376 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1377 
1378 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1379 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1380 				dev_info(smmu->dev, "\t0x%016llx\n",
1381 					 (unsigned long long)evt[i]);
1382 
1383 			cond_resched();
1384 		}
1385 
1386 		/*
1387 		 * Not much we can do on overflow, so scream and pretend we're
1388 		 * trying harder.
1389 		 */
1390 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1391 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1392 	} while (!queue_empty(llq));
1393 
1394 	/* Sync our overflow flag, as we believe we're up to speed */
1395 	queue_sync_cons_ovf(q);
1396 	return IRQ_HANDLED;
1397 }
1398 
1399 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1400 {
1401 	u32 sid, ssid;
1402 	u16 grpid;
1403 	bool ssv, last;
1404 
1405 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1406 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1407 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1408 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1409 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1410 
1411 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1412 	dev_info(smmu->dev,
1413 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1414 		 sid, ssid, grpid, last ? "L" : "",
1415 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1416 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1417 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1418 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1419 		 evt[1] & PRIQ_1_ADDR_MASK);
1420 
1421 	if (last) {
1422 		struct arm_smmu_cmdq_ent cmd = {
1423 			.opcode			= CMDQ_OP_PRI_RESP,
1424 			.substream_valid	= ssv,
1425 			.pri			= {
1426 				.sid	= sid,
1427 				.ssid	= ssid,
1428 				.grpid	= grpid,
1429 				.resp	= PRI_RESP_DENY,
1430 			},
1431 		};
1432 
1433 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1434 	}
1435 }
1436 
1437 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1438 {
1439 	struct arm_smmu_device *smmu = dev;
1440 	struct arm_smmu_queue *q = &smmu->priq.q;
1441 	struct arm_smmu_ll_queue *llq = &q->llq;
1442 	u64 evt[PRIQ_ENT_DWORDS];
1443 
1444 	do {
1445 		while (!queue_remove_raw(q, evt))
1446 			arm_smmu_handle_ppr(smmu, evt);
1447 
1448 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1449 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1450 	} while (!queue_empty(llq));
1451 
1452 	/* Sync our overflow flag, as we believe we're up to speed */
1453 	queue_sync_cons_ovf(q);
1454 	return IRQ_HANDLED;
1455 }
1456 
1457 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1458 
1459 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1460 {
1461 	u32 gerror, gerrorn, active;
1462 	struct arm_smmu_device *smmu = dev;
1463 
1464 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1465 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1466 
1467 	active = gerror ^ gerrorn;
1468 	if (!(active & GERROR_ERR_MASK))
1469 		return IRQ_NONE; /* No errors pending */
1470 
1471 	dev_warn(smmu->dev,
1472 		 "unexpected global error reported (0x%08x), this could be serious\n",
1473 		 active);
1474 
1475 	if (active & GERROR_SFM_ERR) {
1476 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1477 		arm_smmu_device_disable(smmu);
1478 	}
1479 
1480 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1481 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1482 
1483 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1484 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1485 
1486 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1487 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1488 
1489 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1490 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1491 
1492 	if (active & GERROR_PRIQ_ABT_ERR)
1493 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1494 
1495 	if (active & GERROR_EVTQ_ABT_ERR)
1496 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1497 
1498 	if (active & GERROR_CMDQ_ERR)
1499 		arm_smmu_cmdq_skip_err(smmu);
1500 
1501 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1502 	return IRQ_HANDLED;
1503 }
1504 
1505 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1506 {
1507 	struct arm_smmu_device *smmu = dev;
1508 
1509 	arm_smmu_evtq_thread(irq, dev);
1510 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1511 		arm_smmu_priq_thread(irq, dev);
1512 
1513 	return IRQ_HANDLED;
1514 }
1515 
1516 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1517 {
1518 	arm_smmu_gerror_handler(irq, dev);
1519 	return IRQ_WAKE_THREAD;
1520 }
1521 
1522 static void
1523 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1524 			struct arm_smmu_cmdq_ent *cmd)
1525 {
1526 	size_t log2_span;
1527 	size_t span_mask;
1528 	/* ATC invalidates are always on 4096-byte pages */
1529 	size_t inval_grain_shift = 12;
1530 	unsigned long page_start, page_end;
1531 
1532 	*cmd = (struct arm_smmu_cmdq_ent) {
1533 		.opcode			= CMDQ_OP_ATC_INV,
1534 		.substream_valid	= !!ssid,
1535 		.atc.ssid		= ssid,
1536 	};
1537 
1538 	if (!size) {
1539 		cmd->atc.size = ATC_INV_SIZE_ALL;
1540 		return;
1541 	}
1542 
1543 	page_start	= iova >> inval_grain_shift;
1544 	page_end	= (iova + size - 1) >> inval_grain_shift;
1545 
1546 	/*
1547 	 * In an ATS Invalidate Request, the address must be aligned on the
1548 	 * range size, which must be a power of two number of page sizes. We
1549 	 * thus have to choose between grossly over-invalidating the region, or
1550 	 * splitting the invalidation into multiple commands. For simplicity
1551 	 * we'll go with the first solution, but should refine it in the future
1552 	 * if multiple commands are shown to be more efficient.
1553 	 *
1554 	 * Find the smallest power of two that covers the range. The most
1555 	 * significant differing bit between the start and end addresses,
1556 	 * fls(start ^ end), indicates the required span. For example:
1557 	 *
1558 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1559 	 *		x = 0b1000 ^ 0b1011 = 0b11
1560 	 *		span = 1 << fls(x) = 4
1561 	 *
1562 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1563 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1564 	 *		span = 1 << fls(x) = 16
1565 	 */
1566 	log2_span	= fls_long(page_start ^ page_end);
1567 	span_mask	= (1ULL << log2_span) - 1;
1568 
1569 	page_start	&= ~span_mask;
1570 
1571 	cmd->atc.addr	= page_start << inval_grain_shift;
1572 	cmd->atc.size	= log2_span;
1573 }
1574 
1575 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1576 {
1577 	int i;
1578 	struct arm_smmu_cmdq_ent cmd;
1579 
1580 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1581 
1582 	for (i = 0; i < master->num_sids; i++) {
1583 		cmd.atc.sid = master->sids[i];
1584 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1585 	}
1586 
1587 	return arm_smmu_cmdq_issue_sync(master->smmu);
1588 }
1589 
1590 static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1591 				   int ssid, unsigned long iova, size_t size)
1592 {
1593 	int i;
1594 	unsigned long flags;
1595 	struct arm_smmu_cmdq_ent cmd;
1596 	struct arm_smmu_master *master;
1597 	struct arm_smmu_cmdq_batch cmds = {};
1598 
1599 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1600 		return 0;
1601 
1602 	/*
1603 	 * Ensure that we've completed prior invalidation of the main TLBs
1604 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1605 	 * arm_smmu_enable_ats():
1606 	 *
1607 	 *	// unmap()			// arm_smmu_enable_ats()
1608 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1609 	 *	smp_mb();			[...]
1610 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1611 	 *
1612 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1613 	 * ATS was enabled at the PCI device before completion of the TLBI.
1614 	 */
1615 	smp_mb();
1616 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1617 		return 0;
1618 
1619 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1620 
1621 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1622 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1623 		if (!master->ats_enabled)
1624 			continue;
1625 
1626 		for (i = 0; i < master->num_sids; i++) {
1627 			cmd.atc.sid = master->sids[i];
1628 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1629 		}
1630 	}
1631 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1632 
1633 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1634 }
1635 
1636 /* IO_PGTABLE API */
1637 static void arm_smmu_tlb_inv_context(void *cookie)
1638 {
1639 	struct arm_smmu_domain *smmu_domain = cookie;
1640 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1641 	struct arm_smmu_cmdq_ent cmd;
1642 
1643 	/*
1644 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1645 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1646 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1647 	 * insertion to guarantee those are observed before the TLBI. Do be
1648 	 * careful, 007.
1649 	 */
1650 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1651 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1652 	} else {
1653 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1654 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1655 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1656 		arm_smmu_cmdq_issue_sync(smmu);
1657 	}
1658 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1659 }
1660 
1661 static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1662 				   size_t granule, bool leaf,
1663 				   struct arm_smmu_domain *smmu_domain)
1664 {
1665 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1666 	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1667 	size_t inv_range = granule;
1668 	struct arm_smmu_cmdq_batch cmds = {};
1669 	struct arm_smmu_cmdq_ent cmd = {
1670 		.tlbi = {
1671 			.leaf	= leaf,
1672 		},
1673 	};
1674 
1675 	if (!size)
1676 		return;
1677 
1678 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1679 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1680 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1681 	} else {
1682 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1683 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1684 	}
1685 
1686 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1687 		/* Get the leaf page size */
1688 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1689 
1690 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1691 		cmd.tlbi.tg = (tg - 10) / 2;
1692 
1693 		/* Determine what level the granule is at */
1694 		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1695 
1696 		num_pages = size >> tg;
1697 	}
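	/*
	 * Rough example of the encoding above, assuming a 4K leaf page size
	 * (tg = 12, so cmd.tlbi.tg = 1): a 2M granule (ilog2 = 21) yields
	 * ttl = 4 - (18 / 9) = 2 (level-2 block), while a 4K granule yields
	 * ttl = 3 (level-3 leaf).
	 */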
1698 
1699 	while (iova < end) {
1700 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1701 			/*
1702 			 * On each iteration of the loop, the range is 5 bits
1703 			 * worth of the aligned size remaining.
1704 			 * The range in pages is:
1705 			 *
1706 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1707 			 */
1708 			unsigned long scale, num;
1709 
1710 			/* Determine the power of 2 multiple number of pages */
1711 			scale = __ffs(num_pages);
1712 			cmd.tlbi.scale = scale;
1713 
1714 			/* Determine how many chunks of 2^scale size we have */
1715 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1716 			cmd.tlbi.num = num - 1;
1717 
1718 			/* range is num * 2^scale * pgsize */
1719 			inv_range = num << (scale + tg);
1720 
1721 			/* Clear out the lower order bits for the next iteration */
1722 			num_pages -= num << scale;
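			/*
			 * Example: num_pages = 35 is emitted as two commands,
			 * first scale = 0, num = 3 (12K), then scale = 5,
			 * num = 1 (128K), consuming the low-order bits first.
			 */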
1723 		}
1724 
1725 		cmd.tlbi.addr = iova;
1726 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1727 		iova += inv_range;
1728 	}
1729 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1730 
1731 	/*
1732 	 * Unfortunately, this can't be leaf-only since we may have
1733 	 * zapped an entire table.
1734 	 */
1735 	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1736 }
1737 
1738 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1739 					 unsigned long iova, size_t granule,
1740 					 void *cookie)
1741 {
1742 	struct arm_smmu_domain *smmu_domain = cookie;
1743 	struct iommu_domain *domain = &smmu_domain->domain;
1744 
1745 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1746 }
1747 
1748 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1749 				  size_t granule, void *cookie)
1750 {
1751 	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1752 }
1753 
1754 static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
1755 				  size_t granule, void *cookie)
1756 {
1757 	arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
1758 }
1759 
1760 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1761 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1762 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1763 	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
1764 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1765 };
1766 
1767 /* IOMMU API */
1768 static bool arm_smmu_capable(enum iommu_cap cap)
1769 {
1770 	switch (cap) {
1771 	case IOMMU_CAP_CACHE_COHERENCY:
1772 		return true;
1773 	case IOMMU_CAP_NOEXEC:
1774 		return true;
1775 	default:
1776 		return false;
1777 	}
1778 }
1779 
1780 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1781 {
1782 	struct arm_smmu_domain *smmu_domain;
1783 
1784 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1785 	    type != IOMMU_DOMAIN_DMA &&
1786 	    type != IOMMU_DOMAIN_IDENTITY)
1787 		return NULL;
1788 
1789 	/*
1790 	 * Allocate the domain and initialise some of its data structures.
1791 	 * We can't really do anything meaningful until we've added a
1792 	 * master.
1793 	 */
1794 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1795 	if (!smmu_domain)
1796 		return NULL;
1797 
1798 	if (type == IOMMU_DOMAIN_DMA &&
1799 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1800 		kfree(smmu_domain);
1801 		return NULL;
1802 	}
1803 
1804 	mutex_init(&smmu_domain->init_mutex);
1805 	INIT_LIST_HEAD(&smmu_domain->devices);
1806 	spin_lock_init(&smmu_domain->devices_lock);
1807 
1808 	return &smmu_domain->domain;
1809 }
1810 
1811 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1812 {
1813 	int idx, size = 1 << span;
1814 
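	/*
	 * find_first_zero_bit() and test_and_set_bit() are not atomic as a
	 * pair, so retry the search if another CPU claims the same bit
	 * between the two calls.
	 */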
1815 	do {
1816 		idx = find_first_zero_bit(map, size);
1817 		if (idx == size)
1818 			return -ENOSPC;
1819 	} while (test_and_set_bit(idx, map));
1820 
1821 	return idx;
1822 }
1823 
1824 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1825 {
1826 	clear_bit(idx, map);
1827 }
1828 
1829 static void arm_smmu_domain_free(struct iommu_domain *domain)
1830 {
1831 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1832 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1833 
1834 	iommu_put_dma_cookie(domain);
1835 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1836 
1837 	/* Free the CD and ASID, if we allocated them */
1838 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1839 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1840 
1841 		/* Prevent SVA from touching the CD while we're freeing it */
1842 		mutex_lock(&arm_smmu_asid_lock);
1843 		if (cfg->cdcfg.cdtab)
1844 			arm_smmu_free_cd_tables(smmu_domain);
1845 		arm_smmu_free_asid(&cfg->cd);
1846 		mutex_unlock(&arm_smmu_asid_lock);
1847 	} else {
1848 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1849 		if (cfg->vmid)
1850 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1851 	}
1852 
1853 	kfree(smmu_domain);
1854 }
1855 
1856 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1857 				       struct arm_smmu_master *master,
1858 				       struct io_pgtable_cfg *pgtbl_cfg)
1859 {
1860 	int ret;
1861 	u32 asid;
1862 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1863 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1864 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1865 
1866 	refcount_set(&cfg->cd.refs, 1);
1867 
1868 	/* Prevent SVA from modifying the ASID until it is written to the CD */
1869 	mutex_lock(&arm_smmu_asid_lock);
1870 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1871 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1872 	if (ret)
1873 		goto out_unlock;
1874 
1875 	cfg->s1cdmax = master->ssid_bits;
1876 
1877 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
1878 	if (ret)
1879 		goto out_free_asid;
1880 
1881 	cfg->cd.asid	= (u16)asid;
1882 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
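	/*
	 * Pack the io-pgtable TCR fields into the CD: only TTB0 is used,
	 * TTB1 walks are disabled via EPD1 and the AArch64 descriptor
	 * format is selected.
	 */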
1883 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1884 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1885 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1886 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1887 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1888 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1889 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1890 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
1891 
1892 	/*
1893 	 * Note that this will end up calling arm_smmu_sync_cd() before
1894 	 * the master has been added to the devices list for this domain.
1895 	 * This isn't an issue because the STE hasn't been installed yet.
1896 	 */
1897 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1898 	if (ret)
1899 		goto out_free_cd_tables;
1900 
1901 	mutex_unlock(&arm_smmu_asid_lock);
1902 	return 0;
1903 
1904 out_free_cd_tables:
1905 	arm_smmu_free_cd_tables(smmu_domain);
1906 out_free_asid:
1907 	arm_smmu_free_asid(&cfg->cd);
1908 out_unlock:
1909 	mutex_unlock(&arm_smmu_asid_lock);
1910 	return ret;
1911 }
1912 
1913 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1914 				       struct arm_smmu_master *master,
1915 				       struct io_pgtable_cfg *pgtbl_cfg)
1916 {
1917 	int vmid;
1918 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1919 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1920 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1921 
1922 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1923 	if (vmid < 0)
1924 		return vmid;
1925 
1926 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1927 	cfg->vmid	= (u16)vmid;
1928 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1929 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1930 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1931 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1932 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1933 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1934 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1935 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1936 	return 0;
1937 }
1938 
1939 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1940 				    struct arm_smmu_master *master)
1941 {
1942 	int ret;
1943 	unsigned long ias, oas;
1944 	enum io_pgtable_fmt fmt;
1945 	struct io_pgtable_cfg pgtbl_cfg;
1946 	struct io_pgtable_ops *pgtbl_ops;
1947 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1948 				 struct arm_smmu_master *,
1949 				 struct io_pgtable_cfg *);
1950 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1951 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1952 
1953 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1954 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1955 		return 0;
1956 	}
1957 
1958 	/* Restrict the stage to what we can actually support */
1959 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1960 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1961 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1962 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1963 
1964 	switch (smmu_domain->stage) {
1965 	case ARM_SMMU_DOMAIN_S1:
1966 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1967 		ias = min_t(unsigned long, ias, VA_BITS);
1968 		oas = smmu->ias;
1969 		fmt = ARM_64_LPAE_S1;
1970 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1971 		break;
1972 	case ARM_SMMU_DOMAIN_NESTED:
1973 	case ARM_SMMU_DOMAIN_S2:
1974 		ias = smmu->ias;
1975 		oas = smmu->oas;
1976 		fmt = ARM_64_LPAE_S2;
1977 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1978 		break;
1979 	default:
1980 		return -EINVAL;
1981 	}
1982 
1983 	pgtbl_cfg = (struct io_pgtable_cfg) {
1984 		.pgsize_bitmap	= smmu->pgsize_bitmap,
1985 		.ias		= ias,
1986 		.oas		= oas,
1987 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
1988 		.tlb		= &arm_smmu_flush_ops,
1989 		.iommu_dev	= smmu->dev,
1990 	};
1991 
1992 	if (smmu_domain->non_strict)
1993 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1994 
1995 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1996 	if (!pgtbl_ops)
1997 		return -ENOMEM;
1998 
1999 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2000 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2001 	domain->geometry.force_aperture = true;
2002 
2003 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2004 	if (ret < 0) {
2005 		free_io_pgtable_ops(pgtbl_ops);
2006 		return ret;
2007 	}
2008 
2009 	smmu_domain->pgtbl_ops = pgtbl_ops;
2010 	return 0;
2011 }
2012 
2013 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2014 {
2015 	__le64 *step;
2016 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2017 
2018 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2019 		struct arm_smmu_strtab_l1_desc *l1_desc;
2020 		int idx;
2021 
2022 		/* Two-level walk */
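		/*
		 * The SID is split into an L1 index (upper bits) and an L2
		 * index (low STRTAB_SPLIT bits); e.g. with a split of 8,
		 * SID 0x1234 selects L1 descriptor 0x12 and STE 0x34 within
		 * its L2 table.
		 */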
2023 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2024 		l1_desc = &cfg->l1_desc[idx];
2025 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2026 		step = &l1_desc->l2ptr[idx];
2027 	} else {
2028 		/* Simple linear lookup */
2029 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2030 	}
2031 
2032 	return step;
2033 }
2034 
2035 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2036 {
2037 	int i, j;
2038 	struct arm_smmu_device *smmu = master->smmu;
2039 
2040 	for (i = 0; i < master->num_sids; ++i) {
2041 		u32 sid = master->sids[i];
2042 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2043 
2044 		/* Bridged PCI devices may end up with duplicated IDs */
2045 		for (j = 0; j < i; j++)
2046 			if (master->sids[j] == sid)
2047 				break;
2048 		if (j < i)
2049 			continue;
2050 
2051 		arm_smmu_write_strtab_ent(master, sid, step);
2052 	}
2053 }
2054 
2055 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2056 {
2057 	struct device *dev = master->dev;
2058 	struct arm_smmu_device *smmu = master->smmu;
2059 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2060 
2061 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2062 		return false;
2063 
2064 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2065 		return false;
2066 
2067 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2068 }
2069 
2070 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2071 {
2072 	size_t stu;
2073 	struct pci_dev *pdev;
2074 	struct arm_smmu_device *smmu = master->smmu;
2075 	struct arm_smmu_domain *smmu_domain = master->domain;
2076 
2077 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2078 	if (!master->ats_enabled)
2079 		return;
2080 
2081 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2082 	stu = __ffs(smmu->pgsize_bitmap);
2083 	pdev = to_pci_dev(master->dev);
2084 
2085 	atomic_inc(&smmu_domain->nr_ats_masters);
2086 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2087 	if (pci_enable_ats(pdev, stu))
2088 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2089 }
2090 
2091 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2092 {
2093 	struct arm_smmu_domain *smmu_domain = master->domain;
2094 
2095 	if (!master->ats_enabled)
2096 		return;
2097 
2098 	pci_disable_ats(to_pci_dev(master->dev));
2099 	/*
2100 	 * Ensure ATS is disabled at the endpoint before we issue the
2101 	 * ATC invalidation via the SMMU.
2102 	 */
2103 	wmb();
2104 	arm_smmu_atc_inv_master(master);
2105 	atomic_dec(&smmu_domain->nr_ats_masters);
2106 }
2107 
2108 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2109 {
2110 	int ret;
2111 	int features;
2112 	int num_pasids;
2113 	struct pci_dev *pdev;
2114 
2115 	if (!dev_is_pci(master->dev))
2116 		return -ENODEV;
2117 
2118 	pdev = to_pci_dev(master->dev);
2119 
2120 	features = pci_pasid_features(pdev);
2121 	if (features < 0)
2122 		return features;
2123 
2124 	num_pasids = pci_max_pasids(pdev);
2125 	if (num_pasids <= 0)
2126 		return num_pasids;
2127 
2128 	ret = pci_enable_pasid(pdev, features);
2129 	if (ret) {
2130 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2131 		return ret;
2132 	}
2133 
2134 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2135 				  master->smmu->ssid_bits);
2136 	return 0;
2137 }
2138 
2139 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2140 {
2141 	struct pci_dev *pdev;
2142 
2143 	if (!dev_is_pci(master->dev))
2144 		return;
2145 
2146 	pdev = to_pci_dev(master->dev);
2147 
2148 	if (!pdev->pasid_enabled)
2149 		return;
2150 
2151 	master->ssid_bits = 0;
2152 	pci_disable_pasid(pdev);
2153 }
2154 
2155 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2156 {
2157 	unsigned long flags;
2158 	struct arm_smmu_domain *smmu_domain = master->domain;
2159 
2160 	if (!smmu_domain)
2161 		return;
2162 
2163 	arm_smmu_disable_ats(master);
2164 
2165 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2166 	list_del(&master->domain_head);
2167 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2168 
2169 	master->domain = NULL;
2170 	master->ats_enabled = false;
2171 	arm_smmu_install_ste_for_dev(master);
2172 }
2173 
2174 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2175 {
2176 	int ret = 0;
2177 	unsigned long flags;
2178 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2179 	struct arm_smmu_device *smmu;
2180 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2181 	struct arm_smmu_master *master;
2182 
2183 	if (!fwspec)
2184 		return -ENOENT;
2185 
2186 	master = dev_iommu_priv_get(dev);
2187 	smmu = master->smmu;
2188 
2189 	/*
2190 	 * Checking that SVA is disabled ensures that this device isn't bound to
2191 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2192 	 * be removed concurrently since we're holding the group mutex.
2193 	 */
2194 	if (arm_smmu_master_sva_enabled(master)) {
2195 		dev_err(dev, "cannot attach - SVA enabled\n");
2196 		return -EBUSY;
2197 	}
2198 
2199 	arm_smmu_detach_dev(master);
2200 
2201 	mutex_lock(&smmu_domain->init_mutex);
2202 
2203 	if (!smmu_domain->smmu) {
2204 		smmu_domain->smmu = smmu;
2205 		ret = arm_smmu_domain_finalise(domain, master);
2206 		if (ret) {
2207 			smmu_domain->smmu = NULL;
2208 			goto out_unlock;
2209 		}
2210 	} else if (smmu_domain->smmu != smmu) {
2211 		dev_err(dev,
2212 			"cannot attach to SMMU %s (upstream of %s)\n",
2213 			dev_name(smmu_domain->smmu->dev),
2214 			dev_name(smmu->dev));
2215 		ret = -ENXIO;
2216 		goto out_unlock;
2217 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2218 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2219 		dev_err(dev,
2220 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2221 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2222 		ret = -EINVAL;
2223 		goto out_unlock;
2224 	}
2225 
2226 	master->domain = smmu_domain;
2227 
2228 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2229 		master->ats_enabled = arm_smmu_ats_supported(master);
2230 
2231 	arm_smmu_install_ste_for_dev(master);
2232 
2233 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2234 	list_add(&master->domain_head, &smmu_domain->devices);
2235 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2236 
2237 	arm_smmu_enable_ats(master);
2238 
2239 out_unlock:
2240 	mutex_unlock(&smmu_domain->init_mutex);
2241 	return ret;
2242 }
2243 
2244 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2245 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2246 {
2247 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2248 
2249 	if (!ops)
2250 		return -ENODEV;
2251 
2252 	return ops->map(ops, iova, paddr, size, prot, gfp);
2253 }
2254 
2255 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2256 			     size_t size, struct iommu_iotlb_gather *gather)
2257 {
2258 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2259 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2260 
2261 	if (!ops)
2262 		return 0;
2263 
2264 	return ops->unmap(ops, iova, size, gather);
2265 }
2266 
2267 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2268 {
2269 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2270 
2271 	if (smmu_domain->smmu)
2272 		arm_smmu_tlb_inv_context(smmu_domain);
2273 }
2274 
2275 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2276 				struct iommu_iotlb_gather *gather)
2277 {
2278 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2279 
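	/* gather->end is inclusive, hence the +1 to recover the size */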
2280 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start + 1,
2281 			       gather->pgsize, true, smmu_domain);
2282 }
2283 
2284 static phys_addr_t
2285 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2286 {
2287 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2288 
2289 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2290 		return iova;
2291 
2292 	if (!ops)
2293 		return 0;
2294 
2295 	return ops->iova_to_phys(ops, iova);
2296 }
2297 
2298 static struct platform_driver arm_smmu_driver;
2299 
2300 static
2301 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2302 {
2303 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2304 							  fwnode);
2305 	put_device(dev);
2306 	return dev ? dev_get_drvdata(dev) : NULL;
2307 }
2308 
2309 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2310 {
2311 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2312 
2313 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2314 		limit *= 1UL << STRTAB_SPLIT;
2315 
2316 	return sid < limit;
2317 }
2318 
2319 static struct iommu_ops arm_smmu_ops;
2320 
2321 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2322 {
2323 	int i, ret;
2324 	struct arm_smmu_device *smmu;
2325 	struct arm_smmu_master *master;
2326 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2327 
2328 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2329 		return ERR_PTR(-ENODEV);
2330 
2331 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2332 		return ERR_PTR(-EBUSY);
2333 
2334 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2335 	if (!smmu)
2336 		return ERR_PTR(-ENODEV);
2337 
2338 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2339 	if (!master)
2340 		return ERR_PTR(-ENOMEM);
2341 
2342 	master->dev = dev;
2343 	master->smmu = smmu;
2344 	master->sids = fwspec->ids;
2345 	master->num_sids = fwspec->num_ids;
2346 	INIT_LIST_HEAD(&master->bonds);
2347 	dev_iommu_priv_set(dev, master);
2348 
2349 	/* Check the SIDs are in range of the SMMU and our stream table */
2350 	for (i = 0; i < master->num_sids; i++) {
2351 		u32 sid = master->sids[i];
2352 
2353 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2354 			ret = -ERANGE;
2355 			goto err_free_master;
2356 		}
2357 
2358 		/* Ensure l2 strtab is initialised */
2359 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2360 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2361 			if (ret)
2362 				goto err_free_master;
2363 		}
2364 	}
2365 
2366 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2367 
2368 	/*
2369 	 * Note that PASID must be enabled before, and disabled after ATS:
2370 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2371 	 *
2372 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2373 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2374 	 *   are changed.
2375 	 */
2376 	arm_smmu_enable_pasid(master);
2377 
2378 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2379 		master->ssid_bits = min_t(u8, master->ssid_bits,
2380 					  CTXDESC_LINEAR_CDMAX);
2381 
2382 	return &smmu->iommu;
2383 
2384 err_free_master:
2385 	kfree(master);
2386 	dev_iommu_priv_set(dev, NULL);
2387 	return ERR_PTR(ret);
2388 }
2389 
2390 static void arm_smmu_release_device(struct device *dev)
2391 {
2392 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2393 	struct arm_smmu_master *master;
2394 
2395 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2396 		return;
2397 
2398 	master = dev_iommu_priv_get(dev);
2399 	WARN_ON(arm_smmu_master_sva_enabled(master));
2400 	arm_smmu_detach_dev(master);
2401 	arm_smmu_disable_pasid(master);
2402 	kfree(master);
2403 	iommu_fwspec_free(dev);
2404 }
2405 
2406 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2407 {
2408 	struct iommu_group *group;
2409 
2410 	/*
2411 	 * We don't support devices sharing stream IDs other than PCI RID
2412 	 * aliases, since the necessary ID-to-device lookup becomes rather
2413 	 * impractical given a potential sparse 32-bit stream ID space.
2414 	 */
2415 	if (dev_is_pci(dev))
2416 		group = pci_device_group(dev);
2417 	else
2418 		group = generic_device_group(dev);
2419 
2420 	return group;
2421 }
2422 
2423 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2424 				    enum iommu_attr attr, void *data)
2425 {
2426 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2427 
2428 	switch (domain->type) {
2429 	case IOMMU_DOMAIN_UNMANAGED:
2430 		switch (attr) {
2431 		case DOMAIN_ATTR_NESTING:
2432 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2433 			return 0;
2434 		default:
2435 			return -ENODEV;
2436 		}
2437 		break;
2438 	case IOMMU_DOMAIN_DMA:
2439 		switch (attr) {
2440 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2441 			*(int *)data = smmu_domain->non_strict;
2442 			return 0;
2443 		default:
2444 			return -ENODEV;
2445 		}
2446 		break;
2447 	default:
2448 		return -EINVAL;
2449 	}
2450 }
2451 
2452 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2453 				    enum iommu_attr attr, void *data)
2454 {
2455 	int ret = 0;
2456 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2457 
2458 	mutex_lock(&smmu_domain->init_mutex);
2459 
2460 	switch (domain->type) {
2461 	case IOMMU_DOMAIN_UNMANAGED:
2462 		switch (attr) {
2463 		case DOMAIN_ATTR_NESTING:
2464 			if (smmu_domain->smmu) {
2465 				ret = -EPERM;
2466 				goto out_unlock;
2467 			}
2468 
2469 			if (*(int *)data)
2470 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2471 			else
2472 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2473 			break;
2474 		default:
2475 			ret = -ENODEV;
2476 		}
2477 		break;
2478 	case IOMMU_DOMAIN_DMA:
2479 		switch(attr) {
2480 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2481 			smmu_domain->non_strict = *(int *)data;
2482 			break;
2483 		default:
2484 			ret = -ENODEV;
2485 		}
2486 		break;
2487 	default:
2488 		ret = -EINVAL;
2489 	}
2490 
2491 out_unlock:
2492 	mutex_unlock(&smmu_domain->init_mutex);
2493 	return ret;
2494 }
2495 
2496 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2497 {
2498 	return iommu_fwspec_add_ids(dev, args->args, 1);
2499 }
2500 
2501 static void arm_smmu_get_resv_regions(struct device *dev,
2502 				      struct list_head *head)
2503 {
2504 	struct iommu_resv_region *region;
2505 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2506 
2507 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2508 					 prot, IOMMU_RESV_SW_MSI);
2509 	if (!region)
2510 		return;
2511 
2512 	list_add_tail(&region->list, head);
2513 
2514 	iommu_dma_get_resv_regions(dev, head);
2515 }
2516 
2517 static bool arm_smmu_dev_has_feature(struct device *dev,
2518 				     enum iommu_dev_features feat)
2519 {
2520 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2521 
2522 	if (!master)
2523 		return false;
2524 
2525 	switch (feat) {
2526 	case IOMMU_DEV_FEAT_SVA:
2527 		return arm_smmu_master_sva_supported(master);
2528 	default:
2529 		return false;
2530 	}
2531 }
2532 
2533 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2534 					 enum iommu_dev_features feat)
2535 {
2536 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2537 
2538 	if (!master)
2539 		return false;
2540 
2541 	switch (feat) {
2542 	case IOMMU_DEV_FEAT_SVA:
2543 		return arm_smmu_master_sva_enabled(master);
2544 	default:
2545 		return false;
2546 	}
2547 }
2548 
2549 static int arm_smmu_dev_enable_feature(struct device *dev,
2550 				       enum iommu_dev_features feat)
2551 {
2552 	if (!arm_smmu_dev_has_feature(dev, feat))
2553 		return -ENODEV;
2554 
2555 	if (arm_smmu_dev_feature_enabled(dev, feat))
2556 		return -EBUSY;
2557 
2558 	switch (feat) {
2559 	case IOMMU_DEV_FEAT_SVA:
2560 		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2561 	default:
2562 		return -EINVAL;
2563 	}
2564 }
2565 
2566 static int arm_smmu_dev_disable_feature(struct device *dev,
2567 					enum iommu_dev_features feat)
2568 {
2569 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2570 		return -EINVAL;
2571 
2572 	switch (feat) {
2573 	case IOMMU_DEV_FEAT_SVA:
2574 		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2575 	default:
2576 		return -EINVAL;
2577 	}
2578 }
2579 
2580 static struct iommu_ops arm_smmu_ops = {
2581 	.capable		= arm_smmu_capable,
2582 	.domain_alloc		= arm_smmu_domain_alloc,
2583 	.domain_free		= arm_smmu_domain_free,
2584 	.attach_dev		= arm_smmu_attach_dev,
2585 	.map			= arm_smmu_map,
2586 	.unmap			= arm_smmu_unmap,
2587 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2588 	.iotlb_sync		= arm_smmu_iotlb_sync,
2589 	.iova_to_phys		= arm_smmu_iova_to_phys,
2590 	.probe_device		= arm_smmu_probe_device,
2591 	.release_device		= arm_smmu_release_device,
2592 	.device_group		= arm_smmu_device_group,
2593 	.domain_get_attr	= arm_smmu_domain_get_attr,
2594 	.domain_set_attr	= arm_smmu_domain_set_attr,
2595 	.of_xlate		= arm_smmu_of_xlate,
2596 	.get_resv_regions	= arm_smmu_get_resv_regions,
2597 	.put_resv_regions	= generic_iommu_put_resv_regions,
2598 	.dev_has_feat		= arm_smmu_dev_has_feature,
2599 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2600 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2601 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2602 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2603 };
2604 
2605 /* Probing and initialisation functions */
2606 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2607 				   struct arm_smmu_queue *q,
2608 				   unsigned long prod_off,
2609 				   unsigned long cons_off,
2610 				   size_t dwords, const char *name)
2611 {
2612 	size_t qsz;
2613 
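	/*
	 * Each entry is 'dwords' 64-bit words, so the queue occupies
	 * (1 << max_n_shift) * dwords * 8 bytes. If the DMA allocation
	 * fails, halve the number of entries and retry, giving up once
	 * the request has shrunk below a page.
	 */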
2614 	do {
2615 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2616 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2617 					      GFP_KERNEL);
2618 		if (q->base || qsz < PAGE_SIZE)
2619 			break;
2620 
2621 		q->llq.max_n_shift--;
2622 	} while (1);
2623 
2624 	if (!q->base) {
2625 		dev_err(smmu->dev,
2626 			"failed to allocate queue (0x%zx bytes) for %s\n",
2627 			qsz, name);
2628 		return -ENOMEM;
2629 	}
2630 
2631 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2632 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2633 			 1 << q->llq.max_n_shift, name);
2634 	}
2635 
2636 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
2637 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
2638 	q->ent_dwords	= dwords;
2639 
2640 	q->q_base  = Q_BASE_RWA;
2641 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2642 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2643 
2644 	q->llq.prod = q->llq.cons = 0;
2645 	return 0;
2646 }
2647 
2648 static void arm_smmu_cmdq_free_bitmap(void *data)
2649 {
2650 	unsigned long *bitmap = data;
2651 	bitmap_free(bitmap);
2652 }
2653 
2654 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2655 {
2656 	int ret = 0;
2657 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2658 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2659 	atomic_long_t *bitmap;
2660 
2661 	atomic_set(&cmdq->owner_prod, 0);
2662 	atomic_set(&cmdq->lock, 0);
2663 
2664 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2665 	if (!bitmap) {
2666 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2667 		ret = -ENOMEM;
2668 	} else {
2669 		cmdq->valid_map = bitmap;
2670 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2671 	}
2672 
2673 	return ret;
2674 }
2675 
2676 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2677 {
2678 	int ret;
2679 
2680 	/* cmdq */
2681 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2682 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2683 				      "cmdq");
2684 	if (ret)
2685 		return ret;
2686 
2687 	ret = arm_smmu_cmdq_init(smmu);
2688 	if (ret)
2689 		return ret;
2690 
2691 	/* evtq */
2692 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2693 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2694 				      "evtq");
2695 	if (ret)
2696 		return ret;
2697 
2698 	/* priq */
2699 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2700 		return 0;
2701 
2702 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2703 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2704 				       "priq");
2705 }
2706 
2707 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2708 {
2709 	unsigned int i;
2710 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2711 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2712 	void *strtab = smmu->strtab_cfg.strtab;
2713 
2714 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2715 	if (!cfg->l1_desc) {
2716 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2717 		return -ENOMEM;
2718 	}
2719 
2720 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2721 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2722 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2723 	}
2724 
2725 	return 0;
2726 }
2727 
2728 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2729 {
2730 	void *strtab;
2731 	u64 reg;
2732 	u32 size, l1size;
2733 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2734 
2735 	/* Calculate the L1 size, capped to the SIDSIZE. */
2736 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2737 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2738 	cfg->num_l1_ents = 1 << size;
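	/*
	 * For example (with the usual split of 8 low SID bits per L2 table),
	 * sid_bits == 16 gives 256 L1 descriptors of 256 STEs each, covering
	 * the whole SID space without triggering the warning below.
	 */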
2739 
2740 	size += STRTAB_SPLIT;
2741 	if (size < smmu->sid_bits)
2742 		dev_warn(smmu->dev,
2743 			 "2-level strtab only covers %u/%u bits of SID\n",
2744 			 size, smmu->sid_bits);
2745 
2746 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2747 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2748 				     GFP_KERNEL);
2749 	if (!strtab) {
2750 		dev_err(smmu->dev,
2751 			"failed to allocate l1 stream table (%u bytes)\n",
2752 			l1size);
2753 		return -ENOMEM;
2754 	}
2755 	cfg->strtab = strtab;
2756 
2757 	/* Configure strtab_base_cfg for 2 levels */
2758 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2759 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2760 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2761 	cfg->strtab_base_cfg = reg;
2762 
2763 	return arm_smmu_init_l1_strtab(smmu);
2764 }
2765 
2766 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2767 {
2768 	void *strtab;
2769 	u64 reg;
2770 	u32 size;
2771 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2772 
2773 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2774 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2775 				     GFP_KERNEL);
2776 	if (!strtab) {
2777 		dev_err(smmu->dev,
2778 			"failed to allocate linear stream table (%u bytes)\n",
2779 			size);
2780 		return -ENOMEM;
2781 	}
2782 	cfg->strtab = strtab;
2783 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2784 
2785 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2786 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2787 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2788 	cfg->strtab_base_cfg = reg;
2789 
2790 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2791 	return 0;
2792 }
2793 
2794 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2795 {
2796 	u64 reg;
2797 	int ret;
2798 
2799 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2800 		ret = arm_smmu_init_strtab_2lvl(smmu);
2801 	else
2802 		ret = arm_smmu_init_strtab_linear(smmu);
2803 
2804 	if (ret)
2805 		return ret;
2806 
2807 	/* Set the strtab base address */
2808 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2809 	reg |= STRTAB_BASE_RA;
2810 	smmu->strtab_cfg.strtab_base = reg;
2811 
2812 	/* Allocate the first VMID for stage-2 bypass STEs */
2813 	set_bit(0, smmu->vmid_map);
2814 	return 0;
2815 }
2816 
2817 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2818 {
2819 	int ret;
2820 
2821 	ret = arm_smmu_init_queues(smmu);
2822 	if (ret)
2823 		return ret;
2824 
2825 	return arm_smmu_init_strtab(smmu);
2826 }
2827 
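/*
 * Write a control register and poll its acknowledgement register (e.g.
 * CR0 -> CR0ACK) until the hardware reflects the new value, or time out.
 */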
2828 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2829 				   unsigned int reg_off, unsigned int ack_off)
2830 {
2831 	u32 reg;
2832 
2833 	writel_relaxed(val, smmu->base + reg_off);
2834 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2835 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2836 }
2837 
2838 /* GBPA is "special": its fields only take effect via the UPDATE-bit handshake */
2839 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2840 {
2841 	int ret;
2842 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2843 
2844 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2845 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2846 	if (ret)
2847 		return ret;
2848 
2849 	reg &= ~clr;
2850 	reg |= set;
2851 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
2852 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2853 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2854 
2855 	if (ret)
2856 		dev_err(smmu->dev, "GBPA not responding to update\n");
2857 	return ret;
2858 }
2859 
2860 static void arm_smmu_free_msis(void *data)
2861 {
2862 	struct device *dev = data;
2863 	platform_msi_domain_free_irqs(dev);
2864 }
2865 
2866 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2867 {
2868 	phys_addr_t doorbell;
2869 	struct device *dev = msi_desc_to_dev(desc);
2870 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2871 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2872 
2873 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2874 	doorbell &= MSI_CFG0_ADDR_MASK;
2875 
2876 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2877 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2878 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2879 }
2880 
2881 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2882 {
2883 	struct msi_desc *desc;
2884 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2885 	struct device *dev = smmu->dev;
2886 
2887 	/* Clear the MSI address regs */
2888 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2889 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2890 
2891 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2892 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2893 	else
2894 		nvec--;
2895 
2896 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2897 		return;
2898 
2899 	if (!dev->msi_domain) {
2900 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2901 		return;
2902 	}
2903 
2904 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2905 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2906 	if (ret) {
2907 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2908 		return;
2909 	}
2910 
2911 	for_each_msi_entry(desc, dev) {
2912 		switch (desc->platform.msi_index) {
2913 		case EVTQ_MSI_INDEX:
2914 			smmu->evtq.q.irq = desc->irq;
2915 			break;
2916 		case GERROR_MSI_INDEX:
2917 			smmu->gerr_irq = desc->irq;
2918 			break;
2919 		case PRIQ_MSI_INDEX:
2920 			smmu->priq.q.irq = desc->irq;
2921 			break;
2922 		default:	/* Unknown */
2923 			continue;
2924 		}
2925 	}
2926 
2927 	/* Add callback to free MSIs on teardown */
2928 	devm_add_action(dev, arm_smmu_free_msis, dev);
2929 }
2930 
2931 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2932 {
2933 	int irq, ret;
2934 
2935 	arm_smmu_setup_msis(smmu);
2936 
2937 	/* Request interrupt lines */
2938 	irq = smmu->evtq.q.irq;
2939 	if (irq) {
2940 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2941 						arm_smmu_evtq_thread,
2942 						IRQF_ONESHOT,
2943 						"arm-smmu-v3-evtq", smmu);
2944 		if (ret < 0)
2945 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2946 	} else {
2947 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2948 	}
2949 
2950 	irq = smmu->gerr_irq;
2951 	if (irq) {
2952 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2953 				       0, "arm-smmu-v3-gerror", smmu);
2954 		if (ret < 0)
2955 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2956 	} else {
2957 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2958 	}
2959 
2960 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2961 		irq = smmu->priq.q.irq;
2962 		if (irq) {
2963 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2964 							arm_smmu_priq_thread,
2965 							IRQF_ONESHOT,
2966 							"arm-smmu-v3-priq",
2967 							smmu);
2968 			if (ret < 0)
2969 				dev_warn(smmu->dev,
2970 					 "failed to enable priq irq\n");
2971 		} else {
2972 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2973 		}
2974 	}
2975 }
2976 
2977 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2978 {
2979 	int ret, irq;
2980 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2981 
2982 	/* Disable IRQs first */
2983 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2984 				      ARM_SMMU_IRQ_CTRLACK);
2985 	if (ret) {
2986 		dev_err(smmu->dev, "failed to disable irqs\n");
2987 		return ret;
2988 	}
2989 
2990 	irq = smmu->combined_irq;
2991 	if (irq) {
2992 		/*
2993 		 * Cavium ThunderX2 implementation doesn't support unique irq
2994 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
2995 		 */
2996 		ret = devm_request_threaded_irq(smmu->dev, irq,
2997 					arm_smmu_combined_irq_handler,
2998 					arm_smmu_combined_irq_thread,
2999 					IRQF_ONESHOT,
3000 					"arm-smmu-v3-combined-irq", smmu);
3001 		if (ret < 0)
3002 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3003 	} else
3004 		arm_smmu_setup_unique_irqs(smmu);
3005 
3006 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3007 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3008 
3009 	/* Enable interrupt generation on the SMMU */
3010 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3011 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3012 	if (ret)
3013 		dev_warn(smmu->dev, "failed to enable irqs\n");
3014 
3015 	return 0;
3016 }
3017 
3018 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3019 {
3020 	int ret;
3021 
3022 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3023 	if (ret)
3024 		dev_err(smmu->dev, "failed to clear cr0\n");
3025 
3026 	return ret;
3027 }
3028 
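/*
 * Bring-up order matters here: programme the table/queue attributes and base
 * registers while the SMMU is disabled, enable the command queue first so
 * that invalidation commands can be issued, flush stale configuration and TLB
 * state, then enable the event/PRI queues and interrupts, and finally either
 * set SMMUEN or fall back to global bypass via GBPA.
 */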
3029 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3030 {
3031 	int ret;
3032 	u32 reg, enables;
3033 	struct arm_smmu_cmdq_ent cmd;
3034 
3035 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3036 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3037 	if (reg & CR0_SMMUEN) {
3038 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3039 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3040 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3041 	}
3042 
3043 	ret = arm_smmu_device_disable(smmu);
3044 	if (ret)
3045 		return ret;
3046 
3047 	/* CR1 (table and queue memory attributes) */
3048 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3049 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3050 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3051 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3052 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3053 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3054 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3055 
3056 	/* CR2 (private TLB maintenance, record invalid SIDs, E2H) */
3057 	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3058 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3059 
3060 	/* Stream table */
3061 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3062 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3063 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3064 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3065 
3066 	/* Command queue */
3067 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3068 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3069 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3070 
3071 	enables = CR0_CMDQEN;
3072 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3073 				      ARM_SMMU_CR0ACK);
3074 	if (ret) {
3075 		dev_err(smmu->dev, "failed to enable command queue\n");
3076 		return ret;
3077 	}
3078 
3079 	/* Invalidate any cached configuration */
3080 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3081 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3082 	arm_smmu_cmdq_issue_sync(smmu);
3083 
3084 	/* Invalidate any stale TLB entries */
3085 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3086 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3087 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3088 	}
3089 
3090 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3091 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3092 	arm_smmu_cmdq_issue_sync(smmu);
3093 
3094 	/* Event queue */
3095 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3096 	writel_relaxed(smmu->evtq.q.llq.prod,
3097 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3098 	writel_relaxed(smmu->evtq.q.llq.cons,
3099 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3100 
3101 	enables |= CR0_EVTQEN;
3102 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3103 				      ARM_SMMU_CR0ACK);
3104 	if (ret) {
3105 		dev_err(smmu->dev, "failed to enable event queue\n");
3106 		return ret;
3107 	}
3108 
3109 	/* PRI queue */
3110 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3111 		writeq_relaxed(smmu->priq.q.q_base,
3112 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3113 		writel_relaxed(smmu->priq.q.llq.prod,
3114 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3115 		writel_relaxed(smmu->priq.q.llq.cons,
3116 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3117 
3118 		enables |= CR0_PRIQEN;
3119 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3120 					      ARM_SMMU_CR0ACK);
3121 		if (ret) {
3122 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3123 			return ret;
3124 		}
3125 	}
3126 
3127 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3128 		enables |= CR0_ATSCHK;
3129 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3130 					      ARM_SMMU_CR0ACK);
3131 		if (ret) {
3132 			dev_err(smmu->dev, "failed to enable ATS check\n");
3133 			return ret;
3134 		}
3135 	}
3136 
3137 	ret = arm_smmu_setup_irqs(smmu);
3138 	if (ret) {
3139 		dev_err(smmu->dev, "failed to setup irqs\n");
3140 		return ret;
3141 	}
3142 
3143 	if (is_kdump_kernel())
3144 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3145 
3146 	/* Enable the SMMU interface, or ensure bypass */
3147 	if (!bypass || disable_bypass) {
3148 		enables |= CR0_SMMUEN;
3149 	} else {
3150 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3151 		if (ret)
3152 			return ret;
3153 	}
3154 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3155 				      ARM_SMMU_CR0ACK);
3156 	if (ret) {
3157 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3158 		return ret;
3159 	}
3160 
3161 	return 0;
3162 }
3163 
3164 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3165 {
3166 	u32 reg;
3167 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3168 
3169 	/* IDR0 */
3170 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3171 
3172 	/* 2-level structures */
3173 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3174 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3175 
3176 	if (reg & IDR0_CD2L)
3177 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3178 
3179 	/*
3180 	 * Translation table endianness.
3181 	 * We currently require the same endianness as the CPU, but this
3182 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3183 	 */
3184 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3185 	case IDR0_TTENDIAN_MIXED:
3186 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3187 		break;
3188 #ifdef __BIG_ENDIAN
3189 	case IDR0_TTENDIAN_BE:
3190 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3191 		break;
3192 #else
3193 	case IDR0_TTENDIAN_LE:
3194 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3195 		break;
3196 #endif
3197 	default:
3198 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3199 		return -ENXIO;
3200 	}
3201 
3202 	/* Boolean feature flags */
3203 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3204 		smmu->features |= ARM_SMMU_FEAT_PRI;
3205 
3206 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3207 		smmu->features |= ARM_SMMU_FEAT_ATS;
3208 
3209 	if (reg & IDR0_SEV)
3210 		smmu->features |= ARM_SMMU_FEAT_SEV;
3211 
3212 	if (reg & IDR0_MSI) {
3213 		smmu->features |= ARM_SMMU_FEAT_MSI;
3214 		if (coherent && !disable_msipolling)
3215 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3216 	}
3217 
3218 	if (reg & IDR0_HYP)
3219 		smmu->features |= ARM_SMMU_FEAT_HYP;
3220 
3221 	/*
3222 	 * The coherency feature as set by FW is used in preference to the ID
3223 	 * register, but warn on mismatch.
3224 	 */
3225 	if (!!(reg & IDR0_COHACC) != coherent)
3226 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3227 			 coherent ? "true" : "false");
3228 
3229 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3230 	case IDR0_STALL_MODEL_FORCE:
3231 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3232 		fallthrough;
3233 	case IDR0_STALL_MODEL_STALL:
3234 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3235 	}
3236 
3237 	if (reg & IDR0_S1P)
3238 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3239 
3240 	if (reg & IDR0_S2P)
3241 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3242 
3243 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3244 		dev_err(smmu->dev, "no translation support!\n");
3245 		return -ENXIO;
3246 	}
3247 
3248 	/* We only support the AArch64 table format at present */
3249 	switch (FIELD_GET(IDR0_TTF, reg)) {
3250 	case IDR0_TTF_AARCH32_64:
3251 		smmu->ias = 40;
3252 		fallthrough;
3253 	case IDR0_TTF_AARCH64:
3254 		break;
3255 	default:
3256 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3257 		return -ENXIO;
3258 	}
3259 
3260 	/* ASID/VMID sizes */
3261 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3262 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3263 
3264 	/* IDR1 */
3265 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3266 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3267 		dev_err(smmu->dev, "embedded implementation not supported\n");
3268 		return -ENXIO;
3269 	}
3270 
3271 	/* Queue sizes, capped to ensure natural alignment */
3272 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3273 					     FIELD_GET(IDR1_CMDQS, reg));
3274 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3275 		/*
3276 		 * We don't support splitting up batches, so one batch of
3277 		 * commands plus an extra sync needs to fit inside the command
3278 		 * queue. There's also no way we can handle the weird alignment
3279 		 * restrictions on the base pointer for a unit-length queue.
3280 		 */
3281 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3282 			CMDQ_BATCH_ENTRIES);
3283 		return -ENXIO;
3284 	}
3285 
3286 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3287 					     FIELD_GET(IDR1_EVTQS, reg));
3288 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3289 					     FIELD_GET(IDR1_PRIQS, reg));
3290 
3291 	/* SID/SSID sizes */
3292 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3293 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3294 
3295 	/*
3296 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3297 	 * table, use a linear table instead.
3298 	 */
3299 	if (smmu->sid_bits <= STRTAB_SPLIT)
3300 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3301 
3302 	/* IDR3 */
3303 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3304 	if (FIELD_GET(IDR3_RIL, reg))
3305 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3306 
3307 	/* IDR5 */
3308 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3309 
3310 	/* Maximum number of outstanding stalls */
3311 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3312 
3313 	/* Page sizes */
3314 	if (reg & IDR5_GRAN64K)
3315 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3316 	if (reg & IDR5_GRAN16K)
3317 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3318 	if (reg & IDR5_GRAN4K)
3319 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3320 
3321 	/* Input address size */
3322 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3323 		smmu->features |= ARM_SMMU_FEAT_VAX;
3324 
3325 	/* Output address size */
3326 	switch (FIELD_GET(IDR5_OAS, reg)) {
3327 	case IDR5_OAS_32_BIT:
3328 		smmu->oas = 32;
3329 		break;
3330 	case IDR5_OAS_36_BIT:
3331 		smmu->oas = 36;
3332 		break;
3333 	case IDR5_OAS_40_BIT:
3334 		smmu->oas = 40;
3335 		break;
3336 	case IDR5_OAS_42_BIT:
3337 		smmu->oas = 42;
3338 		break;
3339 	case IDR5_OAS_44_BIT:
3340 		smmu->oas = 44;
3341 		break;
3342 	case IDR5_OAS_52_BIT:
3343 		smmu->oas = 52;
3344 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3345 		break;
3346 	default:
3347 		dev_info(smmu->dev,
3348 			"unknown output address size. Truncating to 48-bit\n");
3349 		fallthrough;
3350 	case IDR5_OAS_48_BIT:
3351 		smmu->oas = 48;
3352 	}
3353 
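	/*
	 * The ops-wide page size bitmap starts out as the -1UL sentinel:
	 * the first SMMU probed replaces it outright, and any subsequent
	 * SMMUs OR in their supported sizes.
	 */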
3354 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3355 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3356 	else
3357 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3358 
3359 	/* Set the DMA mask for our table walker */
3360 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3361 		dev_warn(smmu->dev,
3362 			 "failed to set DMA mask for table walker\n");
3363 
3364 	smmu->ias = max(smmu->ias, smmu->oas);
3365 
3366 	if (arm_smmu_sva_supported(smmu))
3367 		smmu->features |= ARM_SMMU_FEAT_SVA;
3368 
3369 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3370 		 smmu->ias, smmu->oas, smmu->features);
3371 	return 0;
3372 }
3373 
3374 #ifdef CONFIG_ACPI
3375 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3376 {
3377 	switch (model) {
3378 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3379 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3380 		break;
3381 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3382 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3383 		break;
3384 	}
3385 
3386 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3387 }
3388 
3389 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3390 				      struct arm_smmu_device *smmu)
3391 {
3392 	struct acpi_iort_smmu_v3 *iort_smmu;
3393 	struct device *dev = smmu->dev;
3394 	struct acpi_iort_node *node;
3395 
3396 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3397 
3398 	/* Retrieve SMMUv3 specific data */
3399 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3400 
3401 	acpi_smmu_get_options(iort_smmu->model, smmu);
3402 
3403 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3404 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3405 
3406 	return 0;
3407 }
3408 #else
3409 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3410 					     struct arm_smmu_device *smmu)
3411 {
3412 	return -ENODEV;
3413 }
3414 #endif
3415 
3416 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3417 				    struct arm_smmu_device *smmu)
3418 {
3419 	struct device *dev = &pdev->dev;
3420 	u32 cells;
3421 	int ret = -EINVAL;
3422 
3423 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3424 		dev_err(dev, "missing #iommu-cells property\n");
3425 	else if (cells != 1)
3426 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3427 	else
3428 		ret = 0;
3429 
3430 	parse_driver_options(smmu);
3431 
3432 	if (of_dma_is_coherent(dev->of_node))
3433 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3434 
3435 	return ret;
3436 }
3437 
3438 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3439 {
3440 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3441 		return SZ_64K;
3442 	else
3443 		return SZ_128K;
3444 }
3445 
3446 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3447 {
3448 	int err;
3449 
3450 #ifdef CONFIG_PCI
3451 	if (pci_bus_type.iommu_ops != ops) {
3452 		err = bus_set_iommu(&pci_bus_type, ops);
3453 		if (err)
3454 			return err;
3455 	}
3456 #endif
3457 #ifdef CONFIG_ARM_AMBA
3458 	if (amba_bustype.iommu_ops != ops) {
3459 		err = bus_set_iommu(&amba_bustype, ops);
3460 		if (err)
3461 			goto err_reset_pci_ops;
3462 	}
3463 #endif
3464 	if (platform_bus_type.iommu_ops != ops) {
3465 		err = bus_set_iommu(&platform_bus_type, ops);
3466 		if (err)
3467 			goto err_reset_amba_ops;
3468 	}
3469 
3470 	return 0;
3471 
3472 err_reset_amba_ops:
3473 #ifdef CONFIG_ARM_AMBA
3474 	bus_set_iommu(&amba_bustype, NULL);
3475 #endif
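	/*
	 * This label is only jumped to from the AMBA block above, so it is
	 * tagged __maybe_unused to silence the unused-label warning when
	 * CONFIG_ARM_AMBA is not set.
	 */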
3476 err_reset_pci_ops: __maybe_unused;
3477 #ifdef CONFIG_PCI
3478 	bus_set_iommu(&pci_bus_type, NULL);
3479 #endif
3480 	return err;
3481 }
3482 
3483 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3484 				      resource_size_t size)
3485 {
3486 	struct resource res = {
3487 		.flags = IORESOURCE_MEM,
3488 		.start = start,
3489 		.end = start + size - 1,
3490 	};
3491 
3492 	return devm_ioremap_resource(dev, &res);
3493 }
3494 
3495 static int arm_smmu_device_probe(struct platform_device *pdev)
3496 {
3497 	int irq, ret;
3498 	struct resource *res;
3499 	resource_size_t ioaddr;
3500 	struct arm_smmu_device *smmu;
3501 	struct device *dev = &pdev->dev;
3502 	bool bypass;
3503 
3504 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3505 	if (!smmu) {
3506 		dev_err(dev, "failed to allocate arm_smmu_device\n");
3507 		return -ENOMEM;
3508 	}
3509 	smmu->dev = dev;
3510 
3511 	if (dev->of_node) {
3512 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3513 	} else {
3514 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3515 		if (ret == -ENODEV)
3516 			return ret;
3517 	}
3518 
3519 	/* Set bypass mode according to firmware probing result */
3520 	bypass = !!ret;
3521 
3522 	/* Base address */
3523 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3524 	if (!res)
3525 		return -EINVAL;
3526 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3527 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3528 		return -EINVAL;
3529 	}
3530 	ioaddr = res->start;
3531 
3532 	/*
3533 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3534 	 * the PMCG registers which are reserved by the PMU driver.
3535 	 */
3536 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3537 	if (IS_ERR(smmu->base))
3538 		return PTR_ERR(smmu->base);
3539 
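	/*
	 * The second 64K page of the register space holds the event and PRI
	 * queue producer/consumer registers; with the PAGE0_REGS_ONLY quirk
	 * those live in page 0 instead, so page1 simply aliases the first
	 * mapping.
	 */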
3540 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3541 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3542 					       ARM_SMMU_REG_SZ);
3543 		if (IS_ERR(smmu->page1))
3544 			return PTR_ERR(smmu->page1);
3545 	} else {
3546 		smmu->page1 = smmu->base;
3547 	}
3548 
3549 	/* Interrupt lines */
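	/*
	 * All wired interrupts are optional: an implementation may expose a
	 * single combined line, individual lines, or rely on MSIs entirely,
	 * hence the _optional lookups below.
	 */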
3550 
3551 	irq = platform_get_irq_byname_optional(pdev, "combined");
3552 	if (irq > 0)
3553 		smmu->combined_irq = irq;
3554 	else {
3555 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3556 		if (irq > 0)
3557 			smmu->evtq.q.irq = irq;
3558 
3559 		irq = platform_get_irq_byname_optional(pdev, "priq");
3560 		if (irq > 0)
3561 			smmu->priq.q.irq = irq;
3562 
3563 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3564 		if (irq > 0)
3565 			smmu->gerr_irq = irq;
3566 	}
3567 	/* Probe the h/w */
3568 	ret = arm_smmu_device_hw_probe(smmu);
3569 	if (ret)
3570 		return ret;
3571 
3572 	/* Initialise in-memory data structures */
3573 	ret = arm_smmu_init_structures(smmu);
3574 	if (ret)
3575 		return ret;
3576 
3577 	/* Record our private device structure */
3578 	platform_set_drvdata(pdev, smmu);
3579 
3580 	/* Reset the device */
3581 	ret = arm_smmu_device_reset(smmu, bypass);
3582 	if (ret)
3583 		return ret;
3584 
3585 	/* And we're up. Go go go! */
3586 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3587 				     "smmu3.%pa", &ioaddr);
3588 	if (ret)
3589 		return ret;
3590 
3591 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3592 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3593 
3594 	ret = iommu_device_register(&smmu->iommu);
3595 	if (ret) {
3596 		dev_err(dev, "Failed to register iommu\n");
3597 		return ret;
3598 	}
3599 
3600 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
3601 }
3602 
3603 static int arm_smmu_device_remove(struct platform_device *pdev)
3604 {
3605 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3606 
3607 	arm_smmu_set_bus_ops(NULL);
3608 	iommu_device_unregister(&smmu->iommu);
3609 	iommu_device_sysfs_remove(&smmu->iommu);
3610 	arm_smmu_device_disable(smmu);
3611 
3612 	return 0;
3613 }
3614 
3615 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3616 {
3617 	arm_smmu_device_remove(pdev);
3618 }
3619 
3620 static const struct of_device_id arm_smmu_of_match[] = {
3621 	{ .compatible = "arm,smmu-v3", },
3622 	{ },
3623 };
3624 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3625 
3626 static struct platform_driver arm_smmu_driver = {
3627 	.driver	= {
3628 		.name			= "arm-smmu-v3",
3629 		.of_match_table		= arm_smmu_of_match,
3630 		.suppress_bind_attrs	= true,
3631 	},
3632 	.probe	= arm_smmu_device_probe,
3633 	.remove	= arm_smmu_device_remove,
3634 	.shutdown = arm_smmu_device_shutdown,
3635 };
3636 module_platform_driver(arm_smmu_driver);
3637 
3638 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3639 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3640 MODULE_ALIAS("platform:arm-smmu-v3");
3641 MODULE_LICENSE("GPL v2");
3642