1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_iommu.h>
27 #include <linux/of_platform.h>
28 #include <linux/pci.h>
29 #include <linux/pci-ats.h>
30 #include <linux/platform_device.h>
31 
32 #include <linux/amba/bus.h>
33 
34 #include "arm-smmu-v3.h"
35 
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40 
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 	"Disable MSI-based polling for CMD_SYNC completion.");
45 
46 enum arm_smmu_msi_index {
47 	EVTQ_MSI_INDEX,
48 	GERROR_MSI_INDEX,
49 	PRIQ_MSI_INDEX,
50 	ARM_SMMU_MAX_MSIS,
51 };
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 static struct arm_smmu_option_prop arm_smmu_options[] = {
80 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
81 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
82 	{ 0, NULL},
83 };
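
/*
 * Illustration only (hypothetical devicetree node, not taken from any real
 * board): a firmware description such as
 *
 *	smmu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		...
 *		hisilicon,broken-prefetch-cmd;
 *	};
 *
 * would lead parse_driver_options() below to set ARM_SMMU_OPT_SKIP_PREFETCH
 * for this SMMU instance.
 */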
84 
85 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
86 						 struct arm_smmu_device *smmu)
87 {
88 	if (offset > SZ_64K)
89 		return smmu->page1 + offset - SZ_64K;
90 
91 	return smmu->base + offset;
92 }
93 
94 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
95 {
96 	return container_of(dom, struct arm_smmu_domain, domain);
97 }
98 
99 static void parse_driver_options(struct arm_smmu_device *smmu)
100 {
101 	int i = 0;
102 
103 	do {
104 		if (of_property_read_bool(smmu->dev->of_node,
105 						arm_smmu_options[i].prop)) {
106 			smmu->options |= arm_smmu_options[i].opt;
107 			dev_notice(smmu->dev, "option %s\n",
108 				arm_smmu_options[i].prop);
109 		}
110 	} while (arm_smmu_options[++i].opt);
111 }
112 
113 /* Low-level queue manipulation functions */
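/*
 * A note on queue pointers (rough sketch; the field helpers live in
 * arm-smmu-v3.h): prod and cons are encoded as OVF | WRP | IDX, where the
 * low max_n_shift bits index into the queue, the next bit up is the wrap
 * flag, and the overflow flag is carried separately in a high bit. As a toy
 * example with max_n_shift == 2 (a four-entry queue), a prod of wrap=1/idx=1
 * and a cons of wrap=0/idx=3 have differing wrap bits, so queue_has_space()
 * below computes space = cons - prod = 3 - 1 = 2 free slots.
 */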
114 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
115 {
116 	u32 space, prod, cons;
117 
118 	prod = Q_IDX(q, q->prod);
119 	cons = Q_IDX(q, q->cons);
120 
121 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
122 		space = (1 << q->max_n_shift) - (prod - cons);
123 	else
124 		space = cons - prod;
125 
126 	return space >= n;
127 }
128 
129 static bool queue_full(struct arm_smmu_ll_queue *q)
130 {
131 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
132 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
133 }
134 
135 static bool queue_empty(struct arm_smmu_ll_queue *q)
136 {
137 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
138 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
139 }
140 
141 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
142 {
143 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
144 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
145 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
146 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
147 }
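
/*
 * Toy example for queue_consumed() above (illustrative, four-entry queue):
 * with prod at idx=1/wrap=0, a cons of idx=2/wrap=0 (same wrap, higher
 * index) or idx=1/wrap=1 (cons has wrapped past prod) both count as
 * consumed, whereas idx=0/wrap=0 does not, since the consumer has yet to
 * reach prod.
 */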
148 
149 static void queue_sync_cons_out(struct arm_smmu_queue *q)
150 {
151 	/*
152 	 * Ensure that all CPU accesses (reads and writes) to the queue
153 	 * are complete before we update the cons pointer.
154 	 */
155 	__iomb();
156 	writel_relaxed(q->llq.cons, q->cons_reg);
157 }
158 
159 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
160 {
161 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
162 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
163 }
164 
165 static int queue_sync_prod_in(struct arm_smmu_queue *q)
166 {
167 	u32 prod;
168 	int ret = 0;
169 
170 	/*
171 	 * We can't use the _relaxed() variant here, as we must prevent
172 	 * speculative reads of the queue before we have determined that
173 	 * prod has indeed moved.
174 	 */
175 	prod = readl(q->prod_reg);
176 
177 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
178 		ret = -EOVERFLOW;
179 
180 	q->llq.prod = prod;
181 	return ret;
182 }
183 
184 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
185 {
186 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
187 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
188 }
189 
190 static void queue_poll_init(struct arm_smmu_device *smmu,
191 			    struct arm_smmu_queue_poll *qp)
192 {
193 	qp->delay = 1;
194 	qp->spin_cnt = 0;
195 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
196 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
197 }
198 
199 static int queue_poll(struct arm_smmu_queue_poll *qp)
200 {
201 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
202 		return -ETIMEDOUT;
203 
204 	if (qp->wfe) {
205 		wfe();
206 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
207 		cpu_relax();
208 	} else {
209 		udelay(qp->delay);
210 		qp->delay *= 2;
211 		qp->spin_cnt = 0;
212 	}
213 
214 	return 0;
215 }
216 
217 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
218 {
219 	int i;
220 
221 	for (i = 0; i < n_dwords; ++i)
222 		*dst++ = cpu_to_le64(*src++);
223 }
224 
225 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
226 {
227 	int i;
228 
229 	for (i = 0; i < n_dwords; ++i)
230 		*dst++ = le64_to_cpu(*src++);
231 }
232 
233 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
234 {
235 	if (queue_empty(&q->llq))
236 		return -EAGAIN;
237 
238 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
239 	queue_inc_cons(&q->llq);
240 	queue_sync_cons_out(q);
241 	return 0;
242 }
243 
244 /* High-level queue accessors */
245 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
246 {
247 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
248 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
249 
250 	switch (ent->opcode) {
251 	case CMDQ_OP_TLBI_EL2_ALL:
252 	case CMDQ_OP_TLBI_NSNH_ALL:
253 		break;
254 	case CMDQ_OP_PREFETCH_CFG:
255 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
256 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
257 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
258 		break;
259 	case CMDQ_OP_CFGI_CD:
260 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
261 		fallthrough;
262 	case CMDQ_OP_CFGI_STE:
263 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
264 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
265 		break;
266 	case CMDQ_OP_CFGI_CD_ALL:
267 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
268 		break;
269 	case CMDQ_OP_CFGI_ALL:
270 		/* Cover the entire SID range */
271 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
272 		break;
273 	case CMDQ_OP_TLBI_NH_VA:
274 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
275 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
276 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
277 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
278 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
279 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
280 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
281 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
282 		break;
283 	case CMDQ_OP_TLBI_S2_IPA:
284 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
285 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
286 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
287 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
288 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
289 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
290 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
291 		break;
292 	case CMDQ_OP_TLBI_NH_ASID:
293 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
294 		fallthrough;
295 	case CMDQ_OP_TLBI_S12_VMALL:
296 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
297 		break;
298 	case CMDQ_OP_ATC_INV:
299 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
300 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
301 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
302 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
303 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
304 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
305 		break;
306 	case CMDQ_OP_PRI_RESP:
307 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
308 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
309 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
310 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
311 		switch (ent->pri.resp) {
312 		case PRI_RESP_DENY:
313 		case PRI_RESP_FAIL:
314 		case PRI_RESP_SUCC:
315 			break;
316 		default:
317 			return -EINVAL;
318 		}
319 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
320 		break;
321 	case CMDQ_OP_CMD_SYNC:
322 		if (ent->sync.msiaddr) {
323 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
324 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
325 		} else {
326 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
327 		}
328 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
329 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
330 		break;
331 	default:
332 		return -ENOENT;
333 	}
334 
335 	return 0;
336 }
337 
338 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
339 					 u32 prod)
340 {
341 	struct arm_smmu_queue *q = &smmu->cmdq.q;
342 	struct arm_smmu_cmdq_ent ent = {
343 		.opcode = CMDQ_OP_CMD_SYNC,
344 	};
345 
346 	/*
347 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
348 	 * payload, so the write will zero the entire command on that platform.
349 	 */
350 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
351 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
352 				   q->ent_dwords * 8;
353 	}
354 
355 	arm_smmu_cmdq_build_cmd(cmd, &ent);
356 }
357 
358 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
359 {
360 	static const char *cerror_str[] = {
361 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
362 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
363 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
364 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
365 	};
366 
367 	int i;
368 	u64 cmd[CMDQ_ENT_DWORDS];
369 	struct arm_smmu_queue *q = &smmu->cmdq.q;
370 	u32 cons = readl_relaxed(q->cons_reg);
371 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
372 	struct arm_smmu_cmdq_ent cmd_sync = {
373 		.opcode = CMDQ_OP_CMD_SYNC,
374 	};
375 
376 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
377 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
378 
379 	switch (idx) {
380 	case CMDQ_ERR_CERROR_ABT_IDX:
381 		dev_err(smmu->dev, "retrying command fetch\n");
		fallthrough;
382 	case CMDQ_ERR_CERROR_NONE_IDX:
383 		return;
384 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
385 		/*
386 		 * ATC Invalidation Completion timeout. CONS is still pointing
387 		 * at the CMD_SYNC. Attempt to complete other pending commands
388 		 * by repeating the CMD_SYNC, though we might well end up back
389 		 * here since the ATC invalidation may still be pending.
390 		 */
391 		return;
392 	case CMDQ_ERR_CERROR_ILL_IDX:
393 	default:
394 		break;
395 	}
396 
397 	/*
398 	 * We may have concurrent producers, so we need to be careful
399 	 * not to touch any of the shadow cmdq state.
400 	 */
401 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
402 	dev_err(smmu->dev, "skipping command in error state:\n");
403 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
404 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
405 
406 	/* Convert the erroneous command into a CMD_SYNC */
407 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
408 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
409 		return;
410 	}
411 
412 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
413 }
414 
415 /*
416  * Command queue locking.
417  * This is a form of bastardised rwlock with the following major changes:
418  *
419  * - The only LOCK routines are exclusive_trylock() and shared_lock().
420  *   Neither have barrier semantics, and instead provide only a control
421  *   dependency.
422  *
423  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
424  *   fails if the caller appears to be the last lock holder (yes, this is
425  *   racy). All successful UNLOCK routines have RELEASE semantics.
426  */
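/*
 * A minimal usage sketch (not lifted verbatim from a caller; see
 * arm_smmu_cmdq_issue_cmdlist() below for the real sequence): a CPU waiting
 * on a CMD_SYNC holds the lock shared for the duration of its wait and only
 * updates the shadow cons pointer if it was the last reader out:
 *
 *	arm_smmu_cmdq_shared_lock(cmdq);
 *	... poll for our CMD_SYNC to complete ...
 *	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 *		arm_smmu_cmdq_shared_unlock(cmdq);
 *	}
 */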
427 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
428 {
429 	int val;
430 
431 	/*
432 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
433 	 * lock counter. When held in exclusive state, the lock counter is set
434 	 * to INT_MIN so these increments won't hurt as the value will remain
435 	 * negative.
436 	 */
437 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
438 		return;
439 
440 	do {
441 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
442 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
443 }
444 
445 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
446 {
447 	(void)atomic_dec_return_release(&cmdq->lock);
448 }
449 
450 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
451 {
452 	if (atomic_read(&cmdq->lock) == 1)
453 		return false;
454 
455 	arm_smmu_cmdq_shared_unlock(cmdq);
456 	return true;
457 }
458 
459 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
460 ({									\
461 	bool __ret;							\
462 	local_irq_save(flags);						\
463 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
464 	if (!__ret)							\
465 		local_irq_restore(flags);				\
466 	__ret;								\
467 })
468 
469 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
470 ({									\
471 	atomic_set_release(&cmdq->lock, 0);				\
472 	local_irq_restore(flags);					\
473 })
474 
475 
476 /*
477  * Command queue insertion.
478  * This is made fiddly by our attempts to achieve some sort of scalability
479  * since there is one queue shared amongst all of the CPUs in the system.  If
480  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
481  * then you'll *love* this monstrosity.
482  *
483  * The basic idea is to split the queue up into ranges of commands that are
484  * owned by a given CPU; the owner may not have written all of the commands
485  * itself, but is responsible for advancing the hardware prod pointer when
486  * the time comes. The algorithm is roughly:
487  *
488  * 	1. Allocate some space in the queue. At this point we also discover
489  *	   whether the head of the queue is currently owned by another CPU,
490  *	   or whether we are the owner.
491  *
492  *	2. Write our commands into our allocated slots in the queue.
493  *
494  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
495  *
496  *	4. If we are an owner:
497  *		a. Wait for the previous owner to finish.
498  *		b. Mark the queue head as unowned, which tells us the range
499  *		   that we are responsible for publishing.
500  *		c. Wait for all commands in our owned range to become valid.
501  *		d. Advance the hardware prod pointer.
502  *		e. Tell the next owner we've finished.
503  *
504  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
505  *	   owner), then we need to stick around until it has completed:
506  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
507  *		   to clear the first 4 bytes.
508  *		b. Otherwise, we spin waiting for the hardware cons pointer to
509  *		   advance past our command.
510  *
511  * The devil is in the details, particularly the use of locking for handling
512  * SYNC completion and freeing up space in the queue before we think that it is
513  * full.
514  */
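/*
 * Ownership itself is tracked with CMDQ_PROD_OWNED_FLAG in the shadow prod
 * value (rough illustration; the exact bit is defined in arm-smmu-v3.h):
 * every allocation sets the flag in the new queue head, so the CPU whose
 * cmpxchg() observed the flag clear in the old value becomes the owner.
 * The owner clears the flag again with atomic_fetch_andnot_relaxed() once
 * it stops gathering other CPUs' work (step 4b above).
 */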
515 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
516 					       u32 sprod, u32 eprod, bool set)
517 {
518 	u32 swidx, sbidx, ewidx, ebidx;
519 	struct arm_smmu_ll_queue llq = {
520 		.max_n_shift	= cmdq->q.llq.max_n_shift,
521 		.prod		= sprod,
522 	};
523 
524 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
525 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
526 
527 	while (llq.prod != eprod) {
528 		unsigned long mask;
529 		atomic_long_t *ptr;
530 		u32 limit = BITS_PER_LONG;
531 
532 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
533 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
534 
535 		ptr = &cmdq->valid_map[swidx];
536 
537 		if ((swidx == ewidx) && (sbidx < ebidx))
538 			limit = ebidx;
539 
540 		mask = GENMASK(limit - 1, sbidx);
541 
542 		/*
543 		 * The valid bit is the inverse of the wrap bit. This means
544 		 * that a zero-initialised queue is invalid and, after marking
545 		 * all entries as valid, they become invalid again when we
546 		 * wrap.
547 		 */
548 		if (set) {
549 			atomic_long_xor(mask, ptr);
550 		} else { /* Poll */
551 			unsigned long valid;
552 
553 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
554 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
555 		}
556 
557 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
558 	}
559 }
560 
561 /* Mark all entries in the range [sprod, eprod) as valid */
562 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
563 					u32 sprod, u32 eprod)
564 {
565 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
566 }
567 
568 /* Wait for all entries in the range [sprod, eprod) to become valid */
569 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
570 					 u32 sprod, u32 eprod)
571 {
572 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
573 }
574 
575 /* Wait for the command queue to become non-full */
576 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
577 					     struct arm_smmu_ll_queue *llq)
578 {
579 	unsigned long flags;
580 	struct arm_smmu_queue_poll qp;
581 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
582 	int ret = 0;
583 
584 	/*
585 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
586 	 * that fails, spin until somebody else updates it for us.
587 	 */
588 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
589 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
590 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
591 		llq->val = READ_ONCE(cmdq->q.llq.val);
592 		return 0;
593 	}
594 
595 	queue_poll_init(smmu, &qp);
596 	do {
597 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
598 		if (!queue_full(llq))
599 			break;
600 
601 		ret = queue_poll(&qp);
602 	} while (!ret);
603 
604 	return ret;
605 }
606 
607 /*
608  * Wait until the SMMU signals a CMD_SYNC completion MSI.
609  * Must be called with the cmdq lock held in some capacity.
610  */
611 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
612 					  struct arm_smmu_ll_queue *llq)
613 {
614 	int ret = 0;
615 	struct arm_smmu_queue_poll qp;
616 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
617 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
618 
619 	queue_poll_init(smmu, &qp);
620 
621 	/*
622 	 * The MSI won't generate an event, since it's being written back
623 	 * into the command queue.
624 	 */
625 	qp.wfe = false;
626 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
627 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
628 	return ret;
629 }
630 
631 /*
632  * Wait until the SMMU cons index passes llq->prod.
633  * Must be called with the cmdq lock held in some capacity.
634  */
635 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
636 					       struct arm_smmu_ll_queue *llq)
637 {
638 	struct arm_smmu_queue_poll qp;
639 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
640 	u32 prod = llq->prod;
641 	int ret = 0;
642 
643 	queue_poll_init(smmu, &qp);
644 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
645 	do {
646 		if (queue_consumed(llq, prod))
647 			break;
648 
649 		ret = queue_poll(&qp);
650 
651 		/*
652 		 * This needs to be a readl() so that our subsequent call
653 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
654 		 *
655 		 * Specifically, we need to ensure that we observe all
656 		 * shared_lock()s by other CMD_SYNCs that share our owner,
657 		 * so that a failing call to tryunlock() means that we're
658 		 * the last one out and therefore we can safely advance
659 		 * cmdq->q.llq.cons. Roughly speaking:
660 		 *
661 		 * CPU 0		CPU1			CPU2 (us)
662 		 *
663 		 * if (sync)
664 		 * 	shared_lock();
665 		 *
666 		 * dma_wmb();
667 		 * set_valid_map();
668 		 *
669 		 * 			if (owner) {
670 		 *				poll_valid_map();
671 		 *				<control dependency>
672 		 *				writel(prod_reg);
673 		 *
674 		 *						readl(cons_reg);
675 		 *						tryunlock();
676 		 *
677 		 * Requires us to see CPU 0's shared_lock() acquisition.
678 		 */
679 		llq->cons = readl(cmdq->q.cons_reg);
680 	} while (!ret);
681 
682 	return ret;
683 }
684 
685 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
686 					 struct arm_smmu_ll_queue *llq)
687 {
688 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
689 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
690 
691 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
692 }
693 
694 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
695 					u32 prod, int n)
696 {
697 	int i;
698 	struct arm_smmu_ll_queue llq = {
699 		.max_n_shift	= cmdq->q.llq.max_n_shift,
700 		.prod		= prod,
701 	};
702 
703 	for (i = 0; i < n; ++i) {
704 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
705 
706 		prod = queue_inc_prod_n(&llq, i);
707 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
708 	}
709 }
710 
711 /*
712  * This is the actual insertion function, and provides the following
713  * ordering guarantees to callers:
714  *
715  * - There is a dma_wmb() before publishing any commands to the queue.
716  *   This can be relied upon to order prior writes to data structures
717  *   in memory (such as a CD or an STE) before the command.
718  *
719  * - On completion of a CMD_SYNC, there is a control dependency.
720  *   This can be relied upon to order subsequent writes to memory (e.g.
721  *   freeing an IOVA) after completion of the CMD_SYNC.
722  *
723  * - Command insertion is totally ordered, so if two CPUs each race to
724  *   insert their own list of commands then all of the commands from one
725  *   CPU will appear before any of the commands from the other CPU.
726  */
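/*
 * An illustrative caller pattern relying on these guarantees (sketch only;
 * cfgi_cmd stands in for a configuration-invalidation command built by the
 * caller):
 *
 *	WRITE_ONCE(cdptr[0], cpu_to_le64(val));		// update a CD/STE
 *	arm_smmu_cmdq_issue_cmd(smmu, &cfgi_cmd);	// ordered after it
 *	arm_smmu_cmdq_issue_sync(smmu);			// wait for completion
 *
 * The dma_wmb() below makes the structure update visible before the
 * command, and completion of the CMD_SYNC orders whatever the caller does
 * next (e.g. freeing an IOVA).
 */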
727 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
728 				       u64 *cmds, int n, bool sync)
729 {
730 	u64 cmd_sync[CMDQ_ENT_DWORDS];
731 	u32 prod;
732 	unsigned long flags;
733 	bool owner;
734 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
735 	struct arm_smmu_ll_queue llq = {
736 		.max_n_shift = cmdq->q.llq.max_n_shift,
737 	}, head = llq;
738 	int ret = 0;
739 
740 	/* 1. Allocate some space in the queue */
741 	local_irq_save(flags);
742 	llq.val = READ_ONCE(cmdq->q.llq.val);
743 	do {
744 		u64 old;
745 
746 		while (!queue_has_space(&llq, n + sync)) {
747 			local_irq_restore(flags);
748 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
749 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
750 			local_irq_save(flags);
751 		}
752 
753 		head.cons = llq.cons;
754 		head.prod = queue_inc_prod_n(&llq, n + sync) |
755 					     CMDQ_PROD_OWNED_FLAG;
756 
757 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
758 		if (old == llq.val)
759 			break;
760 
761 		llq.val = old;
762 	} while (1);
763 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
764 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
765 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
766 
767 	/*
768 	 * 2. Write our commands into the queue
769 	 * Dependency ordering from the cmpxchg() loop above.
770 	 */
771 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
772 	if (sync) {
773 		prod = queue_inc_prod_n(&llq, n);
774 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
775 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
776 
777 		/*
778 		 * In order to determine completion of our CMD_SYNC, we must
779 		 * ensure that the queue can't wrap twice without us noticing.
780 		 * We achieve that by taking the cmdq lock as shared before
781 		 * marking our slot as valid.
782 		 */
783 		arm_smmu_cmdq_shared_lock(cmdq);
784 	}
785 
786 	/* 3. Mark our slots as valid, ensuring commands are visible first */
787 	dma_wmb();
788 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
789 
790 	/* 4. If we are the owner, take control of the SMMU hardware */
791 	if (owner) {
792 		/* a. Wait for previous owner to finish */
793 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
794 
795 		/* b. Stop gathering work by clearing the owned flag */
796 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
797 						   &cmdq->q.llq.atomic.prod);
798 		prod &= ~CMDQ_PROD_OWNED_FLAG;
799 
800 		/*
801 		 * c. Wait for any gathered work to be written to the queue.
802 		 * Note that we read our own entries so that we have the control
803 		 * dependency required by (d).
804 		 */
805 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
806 
807 		/*
808 		 * d. Advance the hardware prod pointer
809 		 * Control dependency ordering from the entries becoming valid.
810 		 */
811 		writel_relaxed(prod, cmdq->q.prod_reg);
812 
813 		/*
814 		 * e. Tell the next owner we're done
815 		 * Make sure we've updated the hardware first, so that we don't
816 		 * race to update prod and potentially move it backwards.
817 		 */
818 		atomic_set_release(&cmdq->owner_prod, prod);
819 	}
820 
821 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
822 	if (sync) {
823 		llq.prod = queue_inc_prod_n(&llq, n);
824 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
825 		if (ret) {
826 			dev_err_ratelimited(smmu->dev,
827 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
828 					    llq.prod,
829 					    readl_relaxed(cmdq->q.prod_reg),
830 					    readl_relaxed(cmdq->q.cons_reg));
831 		}
832 
833 		/*
834 		 * Try to unlock the cmdq lock. This will fail if we're the last
835 		 * reader, in which case we can safely update cmdq->q.llq.cons
836 		 */
837 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
838 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
839 			arm_smmu_cmdq_shared_unlock(cmdq);
840 		}
841 	}
842 
843 	local_irq_restore(flags);
844 	return ret;
845 }
846 
847 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
848 				   struct arm_smmu_cmdq_ent *ent)
849 {
850 	u64 cmd[CMDQ_ENT_DWORDS];
851 
852 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
853 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
854 			 ent->opcode);
855 		return -EINVAL;
856 	}
857 
858 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
859 }
860 
861 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
862 {
863 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
864 }
865 
866 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
867 				    struct arm_smmu_cmdq_batch *cmds,
868 				    struct arm_smmu_cmdq_ent *cmd)
869 {
870 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
871 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
872 		cmds->num = 0;
873 	}
874 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
875 	cmds->num++;
876 }
877 
878 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
879 				      struct arm_smmu_cmdq_batch *cmds)
880 {
881 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
882 }
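
/*
 * Batch usage pattern (as used by the invalidation paths later in this
 * file): callers accumulate commands with arm_smmu_cmdq_batch_add(), which
 * flushes automatically once CMDQ_BATCH_ENTRIES commands have been queued,
 * and finish with arm_smmu_cmdq_batch_submit(), which issues whatever is
 * left together with a CMD_SYNC. Roughly:
 *
 *	struct arm_smmu_cmdq_batch cmds = {};
 *
 *	for each target:
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 */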
883 
884 /* Context descriptor manipulation functions */
885 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
886 {
887 	struct arm_smmu_cmdq_ent cmd = {
888 		.opcode = CMDQ_OP_TLBI_NH_ASID,
889 		.tlbi.asid = asid,
890 	};
891 
892 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
893 	arm_smmu_cmdq_issue_sync(smmu);
894 }
895 
896 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
897 			     int ssid, bool leaf)
898 {
899 	size_t i;
900 	unsigned long flags;
901 	struct arm_smmu_master *master;
902 	struct arm_smmu_cmdq_batch cmds = {};
903 	struct arm_smmu_device *smmu = smmu_domain->smmu;
904 	struct arm_smmu_cmdq_ent cmd = {
905 		.opcode	= CMDQ_OP_CFGI_CD,
906 		.cfgi	= {
907 			.ssid	= ssid,
908 			.leaf	= leaf,
909 		},
910 	};
911 
912 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
913 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
914 		for (i = 0; i < master->num_sids; i++) {
915 			cmd.cfgi.sid = master->sids[i];
916 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
917 		}
918 	}
919 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
920 
921 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
922 }
923 
924 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
925 					struct arm_smmu_l1_ctx_desc *l1_desc)
926 {
927 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
928 
929 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
930 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
931 	if (!l1_desc->l2ptr) {
932 		dev_warn(smmu->dev,
933 			 "failed to allocate context descriptor table\n");
934 		return -ENOMEM;
935 	}
936 	return 0;
937 }
938 
939 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
940 				      struct arm_smmu_l1_ctx_desc *l1_desc)
941 {
942 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
943 		  CTXDESC_L1_DESC_V;
944 
945 	/* See comment in arm_smmu_write_ctx_desc() */
946 	WRITE_ONCE(*dst, cpu_to_le64(val));
947 }
948 
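/*
 * With a two-level context-descriptor table, the SSID is split into an L1
 * index (ssid >> CTXDESC_SPLIT) and an index into a leaf of
 * CTXDESC_L2_ENTRIES descriptors. As a worked example, assuming
 * CTXDESC_SPLIT == 10 (1024-entry leaves): SSID 0x1234 selects L1 entry 4
 * and CD 0x234 within that leaf. Leaves are allocated lazily the first time
 * a CD in their range is installed.
 */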
949 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
950 				   u32 ssid)
951 {
952 	__le64 *l1ptr;
953 	unsigned int idx;
954 	struct arm_smmu_l1_ctx_desc *l1_desc;
955 	struct arm_smmu_device *smmu = smmu_domain->smmu;
956 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
957 
958 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
959 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
960 
961 	idx = ssid >> CTXDESC_SPLIT;
962 	l1_desc = &cdcfg->l1_desc[idx];
963 	if (!l1_desc->l2ptr) {
964 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
965 			return NULL;
966 
967 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
968 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
969 		/* An invalid L1CD can be cached */
970 		arm_smmu_sync_cd(smmu_domain, ssid, false);
971 	}
972 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
973 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
974 }
975 
976 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
977 			    struct arm_smmu_ctx_desc *cd)
978 {
979 	/*
980 	 * This function handles the following cases:
981 	 *
982 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
983 	 * (2) Install a secondary CD, for SID+SSID traffic.
984 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
985 	 *     CD, then invalidate the old entry and mappings.
986 	 * (4) Remove a secondary CD.
987 	 */
988 	u64 val;
989 	bool cd_live;
990 	__le64 *cdptr;
991 	struct arm_smmu_device *smmu = smmu_domain->smmu;
992 
993 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
994 		return -E2BIG;
995 
996 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
997 	if (!cdptr)
998 		return -ENOMEM;
999 
1000 	val = le64_to_cpu(cdptr[0]);
1001 	cd_live = !!(val & CTXDESC_CD_0_V);
1002 
1003 	if (!cd) { /* (4) */
1004 		val = 0;
1005 	} else if (cd_live) { /* (3) */
1006 		val &= ~CTXDESC_CD_0_ASID;
1007 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1008 		/*
1009 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1010 		 * this substream's traffic
1011 		 */
1012 	} else { /* (1) and (2) */
1013 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1014 		cdptr[2] = 0;
1015 		cdptr[3] = cpu_to_le64(cd->mair);
1016 
1017 		/*
1018 		 * STE is live, and the SMMU might read dwords of this CD in any
1019 		 * order. Ensure that it observes valid values before reading
1020 		 * V=1.
1021 		 */
1022 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1023 
1024 		val = cd->tcr |
1025 #ifdef __BIG_ENDIAN
1026 			CTXDESC_CD_0_ENDI |
1027 #endif
1028 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1029 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1030 			CTXDESC_CD_0_AA64 |
1031 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1032 			CTXDESC_CD_0_V;
1033 
1034 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1035 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1036 			val |= CTXDESC_CD_0_S;
1037 	}
1038 
1039 	/*
1040 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1041 	 * "Configuration structures and configuration invalidation completion"
1042 	 *
1043 	 *   The size of single-copy atomic reads made by the SMMU is
1044 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1045 	 *   field within an aligned 64-bit span of a structure can be altered
1046 	 *   without first making the structure invalid.
1047 	 */
1048 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1049 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1050 	return 0;
1051 }
1052 
1053 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1054 {
1055 	int ret;
1056 	size_t l1size;
1057 	size_t max_contexts;
1058 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1059 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1060 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1061 
1062 	max_contexts = 1 << cfg->s1cdmax;
1063 
1064 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1065 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1066 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1067 		cdcfg->num_l1_ents = max_contexts;
1068 
1069 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1070 	} else {
1071 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1072 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1073 						  CTXDESC_L2_ENTRIES);
1074 
1075 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1076 					      sizeof(*cdcfg->l1_desc),
1077 					      GFP_KERNEL);
1078 		if (!cdcfg->l1_desc)
1079 			return -ENOMEM;
1080 
1081 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1082 	}
1083 
1084 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1085 					   GFP_KERNEL);
1086 	if (!cdcfg->cdtab) {
1087 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1088 		ret = -ENOMEM;
1089 		goto err_free_l1;
1090 	}
1091 
1092 	return 0;
1093 
1094 err_free_l1:
1095 	if (cdcfg->l1_desc) {
1096 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1097 		cdcfg->l1_desc = NULL;
1098 	}
1099 	return ret;
1100 }
1101 
1102 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1103 {
1104 	int i;
1105 	size_t size, l1size;
1106 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1107 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1108 
1109 	if (cdcfg->l1_desc) {
1110 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1111 
1112 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1113 			if (!cdcfg->l1_desc[i].l2ptr)
1114 				continue;
1115 
1116 			dmam_free_coherent(smmu->dev, size,
1117 					   cdcfg->l1_desc[i].l2ptr,
1118 					   cdcfg->l1_desc[i].l2ptr_dma);
1119 		}
1120 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1121 		cdcfg->l1_desc = NULL;
1122 
1123 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1124 	} else {
1125 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1126 	}
1127 
1128 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1129 	cdcfg->cdtab_dma = 0;
1130 	cdcfg->cdtab = NULL;
1131 }
1132 
1133 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1134 {
1135 	bool free;
1136 	struct arm_smmu_ctx_desc *old_cd;
1137 
1138 	if (!cd->asid)
1139 		return false;
1140 
1141 	free = refcount_dec_and_test(&cd->refs);
1142 	if (free) {
1143 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1144 		WARN_ON(old_cd != cd);
1145 	}
1146 	return free;
1147 }
1148 
1149 /* Stream table manipulation functions */
1150 static void
1151 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1152 {
1153 	u64 val = 0;
1154 
1155 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1156 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1157 
1158 	/* See comment in arm_smmu_write_ctx_desc() */
1159 	WRITE_ONCE(*dst, cpu_to_le64(val));
1160 }
1161 
1162 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1163 {
1164 	struct arm_smmu_cmdq_ent cmd = {
1165 		.opcode	= CMDQ_OP_CFGI_STE,
1166 		.cfgi	= {
1167 			.sid	= sid,
1168 			.leaf	= true,
1169 		},
1170 	};
1171 
1172 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1173 	arm_smmu_cmdq_issue_sync(smmu);
1174 }
1175 
1176 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1177 				      __le64 *dst)
1178 {
1179 	/*
1180 	 * This is hideously complicated, but we only really care about
1181 	 * three cases at the moment:
1182 	 *
1183 	 * 1. Invalid (all zero) -> bypass/fault (init)
1184 	 * 2. Bypass/fault -> translation/bypass (attach)
1185 	 * 3. Translation/bypass -> bypass/fault (detach)
1186 	 *
1187 	 * Given that we can't update the STE atomically and the SMMU
1188 	 * doesn't read the thing in a defined order, that leaves us
1189 	 * with the following maintenance requirements:
1190 	 *
1191 	 * 1. Update Config, return (init time STEs aren't live)
1192 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1193 	 * 3. Update Config, sync
1194 	 */
1195 	u64 val = le64_to_cpu(dst[0]);
1196 	bool ste_live = false;
1197 	struct arm_smmu_device *smmu = NULL;
1198 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1199 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1200 	struct arm_smmu_domain *smmu_domain = NULL;
1201 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1202 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1203 		.prefetch	= {
1204 			.sid	= sid,
1205 		},
1206 	};
1207 
1208 	if (master) {
1209 		smmu_domain = master->domain;
1210 		smmu = master->smmu;
1211 	}
1212 
1213 	if (smmu_domain) {
1214 		switch (smmu_domain->stage) {
1215 		case ARM_SMMU_DOMAIN_S1:
1216 			s1_cfg = &smmu_domain->s1_cfg;
1217 			break;
1218 		case ARM_SMMU_DOMAIN_S2:
1219 		case ARM_SMMU_DOMAIN_NESTED:
1220 			s2_cfg = &smmu_domain->s2_cfg;
1221 			break;
1222 		default:
1223 			break;
1224 		}
1225 	}
1226 
1227 	if (val & STRTAB_STE_0_V) {
1228 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1229 		case STRTAB_STE_0_CFG_BYPASS:
1230 			break;
1231 		case STRTAB_STE_0_CFG_S1_TRANS:
1232 		case STRTAB_STE_0_CFG_S2_TRANS:
1233 			ste_live = true;
1234 			break;
1235 		case STRTAB_STE_0_CFG_ABORT:
1236 			BUG_ON(!disable_bypass);
1237 			break;
1238 		default:
1239 			BUG(); /* STE corruption */
1240 		}
1241 	}
1242 
1243 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1244 	val = STRTAB_STE_0_V;
1245 
1246 	/* Bypass/fault */
1247 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1248 		if (!smmu_domain && disable_bypass)
1249 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1250 		else
1251 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1252 
1253 		dst[0] = cpu_to_le64(val);
1254 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1255 						STRTAB_STE_1_SHCFG_INCOMING));
1256 		dst[2] = 0; /* Nuke the VMID */
1257 		/*
1258 		 * The SMMU can perform negative caching, so we must sync
1259 		 * the STE regardless of whether the old value was live.
1260 		 */
1261 		if (smmu)
1262 			arm_smmu_sync_ste_for_sid(smmu, sid);
1263 		return;
1264 	}
1265 
1266 	if (s1_cfg) {
1267 		BUG_ON(ste_live);
1268 		dst[1] = cpu_to_le64(
1269 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1270 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1271 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1272 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1273 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1274 
1275 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1276 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1277 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1278 
1279 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1280 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1281 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1282 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1283 	}
1284 
1285 	if (s2_cfg) {
1286 		BUG_ON(ste_live);
1287 		dst[2] = cpu_to_le64(
1288 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1289 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1290 #ifdef __BIG_ENDIAN
1291 			 STRTAB_STE_2_S2ENDI |
1292 #endif
1293 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1294 			 STRTAB_STE_2_S2R);
1295 
1296 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1297 
1298 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1299 	}
1300 
1301 	if (master->ats_enabled)
1302 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1303 						 STRTAB_STE_1_EATS_TRANS));
1304 
1305 	arm_smmu_sync_ste_for_sid(smmu, sid);
1306 	/* See comment in arm_smmu_write_ctx_desc() */
1307 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1308 	arm_smmu_sync_ste_for_sid(smmu, sid);
1309 
1310 	/* It's likely that we'll want to use the new STE soon */
1311 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1312 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1313 }
1314 
1315 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1316 {
1317 	unsigned int i;
1318 
1319 	for (i = 0; i < nent; ++i) {
1320 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1321 		strtab += STRTAB_STE_DWORDS;
1322 	}
1323 }
1324 
1325 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1326 {
1327 	size_t size;
1328 	void *strtab;
1329 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1330 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1331 
1332 	if (desc->l2ptr)
1333 		return 0;
1334 
1335 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1336 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1337 
1338 	desc->span = STRTAB_SPLIT + 1;
1339 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1340 					  GFP_KERNEL);
1341 	if (!desc->l2ptr) {
1342 		dev_err(smmu->dev,
1343 			"failed to allocate l2 stream table for SID %u\n",
1344 			sid);
1345 		return -ENOMEM;
1346 	}
1347 
1348 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1349 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1350 	return 0;
1351 }
1352 
1353 /* IRQ and event handlers */
1354 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1355 {
1356 	int i;
1357 	struct arm_smmu_device *smmu = dev;
1358 	struct arm_smmu_queue *q = &smmu->evtq.q;
1359 	struct arm_smmu_ll_queue *llq = &q->llq;
1360 	u64 evt[EVTQ_ENT_DWORDS];
1361 
1362 	do {
1363 		while (!queue_remove_raw(q, evt)) {
1364 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1365 
1366 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1367 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1368 				dev_info(smmu->dev, "\t0x%016llx\n",
1369 					 (unsigned long long)evt[i]);
1370 
1371 			cond_resched();
1372 		}
1373 
1374 		/*
1375 		 * Not much we can do on overflow, so scream and pretend we're
1376 		 * trying harder.
1377 		 */
1378 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1379 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1380 	} while (!queue_empty(llq));
1381 
1382 	/* Sync our overflow flag, as we believe we're up to speed */
1383 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1384 		    Q_IDX(llq, llq->cons);
1385 	return IRQ_HANDLED;
1386 }
1387 
1388 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1389 {
1390 	u32 sid, ssid;
1391 	u16 grpid;
1392 	bool ssv, last;
1393 
1394 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1395 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1396 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1397 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1398 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1399 
1400 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1401 	dev_info(smmu->dev,
1402 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1403 		 sid, ssid, grpid, last ? "L" : "",
1404 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1405 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1406 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1407 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1408 		 evt[1] & PRIQ_1_ADDR_MASK);
1409 
1410 	if (last) {
1411 		struct arm_smmu_cmdq_ent cmd = {
1412 			.opcode			= CMDQ_OP_PRI_RESP,
1413 			.substream_valid	= ssv,
1414 			.pri			= {
1415 				.sid	= sid,
1416 				.ssid	= ssid,
1417 				.grpid	= grpid,
1418 				.resp	= PRI_RESP_DENY,
1419 			},
1420 		};
1421 
1422 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1423 	}
1424 }
1425 
1426 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1427 {
1428 	struct arm_smmu_device *smmu = dev;
1429 	struct arm_smmu_queue *q = &smmu->priq.q;
1430 	struct arm_smmu_ll_queue *llq = &q->llq;
1431 	u64 evt[PRIQ_ENT_DWORDS];
1432 
1433 	do {
1434 		while (!queue_remove_raw(q, evt))
1435 			arm_smmu_handle_ppr(smmu, evt);
1436 
1437 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1438 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1439 	} while (!queue_empty(llq));
1440 
1441 	/* Sync our overflow flag, as we believe we're up to speed */
1442 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1443 		      Q_IDX(llq, llq->cons);
1444 	queue_sync_cons_out(q);
1445 	return IRQ_HANDLED;
1446 }
1447 
1448 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1449 
1450 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1451 {
1452 	u32 gerror, gerrorn, active;
1453 	struct arm_smmu_device *smmu = dev;
1454 
1455 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1456 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1457 
1458 	active = gerror ^ gerrorn;
1459 	if (!(active & GERROR_ERR_MASK))
1460 		return IRQ_NONE; /* No errors pending */
1461 
1462 	dev_warn(smmu->dev,
1463 		 "unexpected global error reported (0x%08x), this could be serious\n",
1464 		 active);
1465 
1466 	if (active & GERROR_SFM_ERR) {
1467 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1468 		arm_smmu_device_disable(smmu);
1469 	}
1470 
1471 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1472 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1473 
1474 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1475 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1476 
1477 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1478 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1479 
1480 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1481 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1482 
1483 	if (active & GERROR_PRIQ_ABT_ERR)
1484 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1485 
1486 	if (active & GERROR_EVTQ_ABT_ERR)
1487 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1488 
1489 	if (active & GERROR_CMDQ_ERR)
1490 		arm_smmu_cmdq_skip_err(smmu);
1491 
1492 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1493 	return IRQ_HANDLED;
1494 }
1495 
1496 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1497 {
1498 	struct arm_smmu_device *smmu = dev;
1499 
1500 	arm_smmu_evtq_thread(irq, dev);
1501 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1502 		arm_smmu_priq_thread(irq, dev);
1503 
1504 	return IRQ_HANDLED;
1505 }
1506 
1507 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1508 {
1509 	arm_smmu_gerror_handler(irq, dev);
1510 	return IRQ_WAKE_THREAD;
1511 }
1512 
1513 static void
1514 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1515 			struct arm_smmu_cmdq_ent *cmd)
1516 {
1517 	size_t log2_span;
1518 	size_t span_mask;
1519 	/* ATC invalidates are always on 4096-bytes pages */
1520 	size_t inval_grain_shift = 12;
1521 	unsigned long page_start, page_end;
1522 
1523 	*cmd = (struct arm_smmu_cmdq_ent) {
1524 		.opcode			= CMDQ_OP_ATC_INV,
1525 		.substream_valid	= !!ssid,
1526 		.atc.ssid		= ssid,
1527 	};
1528 
1529 	if (!size) {
1530 		cmd->atc.size = ATC_INV_SIZE_ALL;
1531 		return;
1532 	}
1533 
1534 	page_start	= iova >> inval_grain_shift;
1535 	page_end	= (iova + size - 1) >> inval_grain_shift;
1536 
1537 	/*
1538 	 * In an ATS Invalidate Request, the address must be aligned on the
1539 	 * range size, which must be a power of two number of page sizes. We
1540 	 * thus have to choose between grossly over-invalidating the region, or
1541 	 * splitting the invalidation into multiple commands. For simplicity
1542 	 * we'll go with the first solution, but should refine it in the future
1543 	 * if multiple commands are shown to be more efficient.
1544 	 *
1545 	 * Find the smallest power of two that covers the range. The most
1546 	 * significant differing bit between the start and end addresses,
1547 	 * fls(start ^ end), indicates the required span. For example:
1548 	 *
1549 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1550 	 *		x = 0b1000 ^ 0b1011 = 0b11
1551 	 *		span = 1 << fls(x) = 4
1552 	 *
1553 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1554 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1555 	 *		span = 1 << fls(x) = 16
1556 	 */
1557 	log2_span	= fls_long(page_start ^ page_end);
1558 	span_mask	= (1ULL << log2_span) - 1;
1559 
1560 	page_start	&= ~span_mask;
1561 
1562 	cmd->atc.addr	= page_start << inval_grain_shift;
1563 	cmd->atc.size	= log2_span;
1564 }
1565 
1566 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1567 {
1568 	int i;
1569 	struct arm_smmu_cmdq_ent cmd;
1570 
1571 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1572 
1573 	for (i = 0; i < master->num_sids; i++) {
1574 		cmd.atc.sid = master->sids[i];
1575 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1576 	}
1577 
1578 	return arm_smmu_cmdq_issue_sync(master->smmu);
1579 }
1580 
1581 static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1582 				   int ssid, unsigned long iova, size_t size)
1583 {
1584 	int i;
1585 	unsigned long flags;
1586 	struct arm_smmu_cmdq_ent cmd;
1587 	struct arm_smmu_master *master;
1588 	struct arm_smmu_cmdq_batch cmds = {};
1589 
1590 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1591 		return 0;
1592 
1593 	/*
1594 	 * Ensure that we've completed prior invalidation of the main TLBs
1595 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1596 	 * arm_smmu_enable_ats():
1597 	 *
1598 	 *	// unmap()			// arm_smmu_enable_ats()
1599 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1600 	 *	smp_mb();			[...]
1601 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1602 	 *
1603 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1604 	 * ATS was enabled at the PCI device before completion of the TLBI.
1605 	 */
1606 	smp_mb();
1607 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1608 		return 0;
1609 
1610 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1611 
1612 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1613 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1614 		if (!master->ats_enabled)
1615 			continue;
1616 
1617 		for (i = 0; i < master->num_sids; i++) {
1618 			cmd.atc.sid = master->sids[i];
1619 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1620 		}
1621 	}
1622 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1623 
1624 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1625 }
1626 
1627 /* IO_PGTABLE API */
1628 static void arm_smmu_tlb_inv_context(void *cookie)
1629 {
1630 	struct arm_smmu_domain *smmu_domain = cookie;
1631 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1632 	struct arm_smmu_cmdq_ent cmd;
1633 
1634 	/*
1635 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1636 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1637 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1638 	 * insertion to guarantee those are observed before the TLBI. Do be
1639 	 * careful, 007.
1640 	 */
1641 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1642 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1643 	} else {
1644 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1645 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1646 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1647 		arm_smmu_cmdq_issue_sync(smmu);
1648 	}
1649 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1650 }
1651 
1652 static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1653 				   size_t granule, bool leaf,
1654 				   struct arm_smmu_domain *smmu_domain)
1655 {
1656 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1657 	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1658 	size_t inv_range = granule;
1659 	struct arm_smmu_cmdq_batch cmds = {};
1660 	struct arm_smmu_cmdq_ent cmd = {
1661 		.tlbi = {
1662 			.leaf	= leaf,
1663 		},
1664 	};
1665 
1666 	if (!size)
1667 		return;
1668 
1669 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1670 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1671 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1672 	} else {
1673 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1674 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1675 	}
1676 
1677 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1678 		/* Get the leaf page size */
1679 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1680 
1681 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1682 		cmd.tlbi.tg = (tg - 10) / 2;
1683 
1684 		/* Determine what level the granule is at */
1685 		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
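		/*
		 * Worked example with a 4KiB leaf page (tg = 12): a 2MiB
		 * block granule gives ttl = 4 - (21 - 3) / 9 = 2, while a
		 * 4KiB granule gives ttl = 4 - (12 - 3) / 9 = 3 (the leaf).
		 */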
1686 
1687 		num_pages = size >> tg;
1688 	}
1689 
1690 	while (iova < end) {
1691 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1692 			/*
1693 			 * On each iteration of the loop, the range is 5 bits
1694 			 * worth of the aligned size remaining.
1695 			 * The range in pages is:
1696 			 *
1697 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1698 			 */
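			/*
			 * Worked example (tg = 12, 4KiB pages): for
			 * num_pages = 0x23, the first pass invalidates
			 * 3 pages (scale = 0, num = 3) and leaves 0x20;
			 * the second pass covers the remaining 32 pages
			 * (scale = 5, num = 1).
			 */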
1699 			unsigned long scale, num;
1700 
1701 			/* Determine the power of 2 multiple number of pages */
1702 			scale = __ffs(num_pages);
1703 			cmd.tlbi.scale = scale;
1704 
1705 			/* Determine how many chunks of 2^scale size we have */
1706 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1707 			cmd.tlbi.num = num - 1;
1708 
1709 			/* range is num * 2^scale * pgsize */
1710 			inv_range = num << (scale + tg);
1711 
1712 			/* Clear out the lower order bits for the next iteration */
1713 			num_pages -= num << scale;
1714 		}
1715 
1716 		cmd.tlbi.addr = iova;
1717 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1718 		iova += inv_range;
1719 	}
1720 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1721 
1722 	/*
1723 	 * Unfortunately, this can't be leaf-only since we may have
1724 	 * zapped an entire table.
1725 	 */
1726 	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1727 }
1728 
1729 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1730 					 unsigned long iova, size_t granule,
1731 					 void *cookie)
1732 {
1733 	struct arm_smmu_domain *smmu_domain = cookie;
1734 	struct iommu_domain *domain = &smmu_domain->domain;
1735 
1736 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1737 }
1738 
1739 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1740 				  size_t granule, void *cookie)
1741 {
1742 	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1743 }
1744 
1745 static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
1746 				  size_t granule, void *cookie)
1747 {
1748 	arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
1749 }
1750 
1751 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1752 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1753 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1754 	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
1755 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1756 };
1757 
1758 /* IOMMU API */
1759 static bool arm_smmu_capable(enum iommu_cap cap)
1760 {
1761 	switch (cap) {
1762 	case IOMMU_CAP_CACHE_COHERENCY:
1763 		return true;
1764 	case IOMMU_CAP_NOEXEC:
1765 		return true;
1766 	default:
1767 		return false;
1768 	}
1769 }
1770 
1771 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1772 {
1773 	struct arm_smmu_domain *smmu_domain;
1774 
1775 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1776 	    type != IOMMU_DOMAIN_DMA &&
1777 	    type != IOMMU_DOMAIN_IDENTITY)
1778 		return NULL;
1779 
1780 	/*
1781 	 * Allocate the domain and initialise some of its data structures.
1782 	 * We can't really do anything meaningful until we've added a
1783 	 * master.
1784 	 */
1785 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1786 	if (!smmu_domain)
1787 		return NULL;
1788 
1789 	if (type == IOMMU_DOMAIN_DMA &&
1790 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1791 		kfree(smmu_domain);
1792 		return NULL;
1793 	}
1794 
1795 	mutex_init(&smmu_domain->init_mutex);
1796 	INIT_LIST_HEAD(&smmu_domain->devices);
1797 	spin_lock_init(&smmu_domain->devices_lock);
1798 
1799 	return &smmu_domain->domain;
1800 }
1801 
1802 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1803 {
1804 	int idx, size = 1 << span;
1805 
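	/* Retry if another CPU grabbed the bit between the search and the set */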
1806 	do {
1807 		idx = find_first_zero_bit(map, size);
1808 		if (idx == size)
1809 			return -ENOSPC;
1810 	} while (test_and_set_bit(idx, map));
1811 
1812 	return idx;
1813 }
1814 
1815 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1816 {
1817 	clear_bit(idx, map);
1818 }
1819 
1820 static void arm_smmu_domain_free(struct iommu_domain *domain)
1821 {
1822 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1823 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1824 
1825 	iommu_put_dma_cookie(domain);
1826 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1827 
1828 	/* Free the CD and ASID, if we allocated them */
1829 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1830 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1831 
1832 		/* Prevent SVA from touching the CD while we're freeing it */
1833 		mutex_lock(&arm_smmu_asid_lock);
1834 		if (cfg->cdcfg.cdtab)
1835 			arm_smmu_free_cd_tables(smmu_domain);
1836 		arm_smmu_free_asid(&cfg->cd);
1837 		mutex_unlock(&arm_smmu_asid_lock);
1838 	} else {
1839 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1840 		if (cfg->vmid)
1841 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1842 	}
1843 
1844 	kfree(smmu_domain);
1845 }
1846 
1847 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1848 				       struct arm_smmu_master *master,
1849 				       struct io_pgtable_cfg *pgtbl_cfg)
1850 {
1851 	int ret;
1852 	u32 asid;
1853 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1854 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1855 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1856 
1857 	refcount_set(&cfg->cd.refs, 1);
1858 
1859 	/* Prevent SVA from modifying the ASID until it is written to the CD */
1860 	mutex_lock(&arm_smmu_asid_lock);
1861 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1862 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1863 	if (ret)
1864 		goto out_unlock;
1865 
1866 	cfg->s1cdmax = master->ssid_bits;
1867 
1868 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
1869 	if (ret)
1870 		goto out_free_asid;
1871 
1872 	cfg->cd.asid	= (u16)asid;
1873 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1874 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1875 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1876 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1877 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1878 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1879 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1880 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1881 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
1882 
1883 	/*
1884 	 * Note that this will end up calling arm_smmu_sync_cd() before
1885 	 * the master has been added to the devices list for this domain.
1886 	 * This isn't an issue because the STE hasn't been installed yet.
1887 	 */
1888 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1889 	if (ret)
1890 		goto out_free_cd_tables;
1891 
1892 	mutex_unlock(&arm_smmu_asid_lock);
1893 	return 0;
1894 
1895 out_free_cd_tables:
1896 	arm_smmu_free_cd_tables(smmu_domain);
1897 out_free_asid:
1898 	arm_smmu_free_asid(&cfg->cd);
1899 out_unlock:
1900 	mutex_unlock(&arm_smmu_asid_lock);
1901 	return ret;
1902 }
1903 
1904 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1905 				       struct arm_smmu_master *master,
1906 				       struct io_pgtable_cfg *pgtbl_cfg)
1907 {
1908 	int vmid;
1909 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1910 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1911 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1912 
1913 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1914 	if (vmid < 0)
1915 		return vmid;
1916 
1917 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1918 	cfg->vmid	= (u16)vmid;
1919 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1920 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1921 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1922 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1923 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1924 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1925 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1926 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1927 	return 0;
1928 }
1929 
1930 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1931 				    struct arm_smmu_master *master)
1932 {
1933 	int ret;
1934 	unsigned long ias, oas;
1935 	enum io_pgtable_fmt fmt;
1936 	struct io_pgtable_cfg pgtbl_cfg;
1937 	struct io_pgtable_ops *pgtbl_ops;
1938 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1939 				 struct arm_smmu_master *,
1940 				 struct io_pgtable_cfg *);
1941 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1942 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1943 
1944 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1945 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1946 		return 0;
1947 	}
1948 
1949 	/* Restrict the stage to what we can actually support */
1950 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1951 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1952 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1953 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1954 
1955 	switch (smmu_domain->stage) {
1956 	case ARM_SMMU_DOMAIN_S1:
1957 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1958 		ias = min_t(unsigned long, ias, VA_BITS);
1959 		oas = smmu->ias;
1960 		fmt = ARM_64_LPAE_S1;
1961 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1962 		break;
1963 	case ARM_SMMU_DOMAIN_NESTED:
1964 	case ARM_SMMU_DOMAIN_S2:
1965 		ias = smmu->ias;
1966 		oas = smmu->oas;
1967 		fmt = ARM_64_LPAE_S2;
1968 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1969 		break;
1970 	default:
1971 		return -EINVAL;
1972 	}
1973 
1974 	pgtbl_cfg = (struct io_pgtable_cfg) {
1975 		.pgsize_bitmap	= smmu->pgsize_bitmap,
1976 		.ias		= ias,
1977 		.oas		= oas,
1978 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
1979 		.tlb		= &arm_smmu_flush_ops,
1980 		.iommu_dev	= smmu->dev,
1981 	};
1982 
1983 	if (smmu_domain->non_strict)
1984 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1985 
1986 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1987 	if (!pgtbl_ops)
1988 		return -ENOMEM;
1989 
1990 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1991 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1992 	domain->geometry.force_aperture = true;
1993 
1994 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
1995 	if (ret < 0) {
1996 		free_io_pgtable_ops(pgtbl_ops);
1997 		return ret;
1998 	}
1999 
2000 	smmu_domain->pgtbl_ops = pgtbl_ops;
2001 	return 0;
2002 }
2003 
2004 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2005 {
2006 	__le64 *step;
2007 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2008 
2009 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2010 		struct arm_smmu_strtab_l1_desc *l1_desc;
2011 		int idx;
2012 
2013 		/* Two-level walk */
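		/*
		 * The upper SID bits select the L1 descriptor; the low
		 * STRTAB_SPLIT bits index the STE within its L2 table.
		 */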
2014 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2015 		l1_desc = &cfg->l1_desc[idx];
2016 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2017 		step = &l1_desc->l2ptr[idx];
2018 	} else {
2019 		/* Simple linear lookup */
2020 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2021 	}
2022 
2023 	return step;
2024 }
2025 
2026 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2027 {
2028 	int i, j;
2029 	struct arm_smmu_device *smmu = master->smmu;
2030 
2031 	for (i = 0; i < master->num_sids; ++i) {
2032 		u32 sid = master->sids[i];
2033 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2034 
2035 		/* Bridged PCI devices may end up with duplicated IDs */
2036 		for (j = 0; j < i; j++)
2037 			if (master->sids[j] == sid)
2038 				break;
2039 		if (j < i)
2040 			continue;
2041 
2042 		arm_smmu_write_strtab_ent(master, sid, step);
2043 	}
2044 }
2045 
2046 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2047 {
2048 	struct device *dev = master->dev;
2049 	struct arm_smmu_device *smmu = master->smmu;
2050 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2051 
2052 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2053 		return false;
2054 
2055 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2056 		return false;
2057 
2058 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2059 }
2060 
2061 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2062 {
2063 	size_t stu;
2064 	struct pci_dev *pdev;
2065 	struct arm_smmu_device *smmu = master->smmu;
2066 	struct arm_smmu_domain *smmu_domain = master->domain;
2067 
2068 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2069 	if (!master->ats_enabled)
2070 		return;
2071 
2072 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2073 	stu = __ffs(smmu->pgsize_bitmap);
2074 	pdev = to_pci_dev(master->dev);
2075 
2076 	atomic_inc(&smmu_domain->nr_ats_masters);
2077 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2078 	if (pci_enable_ats(pdev, stu))
2079 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2080 }
2081 
2082 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2083 {
2084 	struct arm_smmu_domain *smmu_domain = master->domain;
2085 
2086 	if (!master->ats_enabled)
2087 		return;
2088 
2089 	pci_disable_ats(to_pci_dev(master->dev));
2090 	/*
2091 	 * Ensure ATS is disabled at the endpoint before we issue the
2092 	 * ATC invalidation via the SMMU.
2093 	 */
2094 	wmb();
2095 	arm_smmu_atc_inv_master(master);
2096 	atomic_dec(&smmu_domain->nr_ats_masters);
2097 }
2098 
2099 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2100 {
2101 	int ret;
2102 	int features;
2103 	int num_pasids;
2104 	struct pci_dev *pdev;
2105 
2106 	if (!dev_is_pci(master->dev))
2107 		return -ENODEV;
2108 
2109 	pdev = to_pci_dev(master->dev);
2110 
2111 	features = pci_pasid_features(pdev);
2112 	if (features < 0)
2113 		return features;
2114 
2115 	num_pasids = pci_max_pasids(pdev);
2116 	if (num_pasids <= 0)
2117 		return num_pasids;
2118 
2119 	ret = pci_enable_pasid(pdev, features);
2120 	if (ret) {
2121 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2122 		return ret;
2123 	}
2124 
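	/* A device exposing N PASIDs needs ilog2(N) SSID bits, capped to the SMMU's SSID width */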
2125 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2126 				  master->smmu->ssid_bits);
2127 	return 0;
2128 }
2129 
2130 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2131 {
2132 	struct pci_dev *pdev;
2133 
2134 	if (!dev_is_pci(master->dev))
2135 		return;
2136 
2137 	pdev = to_pci_dev(master->dev);
2138 
2139 	if (!pdev->pasid_enabled)
2140 		return;
2141 
2142 	master->ssid_bits = 0;
2143 	pci_disable_pasid(pdev);
2144 }
2145 
2146 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2147 {
2148 	unsigned long flags;
2149 	struct arm_smmu_domain *smmu_domain = master->domain;
2150 
2151 	if (!smmu_domain)
2152 		return;
2153 
2154 	arm_smmu_disable_ats(master);
2155 
2156 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2157 	list_del(&master->domain_head);
2158 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2159 
2160 	master->domain = NULL;
2161 	master->ats_enabled = false;
2162 	arm_smmu_install_ste_for_dev(master);
2163 }
2164 
2165 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2166 {
2167 	int ret = 0;
2168 	unsigned long flags;
2169 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2170 	struct arm_smmu_device *smmu;
2171 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2172 	struct arm_smmu_master *master;
2173 
2174 	if (!fwspec)
2175 		return -ENOENT;
2176 
2177 	master = dev_iommu_priv_get(dev);
2178 	smmu = master->smmu;
2179 
2180 	/*
2181 	 * Checking that SVA is disabled ensures that this device isn't bound to
2182 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2183 	 * be removed concurrently since we're holding the group mutex.
2184 	 */
2185 	if (arm_smmu_master_sva_enabled(master)) {
2186 		dev_err(dev, "cannot attach - SVA enabled\n");
2187 		return -EBUSY;
2188 	}
2189 
2190 	arm_smmu_detach_dev(master);
2191 
2192 	mutex_lock(&smmu_domain->init_mutex);
2193 
2194 	if (!smmu_domain->smmu) {
2195 		smmu_domain->smmu = smmu;
2196 		ret = arm_smmu_domain_finalise(domain, master);
2197 		if (ret) {
2198 			smmu_domain->smmu = NULL;
2199 			goto out_unlock;
2200 		}
2201 	} else if (smmu_domain->smmu != smmu) {
2202 		dev_err(dev,
2203 			"cannot attach to SMMU %s (upstream of %s)\n",
2204 			dev_name(smmu_domain->smmu->dev),
2205 			dev_name(smmu->dev));
2206 		ret = -ENXIO;
2207 		goto out_unlock;
2208 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2209 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2210 		dev_err(dev,
2211 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2212 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2213 		ret = -EINVAL;
2214 		goto out_unlock;
2215 	}
2216 
2217 	master->domain = smmu_domain;
2218 
2219 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2220 		master->ats_enabled = arm_smmu_ats_supported(master);
2221 
2222 	arm_smmu_install_ste_for_dev(master);
2223 
2224 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2225 	list_add(&master->domain_head, &smmu_domain->devices);
2226 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2227 
2228 	arm_smmu_enable_ats(master);
2229 
2230 out_unlock:
2231 	mutex_unlock(&smmu_domain->init_mutex);
2232 	return ret;
2233 }
2234 
2235 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2236 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2237 {
2238 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2239 
2240 	if (!ops)
2241 		return -ENODEV;
2242 
2243 	return ops->map(ops, iova, paddr, size, prot, gfp);
2244 }
2245 
2246 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2247 			     size_t size, struct iommu_iotlb_gather *gather)
2248 {
2249 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2250 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2251 
2252 	if (!ops)
2253 		return 0;
2254 
2255 	return ops->unmap(ops, iova, size, gather);
2256 }
2257 
2258 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2259 {
2260 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2261 
2262 	if (smmu_domain->smmu)
2263 		arm_smmu_tlb_inv_context(smmu_domain);
2264 }
2265 
2266 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2267 				struct iommu_iotlb_gather *gather)
2268 {
2269 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2270 
2271 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start + 1,
2272 			       gather->pgsize, true, smmu_domain);
2273 }
2274 
2275 static phys_addr_t
2276 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2277 {
2278 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2279 
2280 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2281 		return iova;
2282 
2283 	if (!ops)
2284 		return 0;
2285 
2286 	return ops->iova_to_phys(ops, iova);
2287 }
2288 
2289 static struct platform_driver arm_smmu_driver;
2290 
2291 static
2292 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2293 {
2294 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2295 							  fwnode);
2296 	put_device(dev);
2297 	return dev ? dev_get_drvdata(dev) : NULL;
2298 }
2299 
2300 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2301 {
2302 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2303 
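	/* Each L1 descriptor of a 2-level stream table covers 2^STRTAB_SPLIT stream IDs */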
2304 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2305 		limit *= 1UL << STRTAB_SPLIT;
2306 
2307 	return sid < limit;
2308 }
2309 
2310 static struct iommu_ops arm_smmu_ops;
2311 
2312 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2313 {
2314 	int i, ret;
2315 	struct arm_smmu_device *smmu;
2316 	struct arm_smmu_master *master;
2317 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2318 
2319 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2320 		return ERR_PTR(-ENODEV);
2321 
2322 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2323 		return ERR_PTR(-EBUSY);
2324 
2325 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2326 	if (!smmu)
2327 		return ERR_PTR(-ENODEV);
2328 
2329 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2330 	if (!master)
2331 		return ERR_PTR(-ENOMEM);
2332 
2333 	master->dev = dev;
2334 	master->smmu = smmu;
2335 	master->sids = fwspec->ids;
2336 	master->num_sids = fwspec->num_ids;
2337 	INIT_LIST_HEAD(&master->bonds);
2338 	dev_iommu_priv_set(dev, master);
2339 
2340 	/* Check the SIDs are in range of the SMMU and our stream table */
2341 	for (i = 0; i < master->num_sids; i++) {
2342 		u32 sid = master->sids[i];
2343 
2344 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2345 			ret = -ERANGE;
2346 			goto err_free_master;
2347 		}
2348 
2349 		/* Ensure l2 strtab is initialised */
2350 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2351 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2352 			if (ret)
2353 				goto err_free_master;
2354 		}
2355 	}
2356 
2357 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2358 
2359 	/*
2360 	 * Note that PASID must be enabled before, and disabled after ATS:
2361 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2362 	 *
2363 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2364 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2365 	 *   are changed.
2366 	 */
2367 	arm_smmu_enable_pasid(master);
2368 
2369 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2370 		master->ssid_bits = min_t(u8, master->ssid_bits,
2371 					  CTXDESC_LINEAR_CDMAX);
2372 
2373 	return &smmu->iommu;
2374 
2375 err_free_master:
2376 	kfree(master);
2377 	dev_iommu_priv_set(dev, NULL);
2378 	return ERR_PTR(ret);
2379 }
2380 
2381 static void arm_smmu_release_device(struct device *dev)
2382 {
2383 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2384 	struct arm_smmu_master *master;
2385 
2386 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2387 		return;
2388 
2389 	master = dev_iommu_priv_get(dev);
2390 	WARN_ON(arm_smmu_master_sva_enabled(master));
2391 	arm_smmu_detach_dev(master);
2392 	arm_smmu_disable_pasid(master);
2393 	kfree(master);
2394 	iommu_fwspec_free(dev);
2395 }
2396 
2397 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2398 {
2399 	struct iommu_group *group;
2400 
2401 	/*
2402 	 * We don't support devices sharing stream IDs other than PCI RID
2403 	 * aliases, since the necessary ID-to-device lookup becomes rather
2404 	 * impractical given a potential sparse 32-bit stream ID space.
2405 	 */
2406 	if (dev_is_pci(dev))
2407 		group = pci_device_group(dev);
2408 	else
2409 		group = generic_device_group(dev);
2410 
2411 	return group;
2412 }
2413 
2414 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2415 				    enum iommu_attr attr, void *data)
2416 {
2417 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2418 
2419 	switch (domain->type) {
2420 	case IOMMU_DOMAIN_UNMANAGED:
2421 		switch (attr) {
2422 		case DOMAIN_ATTR_NESTING:
2423 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2424 			return 0;
2425 		default:
2426 			return -ENODEV;
2427 		}
2428 		break;
2429 	case IOMMU_DOMAIN_DMA:
2430 		switch (attr) {
2431 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2432 			*(int *)data = smmu_domain->non_strict;
2433 			return 0;
2434 		default:
2435 			return -ENODEV;
2436 		}
2437 		break;
2438 	default:
2439 		return -EINVAL;
2440 	}
2441 }
2442 
2443 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2444 				    enum iommu_attr attr, void *data)
2445 {
2446 	int ret = 0;
2447 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2448 
2449 	mutex_lock(&smmu_domain->init_mutex);
2450 
2451 	switch (domain->type) {
2452 	case IOMMU_DOMAIN_UNMANAGED:
2453 		switch (attr) {
2454 		case DOMAIN_ATTR_NESTING:
2455 			if (smmu_domain->smmu) {
2456 				ret = -EPERM;
2457 				goto out_unlock;
2458 			}
2459 
2460 			if (*(int *)data)
2461 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2462 			else
2463 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2464 			break;
2465 		default:
2466 			ret = -ENODEV;
2467 		}
2468 		break;
2469 	case IOMMU_DOMAIN_DMA:
2470 		switch(attr) {
2471 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2472 			smmu_domain->non_strict = *(int *)data;
2473 			break;
2474 		default:
2475 			ret = -ENODEV;
2476 		}
2477 		break;
2478 	default:
2479 		ret = -EINVAL;
2480 	}
2481 
2482 out_unlock:
2483 	mutex_unlock(&smmu_domain->init_mutex);
2484 	return ret;
2485 }
2486 
2487 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2488 {
2489 	return iommu_fwspec_add_ids(dev, args->args, 1);
2490 }
2491 
2492 static void arm_smmu_get_resv_regions(struct device *dev,
2493 				      struct list_head *head)
2494 {
2495 	struct iommu_resv_region *region;
2496 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2497 
2498 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2499 					 prot, IOMMU_RESV_SW_MSI);
2500 	if (!region)
2501 		return;
2502 
2503 	list_add_tail(&region->list, head);
2504 
2505 	iommu_dma_get_resv_regions(dev, head);
2506 }
2507 
2508 static bool arm_smmu_dev_has_feature(struct device *dev,
2509 				     enum iommu_dev_features feat)
2510 {
2511 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2512 
2513 	if (!master)
2514 		return false;
2515 
2516 	switch (feat) {
2517 	case IOMMU_DEV_FEAT_SVA:
2518 		return arm_smmu_master_sva_supported(master);
2519 	default:
2520 		return false;
2521 	}
2522 }
2523 
2524 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2525 					 enum iommu_dev_features feat)
2526 {
2527 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2528 
2529 	if (!master)
2530 		return false;
2531 
2532 	switch (feat) {
2533 	case IOMMU_DEV_FEAT_SVA:
2534 		return arm_smmu_master_sva_enabled(master);
2535 	default:
2536 		return false;
2537 	}
2538 }
2539 
2540 static int arm_smmu_dev_enable_feature(struct device *dev,
2541 				       enum iommu_dev_features feat)
2542 {
2543 	if (!arm_smmu_dev_has_feature(dev, feat))
2544 		return -ENODEV;
2545 
2546 	if (arm_smmu_dev_feature_enabled(dev, feat))
2547 		return -EBUSY;
2548 
2549 	switch (feat) {
2550 	case IOMMU_DEV_FEAT_SVA:
2551 		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2552 	default:
2553 		return -EINVAL;
2554 	}
2555 }
2556 
2557 static int arm_smmu_dev_disable_feature(struct device *dev,
2558 					enum iommu_dev_features feat)
2559 {
2560 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2561 		return -EINVAL;
2562 
2563 	switch (feat) {
2564 	case IOMMU_DEV_FEAT_SVA:
2565 		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2566 	default:
2567 		return -EINVAL;
2568 	}
2569 }
2570 
2571 static struct iommu_ops arm_smmu_ops = {
2572 	.capable		= arm_smmu_capable,
2573 	.domain_alloc		= arm_smmu_domain_alloc,
2574 	.domain_free		= arm_smmu_domain_free,
2575 	.attach_dev		= arm_smmu_attach_dev,
2576 	.map			= arm_smmu_map,
2577 	.unmap			= arm_smmu_unmap,
2578 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2579 	.iotlb_sync		= arm_smmu_iotlb_sync,
2580 	.iova_to_phys		= arm_smmu_iova_to_phys,
2581 	.probe_device		= arm_smmu_probe_device,
2582 	.release_device		= arm_smmu_release_device,
2583 	.device_group		= arm_smmu_device_group,
2584 	.domain_get_attr	= arm_smmu_domain_get_attr,
2585 	.domain_set_attr	= arm_smmu_domain_set_attr,
2586 	.of_xlate		= arm_smmu_of_xlate,
2587 	.get_resv_regions	= arm_smmu_get_resv_regions,
2588 	.put_resv_regions	= generic_iommu_put_resv_regions,
2589 	.dev_has_feat		= arm_smmu_dev_has_feature,
2590 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2591 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2592 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2593 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2594 };
2595 
2596 /* Probing and initialisation functions */
2597 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2598 				   struct arm_smmu_queue *q,
2599 				   unsigned long prod_off,
2600 				   unsigned long cons_off,
2601 				   size_t dwords, const char *name)
2602 {
2603 	size_t qsz;
2604 
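	/*
	 * Start from the advertised maximum and halve the queue on allocation
	 * failure, giving up once it would shrink below a page.
	 */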
2605 	do {
2606 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2607 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2608 					      GFP_KERNEL);
2609 		if (q->base || qsz < PAGE_SIZE)
2610 			break;
2611 
2612 		q->llq.max_n_shift--;
2613 	} while (1);
2614 
2615 	if (!q->base) {
2616 		dev_err(smmu->dev,
2617 			"failed to allocate queue (0x%zx bytes) for %s\n",
2618 			qsz, name);
2619 		return -ENOMEM;
2620 	}
2621 
2622 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2623 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2624 			 1 << q->llq.max_n_shift, name);
2625 	}
2626 
2627 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
2628 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
2629 	q->ent_dwords	= dwords;
2630 
2631 	q->q_base  = Q_BASE_RWA;
2632 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2633 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2634 
2635 	q->llq.prod = q->llq.cons = 0;
2636 	return 0;
2637 }
2638 
2639 static void arm_smmu_cmdq_free_bitmap(void *data)
2640 {
2641 	unsigned long *bitmap = data;
2642 	bitmap_free(bitmap);
2643 }
2644 
2645 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2646 {
2647 	int ret = 0;
2648 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2649 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2650 	atomic_long_t *bitmap;
2651 
2652 	atomic_set(&cmdq->owner_prod, 0);
2653 	atomic_set(&cmdq->lock, 0);
2654 
2655 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2656 	if (!bitmap) {
2657 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2658 		ret = -ENOMEM;
2659 	} else {
2660 		cmdq->valid_map = bitmap;
2661 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2662 	}
2663 
2664 	return ret;
2665 }
2666 
2667 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2668 {
2669 	int ret;
2670 
2671 	/* cmdq */
2672 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2673 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2674 				      "cmdq");
2675 	if (ret)
2676 		return ret;
2677 
2678 	ret = arm_smmu_cmdq_init(smmu);
2679 	if (ret)
2680 		return ret;
2681 
2682 	/* evtq */
2683 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2684 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2685 				      "evtq");
2686 	if (ret)
2687 		return ret;
2688 
2689 	/* priq */
2690 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2691 		return 0;
2692 
2693 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2694 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2695 				       "priq");
2696 }
2697 
2698 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2699 {
2700 	unsigned int i;
2701 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2702 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2703 	void *strtab = smmu->strtab_cfg.strtab;
2704 
2705 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2706 	if (!cfg->l1_desc) {
2707 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2708 		return -ENOMEM;
2709 	}
2710 
2711 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2712 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2713 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2714 	}
2715 
2716 	return 0;
2717 }
2718 
2719 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2720 {
2721 	void *strtab;
2722 	u64 reg;
2723 	u32 size, l1size;
2724 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2725 
2726 	/* Calculate the L1 size, capped to the SIDSIZE. */
2727 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2728 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2729 	cfg->num_l1_ents = 1 << size;
2730 
2731 	size += STRTAB_SPLIT;
2732 	if (size < smmu->sid_bits)
2733 		dev_warn(smmu->dev,
2734 			 "2-level strtab only covers %u/%u bits of SID\n",
2735 			 size, smmu->sid_bits);
2736 
2737 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2738 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2739 				     GFP_KERNEL);
2740 	if (!strtab) {
2741 		dev_err(smmu->dev,
2742 			"failed to allocate l1 stream table (%u bytes)\n",
2743 			l1size);
2744 		return -ENOMEM;
2745 	}
2746 	cfg->strtab = strtab;
2747 
2748 	/* Configure strtab_base_cfg for 2 levels */
2749 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2750 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2751 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2752 	cfg->strtab_base_cfg = reg;
2753 
2754 	return arm_smmu_init_l1_strtab(smmu);
2755 }
2756 
2757 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2758 {
2759 	void *strtab;
2760 	u64 reg;
2761 	u32 size;
2762 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2763 
2764 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2765 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2766 				     GFP_KERNEL);
2767 	if (!strtab) {
2768 		dev_err(smmu->dev,
2769 			"failed to allocate linear stream table (%u bytes)\n",
2770 			size);
2771 		return -ENOMEM;
2772 	}
2773 	cfg->strtab = strtab;
2774 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2775 
2776 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2777 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2778 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2779 	cfg->strtab_base_cfg = reg;
2780 
2781 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2782 	return 0;
2783 }
2784 
2785 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2786 {
2787 	u64 reg;
2788 	int ret;
2789 
2790 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2791 		ret = arm_smmu_init_strtab_2lvl(smmu);
2792 	else
2793 		ret = arm_smmu_init_strtab_linear(smmu);
2794 
2795 	if (ret)
2796 		return ret;
2797 
2798 	/* Set the strtab base address */
2799 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2800 	reg |= STRTAB_BASE_RA;
2801 	smmu->strtab_cfg.strtab_base = reg;
2802 
2803 	/* Allocate the first VMID for stage-2 bypass STEs */
2804 	set_bit(0, smmu->vmid_map);
2805 	return 0;
2806 }
2807 
2808 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2809 {
2810 	int ret;
2811 
2812 	ret = arm_smmu_init_queues(smmu);
2813 	if (ret)
2814 		return ret;
2815 
2816 	return arm_smmu_init_strtab(smmu);
2817 }
2818 
2819 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2820 				   unsigned int reg_off, unsigned int ack_off)
2821 {
2822 	u32 reg;
2823 
2824 	writel_relaxed(val, smmu->base + reg_off);
2825 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2826 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2827 }
2828 
2829 /* GBPA is "special" */
2830 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2831 {
2832 	int ret;
2833 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2834 
2835 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2836 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2837 	if (ret)
2838 		return ret;
2839 
2840 	reg &= ~clr;
2841 	reg |= set;
2842 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
2843 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2844 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2845 
2846 	if (ret)
2847 		dev_err(smmu->dev, "GBPA not responding to update\n");
2848 	return ret;
2849 }
2850 
2851 static void arm_smmu_free_msis(void *data)
2852 {
2853 	struct device *dev = data;
2854 	platform_msi_domain_free_irqs(dev);
2855 }
2856 
2857 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2858 {
2859 	phys_addr_t doorbell;
2860 	struct device *dev = msi_desc_to_dev(desc);
2861 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2862 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2863 
2864 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2865 	doorbell &= MSI_CFG0_ADDR_MASK;
2866 
2867 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2868 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2869 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2870 }
2871 
2872 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2873 {
2874 	struct msi_desc *desc;
2875 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2876 	struct device *dev = smmu->dev;
2877 
2878 	/* Clear the MSI address regs */
2879 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2880 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2881 
2882 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2883 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2884 	else
2885 		nvec--;
2886 
2887 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2888 		return;
2889 
2890 	if (!dev->msi_domain) {
2891 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2892 		return;
2893 	}
2894 
2895 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2896 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2897 	if (ret) {
2898 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2899 		return;
2900 	}
2901 
2902 	for_each_msi_entry(desc, dev) {
2903 		switch (desc->platform.msi_index) {
2904 		case EVTQ_MSI_INDEX:
2905 			smmu->evtq.q.irq = desc->irq;
2906 			break;
2907 		case GERROR_MSI_INDEX:
2908 			smmu->gerr_irq = desc->irq;
2909 			break;
2910 		case PRIQ_MSI_INDEX:
2911 			smmu->priq.q.irq = desc->irq;
2912 			break;
2913 		default:	/* Unknown */
2914 			continue;
2915 		}
2916 	}
2917 
2918 	/* Add callback to free MSIs on teardown */
2919 	devm_add_action(dev, arm_smmu_free_msis, dev);
2920 }
2921 
2922 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2923 {
2924 	int irq, ret;
2925 
2926 	arm_smmu_setup_msis(smmu);
2927 
2928 	/* Request interrupt lines */
2929 	irq = smmu->evtq.q.irq;
2930 	if (irq) {
2931 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2932 						arm_smmu_evtq_thread,
2933 						IRQF_ONESHOT,
2934 						"arm-smmu-v3-evtq", smmu);
2935 		if (ret < 0)
2936 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2937 	} else {
2938 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2939 	}
2940 
2941 	irq = smmu->gerr_irq;
2942 	if (irq) {
2943 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2944 				       0, "arm-smmu-v3-gerror", smmu);
2945 		if (ret < 0)
2946 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2947 	} else {
2948 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2949 	}
2950 
2951 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2952 		irq = smmu->priq.q.irq;
2953 		if (irq) {
2954 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2955 							arm_smmu_priq_thread,
2956 							IRQF_ONESHOT,
2957 							"arm-smmu-v3-priq",
2958 							smmu);
2959 			if (ret < 0)
2960 				dev_warn(smmu->dev,
2961 					 "failed to enable priq irq\n");
2962 		} else {
2963 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2964 		}
2965 	}
2966 }
2967 
2968 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2969 {
2970 	int ret, irq;
2971 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2972 
2973 	/* Disable IRQs first */
2974 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2975 				      ARM_SMMU_IRQ_CTRLACK);
2976 	if (ret) {
2977 		dev_err(smmu->dev, "failed to disable irqs\n");
2978 		return ret;
2979 	}
2980 
2981 	irq = smmu->combined_irq;
2982 	if (irq) {
2983 		/*
2984 		 * Cavium ThunderX2 implementation doesn't support unique irq
2985 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
2986 		 */
2987 		ret = devm_request_threaded_irq(smmu->dev, irq,
2988 					arm_smmu_combined_irq_handler,
2989 					arm_smmu_combined_irq_thread,
2990 					IRQF_ONESHOT,
2991 					"arm-smmu-v3-combined-irq", smmu);
2992 		if (ret < 0)
2993 			dev_warn(smmu->dev, "failed to enable combined irq\n");
2994 	} else
2995 		arm_smmu_setup_unique_irqs(smmu);
2996 
2997 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2998 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2999 
3000 	/* Enable interrupt generation on the SMMU */
3001 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3002 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3003 	if (ret)
3004 		dev_warn(smmu->dev, "failed to enable irqs\n");
3005 
3006 	return 0;
3007 }
3008 
3009 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3010 {
3011 	int ret;
3012 
3013 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3014 	if (ret)
3015 		dev_err(smmu->dev, "failed to clear cr0\n");
3016 
3017 	return ret;
3018 }
3019 
3020 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3021 {
3022 	int ret;
3023 	u32 reg, enables;
3024 	struct arm_smmu_cmdq_ent cmd;
3025 
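	/*
	 * Bring the SMMU up in stages: disable it, program the stream table
	 * and queues, enable the command queue and invalidate any cached
	 * configuration and TLB entries, enable the event/PRI queues and
	 * IRQs, and finally set SMMUEN (or leave the SMMU in bypass).
	 */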
3026 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3027 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3028 	if (reg & CR0_SMMUEN) {
3029 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3030 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3031 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3032 	}
3033 
3034 	ret = arm_smmu_device_disable(smmu);
3035 	if (ret)
3036 		return ret;
3037 
3038 	/* CR1 (table and queue memory attributes) */
3039 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3040 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3041 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3042 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3043 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3044 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3045 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3046 
3047 	/* CR2 (random crap) */
3048 	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3049 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3050 
3051 	/* Stream table */
3052 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3053 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3054 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3055 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3056 
3057 	/* Command queue */
3058 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3059 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3060 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3061 
3062 	enables = CR0_CMDQEN;
3063 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3064 				      ARM_SMMU_CR0ACK);
3065 	if (ret) {
3066 		dev_err(smmu->dev, "failed to enable command queue\n");
3067 		return ret;
3068 	}
3069 
3070 	/* Invalidate any cached configuration */
3071 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3072 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3073 	arm_smmu_cmdq_issue_sync(smmu);
3074 
3075 	/* Invalidate any stale TLB entries */
3076 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3077 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3078 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3079 	}
3080 
3081 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3082 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3083 	arm_smmu_cmdq_issue_sync(smmu);
3084 
3085 	/* Event queue */
3086 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3087 	writel_relaxed(smmu->evtq.q.llq.prod,
3088 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3089 	writel_relaxed(smmu->evtq.q.llq.cons,
3090 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3091 
3092 	enables |= CR0_EVTQEN;
3093 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3094 				      ARM_SMMU_CR0ACK);
3095 	if (ret) {
3096 		dev_err(smmu->dev, "failed to enable event queue\n");
3097 		return ret;
3098 	}
3099 
3100 	/* PRI queue */
3101 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3102 		writeq_relaxed(smmu->priq.q.q_base,
3103 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3104 		writel_relaxed(smmu->priq.q.llq.prod,
3105 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3106 		writel_relaxed(smmu->priq.q.llq.cons,
3107 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3108 
3109 		enables |= CR0_PRIQEN;
3110 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3111 					      ARM_SMMU_CR0ACK);
3112 		if (ret) {
3113 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3114 			return ret;
3115 		}
3116 	}
3117 
3118 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3119 		enables |= CR0_ATSCHK;
3120 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3121 					      ARM_SMMU_CR0ACK);
3122 		if (ret) {
3123 			dev_err(smmu->dev, "failed to enable ATS check\n");
3124 			return ret;
3125 		}
3126 	}
3127 
3128 	ret = arm_smmu_setup_irqs(smmu);
3129 	if (ret) {
3130 		dev_err(smmu->dev, "failed to setup irqs\n");
3131 		return ret;
3132 	}
3133 
3134 	if (is_kdump_kernel())
3135 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3136 
3137 	/* Enable the SMMU interface, or ensure bypass */
3138 	if (!bypass || disable_bypass) {
3139 		enables |= CR0_SMMUEN;
3140 	} else {
3141 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3142 		if (ret)
3143 			return ret;
3144 	}
3145 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3146 				      ARM_SMMU_CR0ACK);
3147 	if (ret) {
3148 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3149 		return ret;
3150 	}
3151 
3152 	return 0;
3153 }
3154 
3155 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3156 {
3157 	u32 reg;
3158 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3159 
3160 	/* IDR0 */
3161 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3162 
3163 	/* 2-level structures */
3164 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3165 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3166 
3167 	if (reg & IDR0_CD2L)
3168 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3169 
3170 	/*
3171 	 * Translation table endianness.
3172 	 * We currently require the same endianness as the CPU, but this
3173 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3174 	 */
3175 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3176 	case IDR0_TTENDIAN_MIXED:
3177 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3178 		break;
3179 #ifdef __BIG_ENDIAN
3180 	case IDR0_TTENDIAN_BE:
3181 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3182 		break;
3183 #else
3184 	case IDR0_TTENDIAN_LE:
3185 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3186 		break;
3187 #endif
3188 	default:
3189 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3190 		return -ENXIO;
3191 	}
3192 
3193 	/* Boolean feature flags */
3194 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3195 		smmu->features |= ARM_SMMU_FEAT_PRI;
3196 
3197 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3198 		smmu->features |= ARM_SMMU_FEAT_ATS;
3199 
3200 	if (reg & IDR0_SEV)
3201 		smmu->features |= ARM_SMMU_FEAT_SEV;
3202 
3203 	if (reg & IDR0_MSI) {
3204 		smmu->features |= ARM_SMMU_FEAT_MSI;
3205 		if (coherent && !disable_msipolling)
3206 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3207 	}
3208 
3209 	if (reg & IDR0_HYP)
3210 		smmu->features |= ARM_SMMU_FEAT_HYP;
3211 
3212 	/*
3213 	 * The coherency feature as set by FW is used in preference to the ID
3214 	 * register, but warn on mismatch.
3215 	 */
3216 	if (!!(reg & IDR0_COHACC) != coherent)
3217 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3218 			 coherent ? "true" : "false");
3219 
3220 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3221 	case IDR0_STALL_MODEL_FORCE:
3222 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3223 		fallthrough;
3224 	case IDR0_STALL_MODEL_STALL:
3225 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3226 	}
3227 
3228 	if (reg & IDR0_S1P)
3229 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3230 
3231 	if (reg & IDR0_S2P)
3232 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3233 
3234 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3235 		dev_err(smmu->dev, "no translation support!\n");
3236 		return -ENXIO;
3237 	}
3238 
3239 	/* We only support the AArch64 table format at present */
3240 	switch (FIELD_GET(IDR0_TTF, reg)) {
3241 	case IDR0_TTF_AARCH32_64:
3242 		smmu->ias = 40;
3243 		fallthrough;
3244 	case IDR0_TTF_AARCH64:
3245 		break;
3246 	default:
3247 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3248 		return -ENXIO;
3249 	}
3250 
3251 	/* ASID/VMID sizes */
3252 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3253 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3254 
3255 	/* IDR1 */
3256 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3257 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3258 		dev_err(smmu->dev, "embedded implementation not supported\n");
3259 		return -ENXIO;
3260 	}
3261 
3262 	/* Queue sizes, capped to ensure natural alignment */
3263 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3264 					     FIELD_GET(IDR1_CMDQS, reg));
3265 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3266 		/*
3267 		 * We don't support splitting up batches, so one batch of
3268 		 * commands plus an extra sync needs to fit inside the command
3269 		 * queue. There's also no way we can handle the weird alignment
3270 		 * restrictions on the base pointer for a unit-length queue.
3271 		 */
3272 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3273 			CMDQ_BATCH_ENTRIES);
3274 		return -ENXIO;
3275 	}
3276 
3277 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3278 					     FIELD_GET(IDR1_EVTQS, reg));
3279 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3280 					     FIELD_GET(IDR1_PRIQS, reg));
3281 
3282 	/* SID/SSID sizes */
3283 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3284 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3285 
3286 	/*
3287 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3288 	 * table, use a linear table instead.
3289 	 */
3290 	if (smmu->sid_bits <= STRTAB_SPLIT)
3291 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3292 
3293 	/* IDR3 */
3294 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3295 	if (FIELD_GET(IDR3_RIL, reg))
3296 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3297 
3298 	/* IDR5 */
3299 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3300 
3301 	/* Maximum number of outstanding stalls */
3302 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3303 
3304 	/* Page sizes */
3305 	if (reg & IDR5_GRAN64K)
3306 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3307 	if (reg & IDR5_GRAN16K)
3308 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3309 	if (reg & IDR5_GRAN4K)
3310 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3311 
3312 	/* Input address size */
3313 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3314 		smmu->features |= ARM_SMMU_FEAT_VAX;
3315 
3316 	/* Output address size */
3317 	switch (FIELD_GET(IDR5_OAS, reg)) {
3318 	case IDR5_OAS_32_BIT:
3319 		smmu->oas = 32;
3320 		break;
3321 	case IDR5_OAS_36_BIT:
3322 		smmu->oas = 36;
3323 		break;
3324 	case IDR5_OAS_40_BIT:
3325 		smmu->oas = 40;
3326 		break;
3327 	case IDR5_OAS_42_BIT:
3328 		smmu->oas = 42;
3329 		break;
3330 	case IDR5_OAS_44_BIT:
3331 		smmu->oas = 44;
3332 		break;
3333 	case IDR5_OAS_52_BIT:
3334 		smmu->oas = 52;
3335 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3336 		break;
3337 	default:
3338 		dev_info(smmu->dev,
3339 			"unknown output address size. Truncating to 48-bit\n");
3340 		fallthrough;
3341 	case IDR5_OAS_48_BIT:
3342 		smmu->oas = 48;
3343 	}
3344 
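	/*
	 * The shared ops structure starts out with pgsize_bitmap == -1UL; the
	 * first SMMU to probe seeds it, and every later instance ORs in its
	 * own sizes, so the advertised set is the union across all SMMUs.
	 */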
3345 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3346 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3347 	else
3348 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3349 
3350 	/* Set the DMA mask for our table walker */
3351 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3352 		dev_warn(smmu->dev,
3353 			 "failed to set DMA mask for table walker\n");
3354 
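	/*
	 * Stage-2 input (IPA) addresses may need to cover the whole output
	 * range, so never advertise an IAS smaller than the OAS.
	 */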
3355 	smmu->ias = max(smmu->ias, smmu->oas);
3356 
3357 	if (arm_smmu_sva_supported(smmu))
3358 		smmu->features |= ARM_SMMU_FEAT_SVA;
3359 
3360 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3361 		 smmu->ias, smmu->oas, smmu->features);
3362 	return 0;
3363 }
3364 
3365 #ifdef CONFIG_ACPI
3366 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3367 {
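	/*
	 * These IORT model numbers select the same implementation quirks that
	 * the DT path enables via arm_smmu_options[].
	 */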
3368 	switch (model) {
3369 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3370 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3371 		break;
3372 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3373 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3374 		break;
3375 	}
3376 
3377 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3378 }
3379 
3380 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3381 				      struct arm_smmu_device *smmu)
3382 {
3383 	struct acpi_iort_smmu_v3 *iort_smmu;
3384 	struct device *dev = smmu->dev;
3385 	struct acpi_iort_node *node;
3386 
3387 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3388 
3389 	/* Retrieve SMMUv3 specific data */
3390 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3391 
3392 	acpi_smmu_get_options(iort_smmu->model, smmu);
3393 
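	/*
	 * The IORT COHACC override flag marks the SMMU's table walks as cache
	 * coherent, mirroring of_dma_is_coherent() on the DT path.
	 */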
3394 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3395 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3396 
3397 	return 0;
3398 }
3399 #else
3400 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3401 					     struct arm_smmu_device *smmu)
3402 {
3403 	return -ENODEV;
3404 }
3405 #endif
3406 
3407 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3408 				    struct arm_smmu_device *smmu)
3409 {
3410 	struct device *dev = &pdev->dev;
3411 	u32 cells;
3412 	int ret = -EINVAL;
3413 
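	/*
	 * The SMMUv3 binding describes each master with a single cell (the
	 * StreamID), hence #iommu-cells must be exactly 1.
	 */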
3414 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3415 		dev_err(dev, "missing #iommu-cells property\n");
3416 	else if (cells != 1)
3417 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3418 	else
3419 		ret = 0;
3420 
3421 	parse_driver_options(smmu);
3422 
3423 	if (of_dma_is_coherent(dev->of_node))
3424 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3425 
3426 	return ret;
3427 }
3428 
3429 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3430 {
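	/*
	 * Cavium CN99xx parts only implement the 64K page 0 register space;
	 * everything else exposes both architected 64K pages.
	 */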
3431 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3432 		return SZ_64K;
3433 	else
3434 		return SZ_128K;
3435 }
3436 
3437 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3438 {
3439 	int err;
3440 
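	/*
	 * Install (or tear down, when ops is NULL) the IOMMU ops on every bus
	 * type this driver can serve, unwinding in reverse order on failure.
	 */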
3441 #ifdef CONFIG_PCI
3442 	if (pci_bus_type.iommu_ops != ops) {
3443 		err = bus_set_iommu(&pci_bus_type, ops);
3444 		if (err)
3445 			return err;
3446 	}
3447 #endif
3448 #ifdef CONFIG_ARM_AMBA
3449 	if (amba_bustype.iommu_ops != ops) {
3450 		err = bus_set_iommu(&amba_bustype, ops);
3451 		if (err)
3452 			goto err_reset_pci_ops;
3453 	}
3454 #endif
3455 	if (platform_bus_type.iommu_ops != ops) {
3456 		err = bus_set_iommu(&platform_bus_type, ops);
3457 		if (err)
3458 			goto err_reset_amba_ops;
3459 	}
3460 
3461 	return 0;
3462 
3463 err_reset_amba_ops:
3464 #ifdef CONFIG_ARM_AMBA
3465 	bus_set_iommu(&amba_bustype, NULL);
3466 #endif
3467 err_reset_pci_ops: __maybe_unused;
3468 #ifdef CONFIG_PCI
3469 	bus_set_iommu(&pci_bus_type, NULL);
3470 #endif
3471 	return err;
3472 }
3473 
3474 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3475 				      resource_size_t size)
3476 {
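	/*
	 * Build an explicit sub-resource so that only the requested slice of
	 * the SMMU's MMIO window is claimed and mapped, letting the probe
	 * path skip the IMPLEMENTATION DEFINED region reserved for the PMCG.
	 */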
3477 	struct resource res = {
3478 		.flags = IORESOURCE_MEM,
3479 		.start = start,
3480 		.end = start + size - 1,
3481 	};
3482 
3483 	return devm_ioremap_resource(dev, &res);
3484 }
3485 
3486 static int arm_smmu_device_probe(struct platform_device *pdev)
3487 {
3488 	int irq, ret;
3489 	struct resource *res;
3490 	resource_size_t ioaddr;
3491 	struct arm_smmu_device *smmu;
3492 	struct device *dev = &pdev->dev;
3493 	bool bypass;
3494 
3495 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3496 	if (!smmu) {
3497 		dev_err(dev, "failed to allocate arm_smmu_device\n");
3498 		return -ENOMEM;
3499 	}
3500 	smmu->dev = dev;
3501 
3502 	if (dev->of_node) {
3503 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3504 	} else {
3505 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3506 		if (ret == -ENODEV)
3507 			return ret;
3508 	}
3509 
3510 	/* Set bypass mode according to firmware probing result */
3511 	bypass = !!ret;
3512 
3513 	/* Base address */
3514 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3515 	if (!res)
3516 		return -EINVAL;
3517 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3518 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3519 		return -EINVAL;
3520 	}
3521 	ioaddr = res->start;
3522 
3523 	/*
3524 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3525 	 * the PMCG registers which are reserved by the PMU driver.
3526 	 */
3527 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3528 	if (IS_ERR(smmu->base))
3529 		return PTR_ERR(smmu->base);
3530 
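	/*
	 * Page 1 carries the queue index registers used by this driver. With
	 * the PAGE0_REGS_ONLY quirk those live in page 0 instead, so page1
	 * simply aliases the base mapping.
	 */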
3531 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3532 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3533 					       ARM_SMMU_REG_SZ);
3534 		if (IS_ERR(smmu->page1))
3535 			return PTR_ERR(smmu->page1);
3536 	} else {
3537 		smmu->page1 = smmu->base;
3538 	}
3539 
3540 	/* Interrupt lines */
3541 
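	/*
	 * Firmware may wire up a single "combined" interrupt or separate
	 * "eventq", "priq" and "gerror" lines; all of them are optional here.
	 */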
3542 	irq = platform_get_irq_byname_optional(pdev, "combined");
3543 	if (irq > 0)
3544 		smmu->combined_irq = irq;
3545 	else {
3546 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3547 		if (irq > 0)
3548 			smmu->evtq.q.irq = irq;
3549 
3550 		irq = platform_get_irq_byname_optional(pdev, "priq");
3551 		if (irq > 0)
3552 			smmu->priq.q.irq = irq;
3553 
3554 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3555 		if (irq > 0)
3556 			smmu->gerr_irq = irq;
3557 	}
3558 	/* Probe the h/w */
3559 	ret = arm_smmu_device_hw_probe(smmu);
3560 	if (ret)
3561 		return ret;
3562 
3563 	/* Initialise in-memory data structures */
3564 	ret = arm_smmu_init_structures(smmu);
3565 	if (ret)
3566 		return ret;
3567 
3568 	/* Record our private device structure */
3569 	platform_set_drvdata(pdev, smmu);
3570 
3571 	/* Reset the device */
3572 	ret = arm_smmu_device_reset(smmu, bypass);
3573 	if (ret)
3574 		return ret;
3575 
3576 	/* And we're up. Go go go! */
3577 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3578 				     "smmu3.%pa", &ioaddr);
3579 	if (ret)
3580 		return ret;
3581 
3582 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3583 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3584 
3585 	ret = iommu_device_register(&smmu->iommu);
3586 	if (ret) {
3587 		dev_err(dev, "Failed to register iommu\n");
3588 		return ret;
3589 	}
3590 
3591 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
3592 }
3593 
3594 static int arm_smmu_device_remove(struct platform_device *pdev)
3595 {
3596 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3597 
3598 	arm_smmu_set_bus_ops(NULL);
3599 	iommu_device_unregister(&smmu->iommu);
3600 	iommu_device_sysfs_remove(&smmu->iommu);
3601 	arm_smmu_device_disable(smmu);
3602 
3603 	return 0;
3604 }
3605 
3606 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3607 {
3608 	arm_smmu_device_remove(pdev);
3609 }
3610 
3611 static const struct of_device_id arm_smmu_of_match[] = {
3612 	{ .compatible = "arm,smmu-v3", },
3613 	{ },
3614 };
3615 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3616 
3617 static struct platform_driver arm_smmu_driver = {
3618 	.driver	= {
3619 		.name			= "arm-smmu-v3",
3620 		.of_match_table		= arm_smmu_of_match,
3621 		.suppress_bind_attrs	= true,
3622 	},
3623 	.probe	= arm_smmu_device_probe,
3624 	.remove	= arm_smmu_device_remove,
3625 	.shutdown = arm_smmu_device_shutdown,
3626 };
3627 module_platform_driver(arm_smmu_driver);
3628 
3629 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3630 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3631 MODULE_ALIAS("platform:arm-smmu-v3");
3632 MODULE_LICENSE("GPL v2");
3633