1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_platform.h>
27 #include <linux/pci.h>
28 #include <linux/pci-ats.h>
29 #include <linux/platform_device.h>
30 
31 #include <linux/amba/bus.h>
32 
33 #include "arm-smmu-v3.h"
34 #include "../../iommu-sva-lib.h"
35 
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40 
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 	"Disable MSI-based polling for CMD_SYNC completion.");
45 
46 enum arm_smmu_msi_index {
47 	EVTQ_MSI_INDEX,
48 	GERROR_MSI_INDEX,
49 	PRIQ_MSI_INDEX,
50 	ARM_SMMU_MAX_MSIS,
51 };
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
84 
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88 	{ 0, NULL},
89 };
90 
91 static void parse_driver_options(struct arm_smmu_device *smmu)
92 {
93 	int i = 0;
94 
95 	do {
96 		if (of_property_read_bool(smmu->dev->of_node,
97 						arm_smmu_options[i].prop)) {
98 			smmu->options |= arm_smmu_options[i].opt;
99 			dev_notice(smmu->dev, "option %s\n",
100 				arm_smmu_options[i].prop);
101 		}
102 	} while (arm_smmu_options[++i].opt);
103 }
104 
105 /* Low-level queue manipulation functions */
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 {
108 	u32 space, prod, cons;
109 
110 	prod = Q_IDX(q, q->prod);
111 	cons = Q_IDX(q, q->cons);
112 
113 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114 		space = (1 << q->max_n_shift) - (prod - cons);
115 	else
116 		space = cons - prod;
117 
118 	return space >= n;
119 }
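/*
 * A short worked example of the index layout used by Q_IDX()/Q_WRP()/Q_OVF()
 * as defined in arm-smmu-v3.h (index in the low max_n_shift bits, wrap bit
 * immediately above, overflow flag in bit 31): for a 256-entry queue
 * (max_n_shift == 8), prod == 0x105 and cons == 0x00a sit on different wraps,
 * so queue_has_space() computes space = cons - prod = 0x0a - 0x05 = 5 free
 * entries; on the same wrap it would be (1 << 8) - (prod - cons).
 */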
120 
121 static bool queue_full(struct arm_smmu_ll_queue *q)
122 {
123 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 }
126 
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 {
129 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 }
132 
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
134 {
135 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
139 }
140 
141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
142 {
143 	/*
144 	 * Ensure that all CPU accesses (reads and writes) to the queue
145 	 * are complete before we update the cons pointer.
146 	 */
147 	__iomb();
148 	writel_relaxed(q->llq.cons, q->cons_reg);
149 }
150 
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 {
153 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 }
156 
157 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
158 {
159 	struct arm_smmu_ll_queue *llq = &q->llq;
160 
161 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
162 		return;
163 
164 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
165 		      Q_IDX(llq, llq->cons);
166 	queue_sync_cons_out(q);
167 }
168 
169 static int queue_sync_prod_in(struct arm_smmu_queue *q)
170 {
171 	u32 prod;
172 	int ret = 0;
173 
174 	/*
175 	 * We can't use the _relaxed() variant here, as we must prevent
176 	 * speculative reads of the queue before we have determined that
177 	 * prod has indeed moved.
178 	 */
179 	prod = readl(q->prod_reg);
180 
181 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
182 		ret = -EOVERFLOW;
183 
184 	q->llq.prod = prod;
185 	return ret;
186 }
187 
188 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
189 {
190 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
191 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
192 }
193 
194 static void queue_poll_init(struct arm_smmu_device *smmu,
195 			    struct arm_smmu_queue_poll *qp)
196 {
197 	qp->delay = 1;
198 	qp->spin_cnt = 0;
199 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
200 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
201 }
202 
203 static int queue_poll(struct arm_smmu_queue_poll *qp)
204 {
205 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
206 		return -ETIMEDOUT;
207 
208 	if (qp->wfe) {
209 		wfe();
210 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
211 		cpu_relax();
212 	} else {
213 		udelay(qp->delay);
214 		qp->delay *= 2;
215 		qp->spin_cnt = 0;
216 	}
217 
218 	return 0;
219 }
220 
221 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
222 {
223 	int i;
224 
225 	for (i = 0; i < n_dwords; ++i)
226 		*dst++ = cpu_to_le64(*src++);
227 }
228 
229 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
230 {
231 	int i;
232 
233 	for (i = 0; i < n_dwords; ++i)
234 		*dst++ = le64_to_cpu(*src++);
235 }
236 
237 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
238 {
239 	if (queue_empty(&q->llq))
240 		return -EAGAIN;
241 
242 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
243 	queue_inc_cons(&q->llq);
244 	queue_sync_cons_out(q);
245 	return 0;
246 }
247 
248 /* High-level queue accessors */
249 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
250 {
251 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
252 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
253 
254 	switch (ent->opcode) {
255 	case CMDQ_OP_TLBI_EL2_ALL:
256 	case CMDQ_OP_TLBI_NSNH_ALL:
257 		break;
258 	case CMDQ_OP_PREFETCH_CFG:
259 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
260 		break;
261 	case CMDQ_OP_CFGI_CD:
262 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
263 		fallthrough;
264 	case CMDQ_OP_CFGI_STE:
265 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
266 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
267 		break;
268 	case CMDQ_OP_CFGI_CD_ALL:
269 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
270 		break;
271 	case CMDQ_OP_CFGI_ALL:
272 		/* Cover the entire SID range */
273 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
274 		break;
275 	case CMDQ_OP_TLBI_NH_VA:
276 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
277 		fallthrough;
278 	case CMDQ_OP_TLBI_EL2_VA:
279 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
280 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
281 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
282 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
283 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
284 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
285 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
286 		break;
287 	case CMDQ_OP_TLBI_S2_IPA:
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
289 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
290 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
291 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
292 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
293 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
294 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
295 		break;
296 	case CMDQ_OP_TLBI_NH_ASID:
297 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
298 		fallthrough;
299 	case CMDQ_OP_TLBI_S12_VMALL:
300 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
301 		break;
302 	case CMDQ_OP_TLBI_EL2_ASID:
303 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
304 		break;
305 	case CMDQ_OP_ATC_INV:
306 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
307 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
308 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
309 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
310 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
311 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
312 		break;
313 	case CMDQ_OP_PRI_RESP:
314 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
315 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
316 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
317 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
318 		switch (ent->pri.resp) {
319 		case PRI_RESP_DENY:
320 		case PRI_RESP_FAIL:
321 		case PRI_RESP_SUCC:
322 			break;
323 		default:
324 			return -EINVAL;
325 		}
326 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
327 		break;
328 	case CMDQ_OP_RESUME:
329 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
330 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
331 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
332 		break;
333 	case CMDQ_OP_CMD_SYNC:
334 		if (ent->sync.msiaddr) {
335 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
336 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
337 		} else {
338 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
339 		}
340 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
341 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
342 		break;
343 	default:
344 		return -ENOENT;
345 	}
346 
347 	return 0;
348 }
349 
350 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
351 {
352 	return &smmu->cmdq;
353 }
354 
355 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
356 					 struct arm_smmu_queue *q, u32 prod)
357 {
358 	struct arm_smmu_cmdq_ent ent = {
359 		.opcode = CMDQ_OP_CMD_SYNC,
360 	};
361 
362 	/*
363 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
364 	 * payload, so the write will zero the entire command on that platform.
365 	 */
366 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
367 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
368 				   q->ent_dwords * 8;
369 	}
370 
371 	arm_smmu_cmdq_build_cmd(cmd, &ent);
372 }
373 
374 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
375 				     struct arm_smmu_queue *q)
376 {
377 	static const char * const cerror_str[] = {
378 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
379 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
380 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
381 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
382 	};
383 
384 	int i;
385 	u64 cmd[CMDQ_ENT_DWORDS];
386 	u32 cons = readl_relaxed(q->cons_reg);
387 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
388 	struct arm_smmu_cmdq_ent cmd_sync = {
389 		.opcode = CMDQ_OP_CMD_SYNC,
390 	};
391 
392 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
393 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
394 
395 	switch (idx) {
396 	case CMDQ_ERR_CERROR_ABT_IDX:
397 		dev_err(smmu->dev, "retrying command fetch\n");
398 		return;
399 	case CMDQ_ERR_CERROR_NONE_IDX:
400 		return;
401 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
402 		/*
403 		 * ATC Invalidation Completion timeout. CONS is still pointing
404 		 * at the CMD_SYNC. Attempt to complete other pending commands
405 		 * by repeating the CMD_SYNC, though we might well end up back
406 		 * here since the ATC invalidation may still be pending.
407 		 */
408 		return;
409 	case CMDQ_ERR_CERROR_ILL_IDX:
410 	default:
411 		break;
412 	}
413 
414 	/*
415 	 * We may have concurrent producers, so we need to be careful
416 	 * not to touch any of the shadow cmdq state.
417 	 */
418 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
419 	dev_err(smmu->dev, "skipping command in error state:\n");
420 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
421 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
422 
423 	/* Convert the erroneous command into a CMD_SYNC */
424 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
425 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
426 		return;
427 	}
428 
429 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
430 }
431 
432 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
433 {
434 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
435 }
436 
437 /*
438  * Command queue locking.
439  * This is a form of bastardised rwlock with the following major changes:
440  *
441  * - The only LOCK routines are exclusive_trylock() and shared_lock().
442  *   Neither have barrier semantics, and instead provide only a control
443  *   dependency.
444  *
445  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
446  *   fails if the caller appears to be the last lock holder (yes, this is
447  *   racy). All successful UNLOCK routines have RELEASE semantics.
448  */
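/*
 * Roughly, the lock word takes the following values (see the helpers below):
 *
 *   0        unlocked
 *   n > 0    held shared by n CMD_SYNC waiters
 *   < 0      held exclusive (set to INT_MIN by exclusive_trylock); pending
 *            shared_lock() increments keep the value negative, so readers
 *            spin until the exclusive holder releases it back to zero.
 */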
449 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
450 {
451 	int val;
452 
453 	/*
454 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
455 	 * lock counter. When held in exclusive state, the lock counter is set
456 	 * to INT_MIN so these increments won't hurt as the value will remain
457 	 * negative.
458 	 */
459 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
460 		return;
461 
462 	do {
463 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
464 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
465 }
466 
467 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
468 {
469 	(void)atomic_dec_return_release(&cmdq->lock);
470 }
471 
472 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
473 {
474 	if (atomic_read(&cmdq->lock) == 1)
475 		return false;
476 
477 	arm_smmu_cmdq_shared_unlock(cmdq);
478 	return true;
479 }
480 
481 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
482 ({									\
483 	bool __ret;							\
484 	local_irq_save(flags);						\
485 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
486 	if (!__ret)							\
487 		local_irq_restore(flags);				\
488 	__ret;								\
489 })
490 
491 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
492 ({									\
493 	atomic_set_release(&cmdq->lock, 0);				\
494 	local_irq_restore(flags);					\
495 })
496 
497 
498 /*
499  * Command queue insertion.
500  * This is made fiddly by our attempts to achieve some sort of scalability
501  * since there is one queue shared amongst all of the CPUs in the system.  If
502  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
503  * then you'll *love* this monstrosity.
504  *
505  * The basic idea is to split the queue up into ranges of commands that are
506  * owned by a given CPU; the owner may not have written all of the commands
507  * itself, but is responsible for advancing the hardware prod pointer when
508  * the time comes. The algorithm is roughly:
509  *
510  * 	1. Allocate some space in the queue. At this point we also discover
511  *	   whether the head of the queue is currently owned by another CPU,
512  *	   or whether we are the owner.
513  *
514  *	2. Write our commands into our allocated slots in the queue.
515  *
516  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
517  *
518  *	4. If we are an owner:
519  *		a. Wait for the previous owner to finish.
520  *		b. Mark the queue head as unowned, which tells us the range
521  *		   that we are responsible for publishing.
522  *		c. Wait for all commands in our owned range to become valid.
523  *		d. Advance the hardware prod pointer.
524  *		e. Tell the next owner we've finished.
525  *
526  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
527  *	   owner), then we need to stick around until it has completed:
528  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
529  *		   to clear the first 4 bytes.
530  *		b. Otherwise, we spin waiting for the hardware cons pointer to
531  *		   advance past our command.
532  *
533  * The devil is in the details, particularly the use of locking for handling
534  * SYNC completion and freeing up space in the queue before we think that it is
535  * full.
536  */
537 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
538 					       u32 sprod, u32 eprod, bool set)
539 {
540 	u32 swidx, sbidx, ewidx, ebidx;
541 	struct arm_smmu_ll_queue llq = {
542 		.max_n_shift	= cmdq->q.llq.max_n_shift,
543 		.prod		= sprod,
544 	};
545 
546 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
547 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
548 
549 	while (llq.prod != eprod) {
550 		unsigned long mask;
551 		atomic_long_t *ptr;
552 		u32 limit = BITS_PER_LONG;
553 
554 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
555 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
556 
557 		ptr = &cmdq->valid_map[swidx];
558 
559 		if ((swidx == ewidx) && (sbidx < ebidx))
560 			limit = ebidx;
561 
562 		mask = GENMASK(limit - 1, sbidx);
563 
564 		/*
565 		 * The valid bit is the inverse of the wrap bit. This means
566 		 * that a zero-initialised queue is invalid and, after marking
567 		 * all entries as valid, they become invalid again when we
568 		 * wrap.
569 		 */
570 		if (set) {
571 			atomic_long_xor(mask, ptr);
572 		} else { /* Poll */
573 			unsigned long valid;
574 
575 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
576 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
577 		}
578 
579 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
580 	}
581 }
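/*
 * Sketch of the inverse-wrap trick above: on the first lap Q_WRP(prod) is
 * clear, so "valid" means bit == 1 and writers XOR 1s into the
 * zero-initialised map. Once prod wraps, Q_WRP(prod) is set, "valid" now
 * means bit == 0, the leftover 1s from the previous lap read as invalid, and
 * the same XOR flips them back to 0 -- no explicit clearing pass is needed.
 */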
582 
583 /* Mark all entries in the range [sprod, eprod) as valid */
584 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
585 					u32 sprod, u32 eprod)
586 {
587 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
588 }
589 
590 /* Wait for all entries in the range [sprod, eprod) to become valid */
591 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
592 					 u32 sprod, u32 eprod)
593 {
594 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
595 }
596 
597 /* Wait for the command queue to become non-full */
598 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
599 					     struct arm_smmu_ll_queue *llq)
600 {
601 	unsigned long flags;
602 	struct arm_smmu_queue_poll qp;
603 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
604 	int ret = 0;
605 
606 	/*
607 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
608 	 * that fails, spin until somebody else updates it for us.
609 	 */
610 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
611 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
612 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
613 		llq->val = READ_ONCE(cmdq->q.llq.val);
614 		return 0;
615 	}
616 
617 	queue_poll_init(smmu, &qp);
618 	do {
619 		llq->val = READ_ONCE(cmdq->q.llq.val);
620 		if (!queue_full(llq))
621 			break;
622 
623 		ret = queue_poll(&qp);
624 	} while (!ret);
625 
626 	return ret;
627 }
628 
629 /*
630  * Wait until the SMMU signals a CMD_SYNC completion MSI.
631  * Must be called with the cmdq lock held in some capacity.
632  */
633 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
634 					  struct arm_smmu_ll_queue *llq)
635 {
636 	int ret = 0;
637 	struct arm_smmu_queue_poll qp;
638 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
639 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
640 
641 	queue_poll_init(smmu, &qp);
642 
643 	/*
644 	 * The MSI won't generate an event, since it's being written back
645 	 * into the command queue.
646 	 */
647 	qp.wfe = false;
648 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
649 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
650 	return ret;
651 }
652 
653 /*
654  * Wait until the SMMU cons index passes llq->prod.
655  * Must be called with the cmdq lock held in some capacity.
656  */
657 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
658 					       struct arm_smmu_ll_queue *llq)
659 {
660 	struct arm_smmu_queue_poll qp;
661 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
662 	u32 prod = llq->prod;
663 	int ret = 0;
664 
665 	queue_poll_init(smmu, &qp);
666 	llq->val = READ_ONCE(cmdq->q.llq.val);
667 	do {
668 		if (queue_consumed(llq, prod))
669 			break;
670 
671 		ret = queue_poll(&qp);
672 
673 		/*
674 		 * This needs to be a readl() so that our subsequent call
675 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
676 		 *
677 		 * Specifically, we need to ensure that we observe all
678 		 * shared_lock()s by other CMD_SYNCs that share our owner,
679 		 * so that a failing call to tryunlock() means that we're
680 		 * the last one out and therefore we can safely advance
681 		 * cmdq->q.llq.cons. Roughly speaking:
682 		 *
683 		 * CPU 0		CPU1			CPU2 (us)
684 		 *
685 		 * if (sync)
686 		 * 	shared_lock();
687 		 *
688 		 * dma_wmb();
689 		 * set_valid_map();
690 		 *
691 		 * 			if (owner) {
692 		 *				poll_valid_map();
693 		 *				<control dependency>
694 		 *				writel(prod_reg);
695 		 *
696 		 *						readl(cons_reg);
697 		 *						tryunlock();
698 		 *
699 		 * Requires us to see CPU 0's shared_lock() acquisition.
700 		 */
701 		llq->cons = readl(cmdq->q.cons_reg);
702 	} while (!ret);
703 
704 	return ret;
705 }
706 
707 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
708 					 struct arm_smmu_ll_queue *llq)
709 {
710 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
711 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
712 
713 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
714 }
715 
716 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
717 					u32 prod, int n)
718 {
719 	int i;
720 	struct arm_smmu_ll_queue llq = {
721 		.max_n_shift	= cmdq->q.llq.max_n_shift,
722 		.prod		= prod,
723 	};
724 
725 	for (i = 0; i < n; ++i) {
726 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
727 
728 		prod = queue_inc_prod_n(&llq, i);
729 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
730 	}
731 }
732 
733 /*
734  * This is the actual insertion function, and provides the following
735  * ordering guarantees to callers:
736  *
737  * - There is a dma_wmb() before publishing any commands to the queue.
738  *   This can be relied upon to order prior writes to data structures
739  *   in memory (such as a CD or an STE) before the command.
740  *
741  * - On completion of a CMD_SYNC, there is a control dependency.
742  *   This can be relied upon to order subsequent writes to memory (e.g.
743  *   freeing an IOVA) after completion of the CMD_SYNC.
744  *
745  * - Command insertion is totally ordered, so if two CPUs each race to
746  *   insert their own list of commands then all of the commands from one
747  *   CPU will appear before any of the commands from the other CPU.
748  */
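/*
 * For instance, the CD and STE update paths below (arm_smmu_sync_cd(),
 * arm_smmu_sync_ste_for_sid()) lean on the first guarantee: their table
 * writes in memory are ordered before the CFGI command by the dma_wmb()
 * taken here prior to marking the command slots valid.
 */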
749 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
750 				       u64 *cmds, int n, bool sync)
751 {
752 	u64 cmd_sync[CMDQ_ENT_DWORDS];
753 	u32 prod;
754 	unsigned long flags;
755 	bool owner;
756 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
757 	struct arm_smmu_ll_queue llq, head;
758 	int ret = 0;
759 
760 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
761 
762 	/* 1. Allocate some space in the queue */
763 	local_irq_save(flags);
764 	llq.val = READ_ONCE(cmdq->q.llq.val);
765 	do {
766 		u64 old;
767 
768 		while (!queue_has_space(&llq, n + sync)) {
769 			local_irq_restore(flags);
770 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
771 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
772 			local_irq_save(flags);
773 		}
774 
775 		head.cons = llq.cons;
776 		head.prod = queue_inc_prod_n(&llq, n + sync) |
777 					     CMDQ_PROD_OWNED_FLAG;
778 
779 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
780 		if (old == llq.val)
781 			break;
782 
783 		llq.val = old;
784 	} while (1);
785 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
786 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
787 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
788 
789 	/*
790 	 * 2. Write our commands into the queue
791 	 * Dependency ordering from the cmpxchg() loop above.
792 	 */
793 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
794 	if (sync) {
795 		prod = queue_inc_prod_n(&llq, n);
796 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
797 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
798 
799 		/*
800 		 * In order to determine completion of our CMD_SYNC, we must
801 		 * ensure that the queue can't wrap twice without us noticing.
802 		 * We achieve that by taking the cmdq lock as shared before
803 		 * marking our slot as valid.
804 		 */
805 		arm_smmu_cmdq_shared_lock(cmdq);
806 	}
807 
808 	/* 3. Mark our slots as valid, ensuring commands are visible first */
809 	dma_wmb();
810 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
811 
812 	/* 4. If we are the owner, take control of the SMMU hardware */
813 	if (owner) {
814 		/* a. Wait for previous owner to finish */
815 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
816 
817 		/* b. Stop gathering work by clearing the owned flag */
818 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
819 						   &cmdq->q.llq.atomic.prod);
820 		prod &= ~CMDQ_PROD_OWNED_FLAG;
821 
822 		/*
823 		 * c. Wait for any gathered work to be written to the queue.
824 		 * Note that we read our own entries so that we have the control
825 		 * dependency required by (d).
826 		 */
827 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
828 
829 		/*
830 		 * d. Advance the hardware prod pointer
831 		 * Control dependency ordering from the entries becoming valid.
832 		 */
833 		writel_relaxed(prod, cmdq->q.prod_reg);
834 
835 		/*
836 		 * e. Tell the next owner we're done
837 		 * Make sure we've updated the hardware first, so that we don't
838 		 * race to update prod and potentially move it backwards.
839 		 */
840 		atomic_set_release(&cmdq->owner_prod, prod);
841 	}
842 
843 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
844 	if (sync) {
845 		llq.prod = queue_inc_prod_n(&llq, n);
846 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
847 		if (ret) {
848 			dev_err_ratelimited(smmu->dev,
849 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
850 					    llq.prod,
851 					    readl_relaxed(cmdq->q.prod_reg),
852 					    readl_relaxed(cmdq->q.cons_reg));
853 		}
854 
855 		/*
856 		 * Try to unlock the cmdq lock. This will fail if we're the last
857 		 * reader, in which case we can safely update cmdq->q.llq.cons
858 		 */
859 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
860 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
861 			arm_smmu_cmdq_shared_unlock(cmdq);
862 		}
863 	}
864 
865 	local_irq_restore(flags);
866 	return ret;
867 }
868 
869 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
870 				     struct arm_smmu_cmdq_ent *ent,
871 				     bool sync)
872 {
873 	u64 cmd[CMDQ_ENT_DWORDS];
874 
875 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
876 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
877 			 ent->opcode);
878 		return -EINVAL;
879 	}
880 
881 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
882 }
883 
884 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
885 				   struct arm_smmu_cmdq_ent *ent)
886 {
887 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
888 }
889 
890 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
891 					     struct arm_smmu_cmdq_ent *ent)
892 {
893 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
894 }
895 
896 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
897 				    struct arm_smmu_cmdq_batch *cmds,
898 				    struct arm_smmu_cmdq_ent *cmd)
899 {
900 	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
901 	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
902 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
903 		cmds->num = 0;
904 	}
905 
906 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
907 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
908 		cmds->num = 0;
909 	}
910 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
911 	cmds->num++;
912 }
913 
914 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
915 				      struct arm_smmu_cmdq_batch *cmds)
916 {
917 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
918 }
919 
920 static int arm_smmu_page_response(struct device *dev,
921 				  struct iommu_fault_event *unused,
922 				  struct iommu_page_response *resp)
923 {
924 	struct arm_smmu_cmdq_ent cmd = {0};
925 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
926 	int sid = master->streams[0].id;
927 
928 	if (master->stall_enabled) {
929 		cmd.opcode		= CMDQ_OP_RESUME;
930 		cmd.resume.sid		= sid;
931 		cmd.resume.stag		= resp->grpid;
932 		switch (resp->code) {
933 		case IOMMU_PAGE_RESP_INVALID:
934 		case IOMMU_PAGE_RESP_FAILURE:
935 			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
936 			break;
937 		case IOMMU_PAGE_RESP_SUCCESS:
938 			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
939 			break;
940 		default:
941 			return -EINVAL;
942 		}
943 	} else {
944 		return -ENODEV;
945 	}
946 
947 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
948 	/*
949 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
950 	 * RESUME consumption guarantees that the stalled transaction will be
951 	 * terminated... at some point in the future. PRI_RESP is fire and
952 	 * forget.
953 	 */
954 
955 	return 0;
956 }
957 
958 /* Context descriptor manipulation functions */
959 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
960 {
961 	struct arm_smmu_cmdq_ent cmd = {
962 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
963 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
964 		.tlbi.asid = asid,
965 	};
966 
967 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
968 }
969 
970 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
971 			     int ssid, bool leaf)
972 {
973 	size_t i;
974 	unsigned long flags;
975 	struct arm_smmu_master *master;
976 	struct arm_smmu_cmdq_batch cmds;
977 	struct arm_smmu_device *smmu = smmu_domain->smmu;
978 	struct arm_smmu_cmdq_ent cmd = {
979 		.opcode	= CMDQ_OP_CFGI_CD,
980 		.cfgi	= {
981 			.ssid	= ssid,
982 			.leaf	= leaf,
983 		},
984 	};
985 
986 	cmds.num = 0;
987 
988 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
989 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
990 		for (i = 0; i < master->num_streams; i++) {
991 			cmd.cfgi.sid = master->streams[i].id;
992 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
993 		}
994 	}
995 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
996 
997 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
998 }
999 
1000 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1001 					struct arm_smmu_l1_ctx_desc *l1_desc)
1002 {
1003 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1004 
1005 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1006 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1007 	if (!l1_desc->l2ptr) {
1008 		dev_warn(smmu->dev,
1009 			 "failed to allocate context descriptor table\n");
1010 		return -ENOMEM;
1011 	}
1012 	return 0;
1013 }
1014 
1015 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1016 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1017 {
1018 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1019 		  CTXDESC_L1_DESC_V;
1020 
1021 	/* See comment in arm_smmu_write_ctx_desc() */
1022 	WRITE_ONCE(*dst, cpu_to_le64(val));
1023 }
1024 
1025 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1026 				   u32 ssid)
1027 {
1028 	__le64 *l1ptr;
1029 	unsigned int idx;
1030 	struct arm_smmu_l1_ctx_desc *l1_desc;
1031 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1032 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1033 
1034 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1035 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1036 
1037 	idx = ssid >> CTXDESC_SPLIT;
1038 	l1_desc = &cdcfg->l1_desc[idx];
1039 	if (!l1_desc->l2ptr) {
1040 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1041 			return NULL;
1042 
1043 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1044 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1045 		/* An invalid L1CD can be cached */
1046 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1047 	}
1048 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1049 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1050 }
1051 
1052 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1053 			    struct arm_smmu_ctx_desc *cd)
1054 {
1055 	/*
1056 	 * This function handles the following cases:
1057 	 *
1058 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1059 	 * (2) Install a secondary CD, for SID+SSID traffic.
1060 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1061 	 *     CD, then invalidate the old entry and mappings.
1062 	 * (4) Quiesce the context without clearing the valid bit. Disable
1063 	 *     translation, and ignore any translation fault.
1064 	 * (5) Remove a secondary CD.
1065 	 */
1066 	u64 val;
1067 	bool cd_live;
1068 	__le64 *cdptr;
1069 
1070 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1071 		return -E2BIG;
1072 
1073 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1074 	if (!cdptr)
1075 		return -ENOMEM;
1076 
1077 	val = le64_to_cpu(cdptr[0]);
1078 	cd_live = !!(val & CTXDESC_CD_0_V);
1079 
1080 	if (!cd) { /* (5) */
1081 		val = 0;
1082 	} else if (cd == &quiet_cd) { /* (4) */
1083 		val |= CTXDESC_CD_0_TCR_EPD0;
1084 	} else if (cd_live) { /* (3) */
1085 		val &= ~CTXDESC_CD_0_ASID;
1086 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1087 		/*
1088 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1089 		 * this substream's traffic
1090 		 */
1091 	} else { /* (1) and (2) */
1092 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1093 		cdptr[2] = 0;
1094 		cdptr[3] = cpu_to_le64(cd->mair);
1095 
1096 		/*
1097 		 * STE is live, and the SMMU might read dwords of this CD in any
1098 		 * order. Ensure that it observes valid values before reading
1099 		 * V=1.
1100 		 */
1101 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1102 
1103 		val = cd->tcr |
1104 #ifdef __BIG_ENDIAN
1105 			CTXDESC_CD_0_ENDI |
1106 #endif
1107 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1108 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1109 			CTXDESC_CD_0_AA64 |
1110 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1111 			CTXDESC_CD_0_V;
1112 
1113 		if (smmu_domain->stall_enabled)
1114 			val |= CTXDESC_CD_0_S;
1115 	}
1116 
1117 	/*
1118 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1119 	 * "Configuration structures and configuration invalidation completion"
1120 	 *
1121 	 *   The size of single-copy atomic reads made by the SMMU is
1122 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1123 	 *   field within an aligned 64-bit span of a structure can be altered
1124 	 *   without first making the structure invalid.
1125 	 */
1126 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1127 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1128 	return 0;
1129 }
1130 
1131 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1132 {
1133 	int ret;
1134 	size_t l1size;
1135 	size_t max_contexts;
1136 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1137 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1138 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1139 
1140 	max_contexts = 1 << cfg->s1cdmax;
1141 
1142 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1143 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1144 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1145 		cdcfg->num_l1_ents = max_contexts;
1146 
1147 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1148 	} else {
1149 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1150 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1151 						  CTXDESC_L2_ENTRIES);
1152 
1153 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1154 					      sizeof(*cdcfg->l1_desc),
1155 					      GFP_KERNEL);
1156 		if (!cdcfg->l1_desc)
1157 			return -ENOMEM;
1158 
1159 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1160 	}
1161 
1162 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1163 					   GFP_KERNEL);
1164 	if (!cdcfg->cdtab) {
1165 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1166 		ret = -ENOMEM;
1167 		goto err_free_l1;
1168 	}
1169 
1170 	return 0;
1171 
1172 err_free_l1:
1173 	if (cdcfg->l1_desc) {
1174 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1175 		cdcfg->l1_desc = NULL;
1176 	}
1177 	return ret;
1178 }
1179 
1180 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1181 {
1182 	int i;
1183 	size_t size, l1size;
1184 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1185 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1186 
1187 	if (cdcfg->l1_desc) {
1188 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1189 
1190 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1191 			if (!cdcfg->l1_desc[i].l2ptr)
1192 				continue;
1193 
1194 			dmam_free_coherent(smmu->dev, size,
1195 					   cdcfg->l1_desc[i].l2ptr,
1196 					   cdcfg->l1_desc[i].l2ptr_dma);
1197 		}
1198 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1199 		cdcfg->l1_desc = NULL;
1200 
1201 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1202 	} else {
1203 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1204 	}
1205 
1206 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1207 	cdcfg->cdtab_dma = 0;
1208 	cdcfg->cdtab = NULL;
1209 }
1210 
1211 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1212 {
1213 	bool free;
1214 	struct arm_smmu_ctx_desc *old_cd;
1215 
1216 	if (!cd->asid)
1217 		return false;
1218 
1219 	free = refcount_dec_and_test(&cd->refs);
1220 	if (free) {
1221 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1222 		WARN_ON(old_cd != cd);
1223 	}
1224 	return free;
1225 }
1226 
1227 /* Stream table manipulation functions */
1228 static void
1229 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1230 {
1231 	u64 val = 0;
1232 
1233 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1234 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1235 
1236 	/* See comment in arm_smmu_write_ctx_desc() */
1237 	WRITE_ONCE(*dst, cpu_to_le64(val));
1238 }
1239 
1240 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1241 {
1242 	struct arm_smmu_cmdq_ent cmd = {
1243 		.opcode	= CMDQ_OP_CFGI_STE,
1244 		.cfgi	= {
1245 			.sid	= sid,
1246 			.leaf	= true,
1247 		},
1248 	};
1249 
1250 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1251 }
1252 
1253 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1254 				      __le64 *dst)
1255 {
1256 	/*
1257 	 * This is hideously complicated, but we only really care about
1258 	 * three cases at the moment:
1259 	 *
1260 	 * 1. Invalid (all zero) -> bypass/fault (init)
1261 	 * 2. Bypass/fault -> translation/bypass (attach)
1262 	 * 3. Translation/bypass -> bypass/fault (detach)
1263 	 *
1264 	 * Given that we can't update the STE atomically and the SMMU
1265 	 * doesn't read the thing in a defined order, that leaves us
1266 	 * with the following maintenance requirements:
1267 	 *
1268 	 * 1. Update Config, return (init time STEs aren't live)
1269 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1270 	 * 3. Update Config, sync
1271 	 */
1272 	u64 val = le64_to_cpu(dst[0]);
1273 	bool ste_live = false;
1274 	struct arm_smmu_device *smmu = NULL;
1275 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1276 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1277 	struct arm_smmu_domain *smmu_domain = NULL;
1278 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1279 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1280 		.prefetch	= {
1281 			.sid	= sid,
1282 		},
1283 	};
1284 
1285 	if (master) {
1286 		smmu_domain = master->domain;
1287 		smmu = master->smmu;
1288 	}
1289 
1290 	if (smmu_domain) {
1291 		switch (smmu_domain->stage) {
1292 		case ARM_SMMU_DOMAIN_S1:
1293 			s1_cfg = &smmu_domain->s1_cfg;
1294 			break;
1295 		case ARM_SMMU_DOMAIN_S2:
1296 		case ARM_SMMU_DOMAIN_NESTED:
1297 			s2_cfg = &smmu_domain->s2_cfg;
1298 			break;
1299 		default:
1300 			break;
1301 		}
1302 	}
1303 
1304 	if (val & STRTAB_STE_0_V) {
1305 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1306 		case STRTAB_STE_0_CFG_BYPASS:
1307 			break;
1308 		case STRTAB_STE_0_CFG_S1_TRANS:
1309 		case STRTAB_STE_0_CFG_S2_TRANS:
1310 			ste_live = true;
1311 			break;
1312 		case STRTAB_STE_0_CFG_ABORT:
1313 			BUG_ON(!disable_bypass);
1314 			break;
1315 		default:
1316 			BUG(); /* STE corruption */
1317 		}
1318 	}
1319 
1320 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1321 	val = STRTAB_STE_0_V;
1322 
1323 	/* Bypass/fault */
1324 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1325 		if (!smmu_domain && disable_bypass)
1326 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1327 		else
1328 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1329 
1330 		dst[0] = cpu_to_le64(val);
1331 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1332 						STRTAB_STE_1_SHCFG_INCOMING));
1333 		dst[2] = 0; /* Nuke the VMID */
1334 		/*
1335 		 * The SMMU can perform negative caching, so we must sync
1336 		 * the STE regardless of whether the old value was live.
1337 		 */
1338 		if (smmu)
1339 			arm_smmu_sync_ste_for_sid(smmu, sid);
1340 		return;
1341 	}
1342 
1343 	if (s1_cfg) {
1344 		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1345 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1346 
1347 		BUG_ON(ste_live);
1348 		dst[1] = cpu_to_le64(
1349 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1350 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1351 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1352 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1353 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1354 
1355 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1356 		    !master->stall_enabled)
1357 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1358 
1359 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1360 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1361 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1362 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1363 	}
1364 
1365 	if (s2_cfg) {
1366 		BUG_ON(ste_live);
1367 		dst[2] = cpu_to_le64(
1368 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1369 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1370 #ifdef __BIG_ENDIAN
1371 			 STRTAB_STE_2_S2ENDI |
1372 #endif
1373 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1374 			 STRTAB_STE_2_S2R);
1375 
1376 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1377 
1378 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1379 	}
1380 
1381 	if (master->ats_enabled)
1382 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1383 						 STRTAB_STE_1_EATS_TRANS));
1384 
1385 	arm_smmu_sync_ste_for_sid(smmu, sid);
1386 	/* See comment in arm_smmu_write_ctx_desc() */
1387 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1388 	arm_smmu_sync_ste_for_sid(smmu, sid);
1389 
1390 	/* It's likely that we'll want to use the new STE soon */
1391 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1392 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1393 }
1394 
1395 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1396 {
1397 	unsigned int i;
1398 
1399 	for (i = 0; i < nent; ++i) {
1400 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1401 		strtab += STRTAB_STE_DWORDS;
1402 	}
1403 }
1404 
1405 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1406 {
1407 	size_t size;
1408 	void *strtab;
1409 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1410 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1411 
1412 	if (desc->l2ptr)
1413 		return 0;
1414 
1415 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1416 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1417 
1418 	desc->span = STRTAB_SPLIT + 1;
1419 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1420 					  GFP_KERNEL);
1421 	if (!desc->l2ptr) {
1422 		dev_err(smmu->dev,
1423 			"failed to allocate l2 stream table for SID %u\n",
1424 			sid);
1425 		return -ENOMEM;
1426 	}
1427 
1428 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1429 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1430 	return 0;
1431 }
1432 
1433 static struct arm_smmu_master *
1434 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1435 {
1436 	struct rb_node *node;
1437 	struct arm_smmu_stream *stream;
1438 
1439 	lockdep_assert_held(&smmu->streams_mutex);
1440 
1441 	node = smmu->streams.rb_node;
1442 	while (node) {
1443 		stream = rb_entry(node, struct arm_smmu_stream, node);
1444 		if (stream->id < sid)
1445 			node = node->rb_right;
1446 		else if (stream->id > sid)
1447 			node = node->rb_left;
1448 		else
1449 			return stream->master;
1450 	}
1451 
1452 	return NULL;
1453 }
1454 
1455 /* IRQ and event handlers */
1456 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1457 {
1458 	int ret;
1459 	u32 reason;
1460 	u32 perm = 0;
1461 	struct arm_smmu_master *master;
1462 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1463 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1464 	struct iommu_fault_event fault_evt = { };
1465 	struct iommu_fault *flt = &fault_evt.fault;
1466 
1467 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1468 	case EVT_ID_TRANSLATION_FAULT:
1469 		reason = IOMMU_FAULT_REASON_PTE_FETCH;
1470 		break;
1471 	case EVT_ID_ADDR_SIZE_FAULT:
1472 		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1473 		break;
1474 	case EVT_ID_ACCESS_FAULT:
1475 		reason = IOMMU_FAULT_REASON_ACCESS;
1476 		break;
1477 	case EVT_ID_PERMISSION_FAULT:
1478 		reason = IOMMU_FAULT_REASON_PERMISSION;
1479 		break;
1480 	default:
1481 		return -EOPNOTSUPP;
1482 	}
1483 
1484 	/* Stage-2 is always pinned at the moment */
1485 	if (evt[1] & EVTQ_1_S2)
1486 		return -EFAULT;
1487 
1488 	if (evt[1] & EVTQ_1_RnW)
1489 		perm |= IOMMU_FAULT_PERM_READ;
1490 	else
1491 		perm |= IOMMU_FAULT_PERM_WRITE;
1492 
1493 	if (evt[1] & EVTQ_1_InD)
1494 		perm |= IOMMU_FAULT_PERM_EXEC;
1495 
1496 	if (evt[1] & EVTQ_1_PnU)
1497 		perm |= IOMMU_FAULT_PERM_PRIV;
1498 
1499 	if (evt[1] & EVTQ_1_STALL) {
1500 		flt->type = IOMMU_FAULT_PAGE_REQ;
1501 		flt->prm = (struct iommu_fault_page_request) {
1502 			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1503 			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1504 			.perm = perm,
1505 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1506 		};
1507 
1508 		if (ssid_valid) {
1509 			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1510 			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1511 		}
1512 	} else {
1513 		flt->type = IOMMU_FAULT_DMA_UNRECOV;
1514 		flt->event = (struct iommu_fault_unrecoverable) {
1515 			.reason = reason,
1516 			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1517 			.perm = perm,
1518 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1519 		};
1520 
1521 		if (ssid_valid) {
1522 			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1523 			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1524 		}
1525 	}
1526 
1527 	mutex_lock(&smmu->streams_mutex);
1528 	master = arm_smmu_find_master(smmu, sid);
1529 	if (!master) {
1530 		ret = -EINVAL;
1531 		goto out_unlock;
1532 	}
1533 
1534 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1535 	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1536 		/* Nobody cared, abort the access */
1537 		struct iommu_page_response resp = {
1538 			.pasid		= flt->prm.pasid,
1539 			.grpid		= flt->prm.grpid,
1540 			.code		= IOMMU_PAGE_RESP_FAILURE,
1541 		};
1542 		arm_smmu_page_response(master->dev, &fault_evt, &resp);
1543 	}
1544 
1545 out_unlock:
1546 	mutex_unlock(&smmu->streams_mutex);
1547 	return ret;
1548 }
1549 
1550 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1551 {
1552 	int i, ret;
1553 	struct arm_smmu_device *smmu = dev;
1554 	struct arm_smmu_queue *q = &smmu->evtq.q;
1555 	struct arm_smmu_ll_queue *llq = &q->llq;
1556 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1557 				      DEFAULT_RATELIMIT_BURST);
1558 	u64 evt[EVTQ_ENT_DWORDS];
1559 
1560 	do {
1561 		while (!queue_remove_raw(q, evt)) {
1562 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1563 
1564 			ret = arm_smmu_handle_evt(smmu, evt);
1565 			if (!ret || !__ratelimit(&rs))
1566 				continue;
1567 
1568 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1569 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1570 				dev_info(smmu->dev, "\t0x%016llx\n",
1571 					 (unsigned long long)evt[i]);
1572 
1573 			cond_resched();
1574 		}
1575 
1576 		/*
1577 		 * Not much we can do on overflow, so scream and pretend we're
1578 		 * trying harder.
1579 		 */
1580 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1581 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1582 	} while (!queue_empty(llq));
1583 
1584 	/* Sync our overflow flag, as we believe we're up to speed */
1585 	queue_sync_cons_ovf(q);
1586 	return IRQ_HANDLED;
1587 }
1588 
1589 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1590 {
1591 	u32 sid, ssid;
1592 	u16 grpid;
1593 	bool ssv, last;
1594 
1595 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1596 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1597 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1598 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1599 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1600 
1601 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1602 	dev_info(smmu->dev,
1603 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1604 		 sid, ssid, grpid, last ? "L" : "",
1605 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1606 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1607 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1608 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1609 		 evt[1] & PRIQ_1_ADDR_MASK);
1610 
1611 	if (last) {
1612 		struct arm_smmu_cmdq_ent cmd = {
1613 			.opcode			= CMDQ_OP_PRI_RESP,
1614 			.substream_valid	= ssv,
1615 			.pri			= {
1616 				.sid	= sid,
1617 				.ssid	= ssid,
1618 				.grpid	= grpid,
1619 				.resp	= PRI_RESP_DENY,
1620 			},
1621 		};
1622 
1623 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1624 	}
1625 }
1626 
1627 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1628 {
1629 	struct arm_smmu_device *smmu = dev;
1630 	struct arm_smmu_queue *q = &smmu->priq.q;
1631 	struct arm_smmu_ll_queue *llq = &q->llq;
1632 	u64 evt[PRIQ_ENT_DWORDS];
1633 
1634 	do {
1635 		while (!queue_remove_raw(q, evt))
1636 			arm_smmu_handle_ppr(smmu, evt);
1637 
1638 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1639 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1640 	} while (!queue_empty(llq));
1641 
1642 	/* Sync our overflow flag, as we believe we're up to speed */
1643 	queue_sync_cons_ovf(q);
1644 	return IRQ_HANDLED;
1645 }
1646 
1647 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1648 
1649 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1650 {
1651 	u32 gerror, gerrorn, active;
1652 	struct arm_smmu_device *smmu = dev;
1653 
1654 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1655 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1656 
1657 	active = gerror ^ gerrorn;
1658 	if (!(active & GERROR_ERR_MASK))
1659 		return IRQ_NONE; /* No errors pending */
1660 
1661 	dev_warn(smmu->dev,
1662 		 "unexpected global error reported (0x%08x), this could be serious\n",
1663 		 active);
1664 
1665 	if (active & GERROR_SFM_ERR) {
1666 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1667 		arm_smmu_device_disable(smmu);
1668 	}
1669 
1670 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1671 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1672 
1673 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1674 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1675 
1676 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1677 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1678 
1679 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1680 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1681 
1682 	if (active & GERROR_PRIQ_ABT_ERR)
1683 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1684 
1685 	if (active & GERROR_EVTQ_ABT_ERR)
1686 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1687 
1688 	if (active & GERROR_CMDQ_ERR)
1689 		arm_smmu_cmdq_skip_err(smmu);
1690 
1691 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1692 	return IRQ_HANDLED;
1693 }
1694 
1695 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1696 {
1697 	struct arm_smmu_device *smmu = dev;
1698 
1699 	arm_smmu_evtq_thread(irq, dev);
1700 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1701 		arm_smmu_priq_thread(irq, dev);
1702 
1703 	return IRQ_HANDLED;
1704 }
1705 
1706 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1707 {
1708 	arm_smmu_gerror_handler(irq, dev);
1709 	return IRQ_WAKE_THREAD;
1710 }
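/*
 * Note on the combined-IRQ split above: the hard-IRQ handler only services
 * GERROR, which wants prompt attention, and then returns IRQ_WAKE_THREAD so
 * that the threaded half drains the event queue and, when PRI is supported,
 * the PRI queue outside of hard-IRQ context.
 */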
1711 
1712 static void
1713 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1714 			struct arm_smmu_cmdq_ent *cmd)
1715 {
1716 	size_t log2_span;
1717 	size_t span_mask;
1718 	/* ATC invalidates are always on 4096-byte pages */
1719 	size_t inval_grain_shift = 12;
1720 	unsigned long page_start, page_end;
1721 
1722 	/*
1723 	 * ATS and PASID:
1724 	 *
1725 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1726 	 * prefix. In that case all ATC entries within the address range are
1727 	 * invalidated, including those that were requested with a PASID! There
1728 	 * is no way to invalidate only entries without PASID.
1729 	 *
1730 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1731 	 * traffic), translation requests without PASID create ATC entries
1732 	 * without PASID, which must be invalidated with substream_valid clear.
1733 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1734 	 * ATC entries within the address range.
1735 	 */
1736 	*cmd = (struct arm_smmu_cmdq_ent) {
1737 		.opcode			= CMDQ_OP_ATC_INV,
1738 		.substream_valid	= !!ssid,
1739 		.atc.ssid		= ssid,
1740 	};
1741 
1742 	if (!size) {
1743 		cmd->atc.size = ATC_INV_SIZE_ALL;
1744 		return;
1745 	}
1746 
1747 	page_start	= iova >> inval_grain_shift;
1748 	page_end	= (iova + size - 1) >> inval_grain_shift;
1749 
1750 	/*
1751 	 * In an ATS Invalidate Request, the address must be aligned on the
1752 	 * range size, which must be a power of two number of page sizes. We
1753 	 * thus have to choose between grossly over-invalidating the region, or
1754 	 * splitting the invalidation into multiple commands. For simplicity
1755 	 * we'll go with the first solution, but should refine it in the future
1756 	 * if multiple commands are shown to be more efficient.
1757 	 *
1758 	 * Find the smallest power of two that covers the range. The most
1759 	 * significant differing bit between the start and end addresses,
1760 	 * fls(start ^ end), indicates the required span. For example:
1761 	 *
1762 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1763 	 *		x = 0b1000 ^ 0b1011 = 0b11
1764 	 *		span = 1 << fls(x) = 4
1765 	 *
1766 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1767 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1768 	 *		span = 1 << fls(x) = 16
1769 	 */
1770 	log2_span	= fls_long(page_start ^ page_end);
1771 	span_mask	= (1ULL << log2_span) - 1;
1772 
1773 	page_start	&= ~span_mask;
1774 
1775 	cmd->atc.addr	= page_start << inval_grain_shift;
1776 	cmd->atc.size	= log2_span;
1777 }
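/*
 * For illustration: invalidating iova 0x11000 with size 0x4000 touches pages
 * [0x11; 0x14], so log2_span = fls(0x11 ^ 0x14) = 3. The start is rounded
 * down to page 0x10 and the resulting command covers the eight pages
 * [0x10000; 0x18000), a superset of the requested range.
 */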
1778 
1779 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1780 {
1781 	int i;
1782 	struct arm_smmu_cmdq_ent cmd;
1783 	struct arm_smmu_cmdq_batch cmds = {};
1784 
1785 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1786 
1787 	for (i = 0; i < master->num_streams; i++) {
1788 		cmd.atc.sid = master->streams[i].id;
1789 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1790 	}
1791 
1792 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1793 }
1794 
1795 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1796 			    unsigned long iova, size_t size)
1797 {
1798 	int i;
1799 	unsigned long flags;
1800 	struct arm_smmu_cmdq_ent cmd;
1801 	struct arm_smmu_master *master;
1802 	struct arm_smmu_cmdq_batch cmds;
1803 
1804 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1805 		return 0;
1806 
1807 	/*
1808 	 * Ensure that we've completed prior invalidation of the main TLBs
1809 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1810 	 * arm_smmu_enable_ats():
1811 	 *
1812 	 *	// unmap()			// arm_smmu_enable_ats()
1813 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1814 	 *	smp_mb();			[...]
1815 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1816 	 *
1817 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1818 	 * ATS was enabled at the PCI device before completion of the TLBI.
1819 	 */
1820 	smp_mb();
1821 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1822 		return 0;
1823 
1824 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1825 
1826 	cmds.num = 0;
1827 
1828 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1829 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1830 		if (!master->ats_enabled)
1831 			continue;
1832 
1833 		for (i = 0; i < master->num_streams; i++) {
1834 			cmd.atc.sid = master->streams[i].id;
1835 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1836 		}
1837 	}
1838 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1839 
1840 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1841 }
1842 
1843 /* IO_PGTABLE API */
1844 static void arm_smmu_tlb_inv_context(void *cookie)
1845 {
1846 	struct arm_smmu_domain *smmu_domain = cookie;
1847 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1848 	struct arm_smmu_cmdq_ent cmd;
1849 
1850 	/*
1851 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1852 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1853 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1854 	 * insertion to guarantee those are observed before the TLBI. Do be
1855 	 * careful, 007.
1856 	 */
1857 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1858 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1859 	} else {
1860 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1861 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1862 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1863 	}
1864 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1865 }
1866 
1867 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1868 				     unsigned long iova, size_t size,
1869 				     size_t granule,
1870 				     struct arm_smmu_domain *smmu_domain)
1871 {
1872 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1873 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1874 	size_t inv_range = granule;
1875 	struct arm_smmu_cmdq_batch cmds;
1876 
1877 	if (!size)
1878 		return;
1879 
1880 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1881 		/* Get the leaf page size */
1882 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1883 
1884 		num_pages = size >> tg;
1885 
1886 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1887 		cmd->tlbi.tg = (tg - 10) / 2;
1888 
1889 		/*
1890 		 * Determine what level the granule is at. For non-leaf, both
1891 		 * io-pgtable and SVA pass a nominal last-level granule because
1892 		 * they don't know what level(s) actually apply, so ignore that
1893 		 * and leave TTL=0. However for various errata reasons we still
1894 		 * want to use a range command, so avoid the SVA corner case
1895 		 * where both scale and num could be 0 as well.
1896 		 */
1897 		if (cmd->tlbi.leaf)
1898 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1899 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
1900 			num_pages++;
1901 	}
1902 
1903 	cmds.num = 0;
1904 
1905 	while (iova < end) {
1906 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1907 			/*
1908 			 * On each iteration of the loop, the range is 5 bits
1909 			 * worth of the aligned size remaining.
1910 			 * The range in pages is:
1911 			 *
1912 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1913 			 */
1914 			unsigned long scale, num;
1915 
1916 			/* Determine the power of 2 multiple number of pages */
1917 			scale = __ffs(num_pages);
1918 			cmd->tlbi.scale = scale;
1919 
1920 			/* Determine how many chunks of 2^scale size we have */
1921 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1922 			cmd->tlbi.num = num - 1;
1923 
1924 			/* range is num * 2^scale * pgsize */
1925 			inv_range = num << (scale + tg);
1926 
1927 			/* Clear out the lower order bits for the next iteration */
1928 			num_pages -= num << scale;
1929 		}
1930 
1931 		cmd->tlbi.addr = iova;
1932 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1933 		iova += inv_range;
1934 	}
1935 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1936 }
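/*
 * For illustration, assuming a 4K leaf granule (tg = 12): a 35-page range
 * (num_pages = 0x23) is emitted as two range commands. The first iteration
 * sees scale = __ffs(0x23) = 0 and num = 3, covering 3 pages; the second
 * sees scale = 5 and num = 1, covering the remaining 32 pages.
 */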
1937 
1938 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1939 					  size_t granule, bool leaf,
1940 					  struct arm_smmu_domain *smmu_domain)
1941 {
1942 	struct arm_smmu_cmdq_ent cmd = {
1943 		.tlbi = {
1944 			.leaf	= leaf,
1945 		},
1946 	};
1947 
1948 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1949 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1950 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1951 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1952 	} else {
1953 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1954 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1955 	}
1956 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1957 
1958 	/*
1959 	 * Unfortunately, this can't be leaf-only since we may have
1960 	 * zapped an entire table.
1961 	 */
1962 	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1963 }
1964 
1965 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1966 				 size_t granule, bool leaf,
1967 				 struct arm_smmu_domain *smmu_domain)
1968 {
1969 	struct arm_smmu_cmdq_ent cmd = {
1970 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1971 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1972 		.tlbi = {
1973 			.asid	= asid,
1974 			.leaf	= leaf,
1975 		},
1976 	};
1977 
1978 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1979 }
1980 
1981 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1982 					 unsigned long iova, size_t granule,
1983 					 void *cookie)
1984 {
1985 	struct arm_smmu_domain *smmu_domain = cookie;
1986 	struct iommu_domain *domain = &smmu_domain->domain;
1987 
1988 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1989 }
1990 
1991 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1992 				  size_t granule, void *cookie)
1993 {
1994 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1995 }
1996 
1997 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1998 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1999 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2000 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2001 };
2002 
2003 /* IOMMU API */
2004 static bool arm_smmu_capable(enum iommu_cap cap)
2005 {
2006 	switch (cap) {
2007 	case IOMMU_CAP_CACHE_COHERENCY:
2008 		return true;
2009 	case IOMMU_CAP_NOEXEC:
2010 		return true;
2011 	default:
2012 		return false;
2013 	}
2014 }
2015 
2016 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2017 {
2018 	struct arm_smmu_domain *smmu_domain;
2019 
2020 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2021 	    type != IOMMU_DOMAIN_DMA &&
2022 	    type != IOMMU_DOMAIN_DMA_FQ &&
2023 	    type != IOMMU_DOMAIN_IDENTITY)
2024 		return NULL;
2025 
2026 	/*
2027 	 * Allocate the domain and initialise some of its data structures.
2028 	 * We can't really do anything meaningful until we've added a
2029 	 * master.
2030 	 */
2031 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2032 	if (!smmu_domain)
2033 		return NULL;
2034 
2035 	mutex_init(&smmu_domain->init_mutex);
2036 	INIT_LIST_HEAD(&smmu_domain->devices);
2037 	spin_lock_init(&smmu_domain->devices_lock);
2038 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2039 
2040 	return &smmu_domain->domain;
2041 }
2042 
2043 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2044 {
2045 	int idx, size = 1 << span;
2046 
2047 	do {
2048 		idx = find_first_zero_bit(map, size);
2049 		if (idx == size)
2050 			return -ENOSPC;
2051 	} while (test_and_set_bit(idx, map));
2052 
2053 	return idx;
2054 }
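/*
 * The allocator above is lock-free: find_first_zero_bit() picks a candidate
 * and test_and_set_bit() claims it atomically. If another CPU claims the
 * same bit first, the loop simply rescans the map.
 */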
2055 
2056 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2057 {
2058 	clear_bit(idx, map);
2059 }
2060 
2061 static void arm_smmu_domain_free(struct iommu_domain *domain)
2062 {
2063 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2064 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2065 
2066 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2067 
2068 	/* Free the CD and ASID, if we allocated them */
2069 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2070 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2071 
2072 		/* Prevent SVA from touching the CD while we're freeing it */
2073 		mutex_lock(&arm_smmu_asid_lock);
2074 		if (cfg->cdcfg.cdtab)
2075 			arm_smmu_free_cd_tables(smmu_domain);
2076 		arm_smmu_free_asid(&cfg->cd);
2077 		mutex_unlock(&arm_smmu_asid_lock);
2078 	} else {
2079 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2080 		if (cfg->vmid)
2081 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2082 	}
2083 
2084 	kfree(smmu_domain);
2085 }
2086 
2087 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2088 				       struct arm_smmu_master *master,
2089 				       struct io_pgtable_cfg *pgtbl_cfg)
2090 {
2091 	int ret;
2092 	u32 asid;
2093 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2094 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2095 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2096 
2097 	refcount_set(&cfg->cd.refs, 1);
2098 
2099 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2100 	mutex_lock(&arm_smmu_asid_lock);
2101 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2102 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2103 	if (ret)
2104 		goto out_unlock;
2105 
2106 	cfg->s1cdmax = master->ssid_bits;
2107 
2108 	smmu_domain->stall_enabled = master->stall_enabled;
2109 
2110 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2111 	if (ret)
2112 		goto out_free_asid;
2113 
2114 	cfg->cd.asid	= (u16)asid;
2115 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2116 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2117 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2118 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2119 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2120 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2121 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2122 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2123 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2124 
2125 	/*
2126 	 * Note that this will end up calling arm_smmu_sync_cd() before
2127 	 * the master has been added to the devices list for this domain.
2128 	 * This isn't an issue because the STE hasn't been installed yet.
2129 	 */
2130 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2131 	if (ret)
2132 		goto out_free_cd_tables;
2133 
2134 	mutex_unlock(&arm_smmu_asid_lock);
2135 	return 0;
2136 
2137 out_free_cd_tables:
2138 	arm_smmu_free_cd_tables(smmu_domain);
2139 out_free_asid:
2140 	arm_smmu_free_asid(&cfg->cd);
2141 out_unlock:
2142 	mutex_unlock(&arm_smmu_asid_lock);
2143 	return ret;
2144 }
2145 
2146 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2147 				       struct arm_smmu_master *master,
2148 				       struct io_pgtable_cfg *pgtbl_cfg)
2149 {
2150 	int vmid;
2151 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2152 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2153 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2154 
2155 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2156 	if (vmid < 0)
2157 		return vmid;
2158 
2159 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2160 	cfg->vmid	= (u16)vmid;
2161 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2162 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2163 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2164 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2165 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2166 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2167 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2168 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2169 	return 0;
2170 }
2171 
2172 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2173 				    struct arm_smmu_master *master)
2174 {
2175 	int ret;
2176 	unsigned long ias, oas;
2177 	enum io_pgtable_fmt fmt;
2178 	struct io_pgtable_cfg pgtbl_cfg;
2179 	struct io_pgtable_ops *pgtbl_ops;
2180 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2181 				 struct arm_smmu_master *,
2182 				 struct io_pgtable_cfg *);
2183 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2184 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2185 
2186 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2187 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2188 		return 0;
2189 	}
2190 
2191 	/* Restrict the stage to what we can actually support */
2192 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2193 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2194 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2195 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2196 
2197 	switch (smmu_domain->stage) {
2198 	case ARM_SMMU_DOMAIN_S1:
2199 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2200 		ias = min_t(unsigned long, ias, VA_BITS);
2201 		oas = smmu->ias;
2202 		fmt = ARM_64_LPAE_S1;
2203 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2204 		break;
2205 	case ARM_SMMU_DOMAIN_NESTED:
2206 	case ARM_SMMU_DOMAIN_S2:
2207 		ias = smmu->ias;
2208 		oas = smmu->oas;
2209 		fmt = ARM_64_LPAE_S2;
2210 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2211 		break;
2212 	default:
2213 		return -EINVAL;
2214 	}
2215 
2216 	pgtbl_cfg = (struct io_pgtable_cfg) {
2217 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2218 		.ias		= ias,
2219 		.oas		= oas,
2220 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2221 		.tlb		= &arm_smmu_flush_ops,
2222 		.iommu_dev	= smmu->dev,
2223 	};
2224 
2225 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2226 	if (!pgtbl_ops)
2227 		return -ENOMEM;
2228 
2229 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2230 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2231 	domain->geometry.force_aperture = true;
2232 
2233 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2234 	if (ret < 0) {
2235 		free_io_pgtable_ops(pgtbl_ops);
2236 		return ret;
2237 	}
2238 
2239 	smmu_domain->pgtbl_ops = pgtbl_ops;
2240 	return 0;
2241 }
2242 
2243 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2244 {
2245 	__le64 *step;
2246 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2247 
2248 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2249 		struct arm_smmu_strtab_l1_desc *l1_desc;
2250 		int idx;
2251 
2252 		/* Two-level walk */
2253 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2254 		l1_desc = &cfg->l1_desc[idx];
2255 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2256 		step = &l1_desc->l2ptr[idx];
2257 	} else {
2258 		/* Simple linear lookup */
2259 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2260 	}
2261 
2262 	return step;
2263 }
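/*
 * For illustration, assuming the header's STRTAB_SPLIT of 8: SID 0x1234
 * selects L1 descriptor 0x12, and the low 8 bits (0x34) select the STE
 * within that descriptor's L2 table.
 */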
2264 
2265 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2266 {
2267 	int i, j;
2268 	struct arm_smmu_device *smmu = master->smmu;
2269 
2270 	for (i = 0; i < master->num_streams; ++i) {
2271 		u32 sid = master->streams[i].id;
2272 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2273 
2274 		/* Bridged PCI devices may end up with duplicated IDs */
2275 		for (j = 0; j < i; j++)
2276 			if (master->streams[j].id == sid)
2277 				break;
2278 		if (j < i)
2279 			continue;
2280 
2281 		arm_smmu_write_strtab_ent(master, sid, step);
2282 	}
2283 }
2284 
2285 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2286 {
2287 	struct device *dev = master->dev;
2288 	struct arm_smmu_device *smmu = master->smmu;
2289 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2290 
2291 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2292 		return false;
2293 
2294 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2295 		return false;
2296 
2297 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2298 }
2299 
2300 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2301 {
2302 	size_t stu;
2303 	struct pci_dev *pdev;
2304 	struct arm_smmu_device *smmu = master->smmu;
2305 	struct arm_smmu_domain *smmu_domain = master->domain;
2306 
2307 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2308 	if (!master->ats_enabled)
2309 		return;
2310 
2311 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2312 	stu = __ffs(smmu->pgsize_bitmap);
2313 	pdev = to_pci_dev(master->dev);
2314 
2315 	atomic_inc(&smmu_domain->nr_ats_masters);
2316 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2317 	if (pci_enable_ats(pdev, stu))
2318 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2319 }
2320 
2321 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2322 {
2323 	struct arm_smmu_domain *smmu_domain = master->domain;
2324 
2325 	if (!master->ats_enabled)
2326 		return;
2327 
2328 	pci_disable_ats(to_pci_dev(master->dev));
2329 	/*
2330 	 * Ensure ATS is disabled at the endpoint before we issue the
2331 	 * ATC invalidation via the SMMU.
2332 	 */
2333 	wmb();
2334 	arm_smmu_atc_inv_master(master);
2335 	atomic_dec(&smmu_domain->nr_ats_masters);
2336 }
2337 
2338 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2339 {
2340 	int ret;
2341 	int features;
2342 	int num_pasids;
2343 	struct pci_dev *pdev;
2344 
2345 	if (!dev_is_pci(master->dev))
2346 		return -ENODEV;
2347 
2348 	pdev = to_pci_dev(master->dev);
2349 
2350 	features = pci_pasid_features(pdev);
2351 	if (features < 0)
2352 		return features;
2353 
2354 	num_pasids = pci_max_pasids(pdev);
2355 	if (num_pasids <= 0)
2356 		return num_pasids;
2357 
2358 	ret = pci_enable_pasid(pdev, features);
2359 	if (ret) {
2360 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2361 		return ret;
2362 	}
2363 
2364 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2365 				  master->smmu->ssid_bits);
2366 	return 0;
2367 }
2368 
2369 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2370 {
2371 	struct pci_dev *pdev;
2372 
2373 	if (!dev_is_pci(master->dev))
2374 		return;
2375 
2376 	pdev = to_pci_dev(master->dev);
2377 
2378 	if (!pdev->pasid_enabled)
2379 		return;
2380 
2381 	master->ssid_bits = 0;
2382 	pci_disable_pasid(pdev);
2383 }
2384 
2385 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2386 {
2387 	unsigned long flags;
2388 	struct arm_smmu_domain *smmu_domain = master->domain;
2389 
2390 	if (!smmu_domain)
2391 		return;
2392 
2393 	arm_smmu_disable_ats(master);
2394 
2395 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2396 	list_del(&master->domain_head);
2397 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2398 
2399 	master->domain = NULL;
2400 	master->ats_enabled = false;
2401 	arm_smmu_install_ste_for_dev(master);
2402 }
2403 
2404 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2405 {
2406 	int ret = 0;
2407 	unsigned long flags;
2408 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2409 	struct arm_smmu_device *smmu;
2410 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2411 	struct arm_smmu_master *master;
2412 
2413 	if (!fwspec)
2414 		return -ENOENT;
2415 
2416 	master = dev_iommu_priv_get(dev);
2417 	smmu = master->smmu;
2418 
2419 	/*
2420 	 * Checking that SVA is disabled ensures that this device isn't bound to
2421 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2422 	 * be removed concurrently since we're holding the group mutex.
2423 	 */
2424 	if (arm_smmu_master_sva_enabled(master)) {
2425 		dev_err(dev, "cannot attach - SVA enabled\n");
2426 		return -EBUSY;
2427 	}
2428 
2429 	arm_smmu_detach_dev(master);
2430 
2431 	mutex_lock(&smmu_domain->init_mutex);
2432 
2433 	if (!smmu_domain->smmu) {
2434 		smmu_domain->smmu = smmu;
2435 		ret = arm_smmu_domain_finalise(domain, master);
2436 		if (ret) {
2437 			smmu_domain->smmu = NULL;
2438 			goto out_unlock;
2439 		}
2440 	} else if (smmu_domain->smmu != smmu) {
2441 		dev_err(dev,
2442 			"cannot attach to SMMU %s (upstream of %s)\n",
2443 			dev_name(smmu_domain->smmu->dev),
2444 			dev_name(smmu->dev));
2445 		ret = -ENXIO;
2446 		goto out_unlock;
2447 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2448 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2449 		dev_err(dev,
2450 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2451 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2452 		ret = -EINVAL;
2453 		goto out_unlock;
2454 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2455 		   smmu_domain->stall_enabled != master->stall_enabled) {
2456 		dev_err(dev, "cannot attach to stall-%s domain\n",
2457 			smmu_domain->stall_enabled ? "enabled" : "disabled");
2458 		ret = -EINVAL;
2459 		goto out_unlock;
2460 	}
2461 
2462 	master->domain = smmu_domain;
2463 
2464 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2465 		master->ats_enabled = arm_smmu_ats_supported(master);
2466 
2467 	arm_smmu_install_ste_for_dev(master);
2468 
2469 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2470 	list_add(&master->domain_head, &smmu_domain->devices);
2471 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2472 
2473 	arm_smmu_enable_ats(master);
2474 
2475 out_unlock:
2476 	mutex_unlock(&smmu_domain->init_mutex);
2477 	return ret;
2478 }
2479 
2480 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2481 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2482 			      int prot, gfp_t gfp, size_t *mapped)
2483 {
2484 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2485 
2486 	if (!ops)
2487 		return -ENODEV;
2488 
2489 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2490 }
2491 
2492 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2493 				   size_t pgsize, size_t pgcount,
2494 				   struct iommu_iotlb_gather *gather)
2495 {
2496 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2497 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2498 
2499 	if (!ops)
2500 		return 0;
2501 
2502 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2503 }
2504 
2505 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2506 {
2507 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2508 
2509 	if (smmu_domain->smmu)
2510 		arm_smmu_tlb_inv_context(smmu_domain);
2511 }
2512 
2513 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2514 				struct iommu_iotlb_gather *gather)
2515 {
2516 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2517 
2518 	if (!gather->pgsize)
2519 		return;
2520 
2521 	arm_smmu_tlb_inv_range_domain(gather->start,
2522 				      gather->end - gather->start + 1,
2523 				      gather->pgsize, true, smmu_domain);
2524 }
2525 
2526 static phys_addr_t
2527 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2528 {
2529 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2530 
2531 	if (!ops)
2532 		return 0;
2533 
2534 	return ops->iova_to_phys(ops, iova);
2535 }
2536 
2537 static struct platform_driver arm_smmu_driver;
2538 
2539 static
2540 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2541 {
2542 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2543 							  fwnode);
2544 	put_device(dev);
2545 	return dev ? dev_get_drvdata(dev) : NULL;
2546 }
2547 
2548 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2549 {
2550 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2551 
2552 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2553 		limit *= 1UL << STRTAB_SPLIT;
2554 
2555 	return sid < limit;
2556 }
2557 
2558 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2559 				  struct arm_smmu_master *master)
2560 {
2561 	int i;
2562 	int ret = 0;
2563 	struct arm_smmu_stream *new_stream, *cur_stream;
2564 	struct rb_node **new_node, *parent_node = NULL;
2565 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2566 
2567 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2568 				  GFP_KERNEL);
2569 	if (!master->streams)
2570 		return -ENOMEM;
2571 	master->num_streams = fwspec->num_ids;
2572 
2573 	mutex_lock(&smmu->streams_mutex);
2574 	for (i = 0; i < fwspec->num_ids; i++) {
2575 		u32 sid = fwspec->ids[i];
2576 
2577 		new_stream = &master->streams[i];
2578 		new_stream->id = sid;
2579 		new_stream->master = master;
2580 
2581 		/*
2582 		 * Check the SIDs are in range of the SMMU and our stream table
2583 		 */
2584 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2585 			ret = -ERANGE;
2586 			break;
2587 		}
2588 
2589 		/* Ensure l2 strtab is initialised */
2590 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2591 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2592 			if (ret)
2593 				break;
2594 		}
2595 
2596 		/* Insert into SID tree */
2597 		new_node = &(smmu->streams.rb_node);
2598 		while (*new_node) {
2599 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2600 					      node);
2601 			parent_node = *new_node;
2602 			if (cur_stream->id > new_stream->id) {
2603 				new_node = &((*new_node)->rb_left);
2604 			} else if (cur_stream->id < new_stream->id) {
2605 				new_node = &((*new_node)->rb_right);
2606 			} else {
2607 				dev_warn(master->dev,
2608 					 "stream %u already in tree\n",
2609 					 cur_stream->id);
2610 				ret = -EINVAL;
2611 				break;
2612 			}
2613 		}
2614 		if (ret)
2615 			break;
2616 
2617 		rb_link_node(&new_stream->node, parent_node, new_node);
2618 		rb_insert_color(&new_stream->node, &smmu->streams);
2619 	}
2620 
2621 	if (ret) {
2622 		for (i--; i >= 0; i--)
2623 			rb_erase(&master->streams[i].node, &smmu->streams);
2624 		kfree(master->streams);
2625 	}
2626 	mutex_unlock(&smmu->streams_mutex);
2627 
2628 	return ret;
2629 }
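/*
 * The rb-tree built above is keyed by stream ID, so a SID reported by the
 * hardware can be mapped back to its master in logarithmic time. On failure,
 * the unwind loop erases only the streams that were actually inserted before
 * streams_mutex is released.
 */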
2630 
2631 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2632 {
2633 	int i;
2634 	struct arm_smmu_device *smmu = master->smmu;
2635 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2636 
2637 	if (!smmu || !master->streams)
2638 		return;
2639 
2640 	mutex_lock(&smmu->streams_mutex);
2641 	for (i = 0; i < fwspec->num_ids; i++)
2642 		rb_erase(&master->streams[i].node, &smmu->streams);
2643 	mutex_unlock(&smmu->streams_mutex);
2644 
2645 	kfree(master->streams);
2646 }
2647 
2648 static struct iommu_ops arm_smmu_ops;
2649 
2650 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2651 {
2652 	int ret;
2653 	struct arm_smmu_device *smmu;
2654 	struct arm_smmu_master *master;
2655 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2656 
2657 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2658 		return ERR_PTR(-ENODEV);
2659 
2660 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2661 		return ERR_PTR(-EBUSY);
2662 
2663 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2664 	if (!smmu)
2665 		return ERR_PTR(-ENODEV);
2666 
2667 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2668 	if (!master)
2669 		return ERR_PTR(-ENOMEM);
2670 
2671 	master->dev = dev;
2672 	master->smmu = smmu;
2673 	INIT_LIST_HEAD(&master->bonds);
2674 	dev_iommu_priv_set(dev, master);
2675 
2676 	ret = arm_smmu_insert_master(smmu, master);
2677 	if (ret)
2678 		goto err_free_master;
2679 
2680 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2681 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2682 
2683 	/*
2684 	 * Note that PASID must be enabled before, and disabled after ATS:
2685 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2686 	 *
2687 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2688 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2689 	 *   are changed.
2690 	 */
2691 	arm_smmu_enable_pasid(master);
2692 
2693 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2694 		master->ssid_bits = min_t(u8, master->ssid_bits,
2695 					  CTXDESC_LINEAR_CDMAX);
2696 
2697 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2698 	     device_property_read_bool(dev, "dma-can-stall")) ||
2699 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2700 		master->stall_enabled = true;
2701 
2702 	return &smmu->iommu;
2703 
2704 err_free_master:
2705 	kfree(master);
2706 	dev_iommu_priv_set(dev, NULL);
2707 	return ERR_PTR(ret);
2708 }
2709 
2710 static void arm_smmu_release_device(struct device *dev)
2711 {
2712 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2713 	struct arm_smmu_master *master;
2714 
2715 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2716 		return;
2717 
2718 	master = dev_iommu_priv_get(dev);
2719 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2720 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2721 	arm_smmu_detach_dev(master);
2722 	arm_smmu_disable_pasid(master);
2723 	arm_smmu_remove_master(master);
2724 	kfree(master);
2725 	iommu_fwspec_free(dev);
2726 }
2727 
2728 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2729 {
2730 	struct iommu_group *group;
2731 
2732 	/*
2733 	 * We don't support devices sharing stream IDs other than PCI RID
2734 	 * aliases, since the necessary ID-to-device lookup becomes rather
2735 	 * impractical given a potential sparse 32-bit stream ID space.
2736 	 */
2737 	if (dev_is_pci(dev))
2738 		group = pci_device_group(dev);
2739 	else
2740 		group = generic_device_group(dev);
2741 
2742 	return group;
2743 }
2744 
2745 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2746 {
2747 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2748 	int ret = 0;
2749 
2750 	mutex_lock(&smmu_domain->init_mutex);
2751 	if (smmu_domain->smmu)
2752 		ret = -EPERM;
2753 	else
2754 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2755 	mutex_unlock(&smmu_domain->init_mutex);
2756 
2757 	return ret;
2758 }
2759 
2760 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2761 {
2762 	return iommu_fwspec_add_ids(dev, args->args, 1);
2763 }
2764 
2765 static void arm_smmu_get_resv_regions(struct device *dev,
2766 				      struct list_head *head)
2767 {
2768 	struct iommu_resv_region *region;
2769 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2770 
2771 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2772 					 prot, IOMMU_RESV_SW_MSI);
2773 	if (!region)
2774 		return;
2775 
2776 	list_add_tail(&region->list, head);
2777 
2778 	iommu_dma_get_resv_regions(dev, head);
2779 }
2780 
2781 static bool arm_smmu_dev_has_feature(struct device *dev,
2782 				     enum iommu_dev_features feat)
2783 {
2784 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2785 
2786 	if (!master)
2787 		return false;
2788 
2789 	switch (feat) {
2790 	case IOMMU_DEV_FEAT_IOPF:
2791 		return arm_smmu_master_iopf_supported(master);
2792 	case IOMMU_DEV_FEAT_SVA:
2793 		return arm_smmu_master_sva_supported(master);
2794 	default:
2795 		return false;
2796 	}
2797 }
2798 
2799 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2800 					 enum iommu_dev_features feat)
2801 {
2802 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2803 
2804 	if (!master)
2805 		return false;
2806 
2807 	switch (feat) {
2808 	case IOMMU_DEV_FEAT_IOPF:
2809 		return master->iopf_enabled;
2810 	case IOMMU_DEV_FEAT_SVA:
2811 		return arm_smmu_master_sva_enabled(master);
2812 	default:
2813 		return false;
2814 	}
2815 }
2816 
2817 static int arm_smmu_dev_enable_feature(struct device *dev,
2818 				       enum iommu_dev_features feat)
2819 {
2820 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2821 
2822 	if (!arm_smmu_dev_has_feature(dev, feat))
2823 		return -ENODEV;
2824 
2825 	if (arm_smmu_dev_feature_enabled(dev, feat))
2826 		return -EBUSY;
2827 
2828 	switch (feat) {
2829 	case IOMMU_DEV_FEAT_IOPF:
2830 		master->iopf_enabled = true;
2831 		return 0;
2832 	case IOMMU_DEV_FEAT_SVA:
2833 		return arm_smmu_master_enable_sva(master);
2834 	default:
2835 		return -EINVAL;
2836 	}
2837 }
2838 
2839 static int arm_smmu_dev_disable_feature(struct device *dev,
2840 					enum iommu_dev_features feat)
2841 {
2842 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2843 
2844 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2845 		return -EINVAL;
2846 
2847 	switch (feat) {
2848 	case IOMMU_DEV_FEAT_IOPF:
2849 		if (master->sva_enabled)
2850 			return -EBUSY;
2851 		master->iopf_enabled = false;
2852 		return 0;
2853 	case IOMMU_DEV_FEAT_SVA:
2854 		return arm_smmu_master_disable_sva(master);
2855 	default:
2856 		return -EINVAL;
2857 	}
2858 }
2859 
2860 /*
2861  * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
2862  * PCIe link and save the data to memory by DMA. The hardware is restricted to
2863  * use identity mapping only.
2864  */
2865 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
2866 					 (pdev)->device == 0xa12e)
2867 
2868 static int arm_smmu_def_domain_type(struct device *dev)
2869 {
2870 	if (dev_is_pci(dev)) {
2871 		struct pci_dev *pdev = to_pci_dev(dev);
2872 
2873 		if (IS_HISI_PTT_DEVICE(pdev))
2874 			return IOMMU_DOMAIN_IDENTITY;
2875 	}
2876 
2877 	return 0;
2878 }
2879 
2880 static struct iommu_ops arm_smmu_ops = {
2881 	.capable		= arm_smmu_capable,
2882 	.domain_alloc		= arm_smmu_domain_alloc,
2883 	.domain_free		= arm_smmu_domain_free,
2884 	.attach_dev		= arm_smmu_attach_dev,
2885 	.map_pages		= arm_smmu_map_pages,
2886 	.unmap_pages		= arm_smmu_unmap_pages,
2887 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2888 	.iotlb_sync		= arm_smmu_iotlb_sync,
2889 	.iova_to_phys		= arm_smmu_iova_to_phys,
2890 	.probe_device		= arm_smmu_probe_device,
2891 	.release_device		= arm_smmu_release_device,
2892 	.device_group		= arm_smmu_device_group,
2893 	.enable_nesting		= arm_smmu_enable_nesting,
2894 	.of_xlate		= arm_smmu_of_xlate,
2895 	.get_resv_regions	= arm_smmu_get_resv_regions,
2896 	.put_resv_regions	= generic_iommu_put_resv_regions,
2897 	.dev_has_feat		= arm_smmu_dev_has_feature,
2898 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2899 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2900 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2901 	.sva_bind		= arm_smmu_sva_bind,
2902 	.sva_unbind		= arm_smmu_sva_unbind,
2903 	.sva_get_pasid		= arm_smmu_sva_get_pasid,
2904 	.page_response		= arm_smmu_page_response,
2905 	.def_domain_type	= arm_smmu_def_domain_type,
2906 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2907 	.owner			= THIS_MODULE,
2908 };
2909 
2910 /* Probing and initialisation functions */
2911 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2912 				   struct arm_smmu_queue *q,
2913 				   void __iomem *page,
2914 				   unsigned long prod_off,
2915 				   unsigned long cons_off,
2916 				   size_t dwords, const char *name)
2917 {
2918 	size_t qsz;
2919 
2920 	do {
2921 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2922 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2923 					      GFP_KERNEL);
2924 		if (q->base || qsz < PAGE_SIZE)
2925 			break;
2926 
2927 		q->llq.max_n_shift--;
2928 	} while (1);
2929 
2930 	if (!q->base) {
2931 		dev_err(smmu->dev,
2932 			"failed to allocate queue (0x%zx bytes) for %s\n",
2933 			qsz, name);
2934 		return -ENOMEM;
2935 	}
2936 
2937 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2938 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2939 			 1 << q->llq.max_n_shift, name);
2940 	}
2941 
2942 	q->prod_reg	= page + prod_off;
2943 	q->cons_reg	= page + cons_off;
2944 	q->ent_dwords	= dwords;
2945 
2946 	q->q_base  = Q_BASE_RWA;
2947 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2948 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2949 
2950 	q->llq.prod = q->llq.cons = 0;
2951 	return 0;
2952 }
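/*
 * For illustration, with the two-dword command queue entries and
 * max_n_shift = 8: the first attempt asks for (256 * 2) << 3 = 4096 bytes
 * of coherent memory. If that fails, the shift drops to 7 and a 2048-byte
 * queue is tried; once qsz is already below PAGE_SIZE the loop gives up
 * rather than shrink further.
 */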
2953 
2954 static void arm_smmu_cmdq_free_bitmap(void *data)
2955 {
2956 	unsigned long *bitmap = data;
2957 	bitmap_free(bitmap);
2958 }
2959 
2960 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2961 {
2962 	int ret = 0;
2963 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2964 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2965 	atomic_long_t *bitmap;
2966 
2967 	atomic_set(&cmdq->owner_prod, 0);
2968 	atomic_set(&cmdq->lock, 0);
2969 
2970 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2971 	if (!bitmap) {
2972 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2973 		ret = -ENOMEM;
2974 	} else {
2975 		cmdq->valid_map = bitmap;
2976 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2977 	}
2978 
2979 	return ret;
2980 }
2981 
2982 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2983 {
2984 	int ret;
2985 
2986 	/* cmdq */
2987 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2988 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2989 				      CMDQ_ENT_DWORDS, "cmdq");
2990 	if (ret)
2991 		return ret;
2992 
2993 	ret = arm_smmu_cmdq_init(smmu);
2994 	if (ret)
2995 		return ret;
2996 
2997 	/* evtq */
2998 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2999 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
3000 				      EVTQ_ENT_DWORDS, "evtq");
3001 	if (ret)
3002 		return ret;
3003 
3004 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
3005 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
3006 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
3007 		if (!smmu->evtq.iopf)
3008 			return -ENOMEM;
3009 	}
3010 
3011 	/* priq */
3012 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3013 		return 0;
3014 
3015 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
3016 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
3017 				       PRIQ_ENT_DWORDS, "priq");
3018 }
3019 
3020 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
3021 {
3022 	unsigned int i;
3023 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3024 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
3025 	void *strtab = smmu->strtab_cfg.strtab;
3026 
3027 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
3028 	if (!cfg->l1_desc)
3029 		return -ENOMEM;
3030 
3031 	for (i = 0; i < cfg->num_l1_ents; ++i) {
3032 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3033 		strtab += STRTAB_L1_DESC_DWORDS << 3;
3034 	}
3035 
3036 	return 0;
3037 }
3038 
3039 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3040 {
3041 	void *strtab;
3042 	u64 reg;
3043 	u32 size, l1size;
3044 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3045 
3046 	/* Calculate the L1 size, capped to the SIDSIZE. */
3047 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3048 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3049 	cfg->num_l1_ents = 1 << size;
3050 
3051 	size += STRTAB_SPLIT;
3052 	if (size < smmu->sid_bits)
3053 		dev_warn(smmu->dev,
3054 			 "2-level strtab only covers %u/%u bits of SID\n",
3055 			 size, smmu->sid_bits);
3056 
3057 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3058 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3059 				     GFP_KERNEL);
3060 	if (!strtab) {
3061 		dev_err(smmu->dev,
3062 			"failed to allocate l1 stream table (%u bytes)\n",
3063 			l1size);
3064 		return -ENOMEM;
3065 	}
3066 	cfg->strtab = strtab;
3067 
3068 	/* Configure strtab_base_cfg for 2 levels */
3069 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3070 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3071 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3072 	cfg->strtab_base_cfg = reg;
3073 
3074 	return arm_smmu_init_l1_strtab(smmu);
3075 }
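/*
 * Rough sizing example, assuming the header's STRTAB_L1_SZ_SHIFT of 20,
 * single-dword L1 descriptors and STRTAB_SPLIT of 8: with 16 SID bits the
 * L1 span is min(20 - 3, 16 - 8) = 8, i.e. 256 L1 descriptors (2KB), each
 * of which lazily points at a 256-entry L2 table of STEs.
 */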
3076 
3077 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3078 {
3079 	void *strtab;
3080 	u64 reg;
3081 	u32 size;
3082 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3083 
3084 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3085 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3086 				     GFP_KERNEL);
3087 	if (!strtab) {
3088 		dev_err(smmu->dev,
3089 			"failed to allocate linear stream table (%u bytes)\n",
3090 			size);
3091 		return -ENOMEM;
3092 	}
3093 	cfg->strtab = strtab;
3094 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3095 
3096 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3097 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3098 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3099 	cfg->strtab_base_cfg = reg;
3100 
3101 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3102 	return 0;
3103 }
3104 
3105 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3106 {
3107 	u64 reg;
3108 	int ret;
3109 
3110 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3111 		ret = arm_smmu_init_strtab_2lvl(smmu);
3112 	else
3113 		ret = arm_smmu_init_strtab_linear(smmu);
3114 
3115 	if (ret)
3116 		return ret;
3117 
3118 	/* Set the strtab base address */
3119 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3120 	reg |= STRTAB_BASE_RA;
3121 	smmu->strtab_cfg.strtab_base = reg;
3122 
3123 	/* Allocate the first VMID for stage-2 bypass STEs */
3124 	set_bit(0, smmu->vmid_map);
3125 	return 0;
3126 }
3127 
3128 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3129 {
3130 	int ret;
3131 
3132 	mutex_init(&smmu->streams_mutex);
3133 	smmu->streams = RB_ROOT;
3134 
3135 	ret = arm_smmu_init_queues(smmu);
3136 	if (ret)
3137 		return ret;
3138 
3139 	return arm_smmu_init_strtab(smmu);
3140 }
3141 
3142 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3143 				   unsigned int reg_off, unsigned int ack_off)
3144 {
3145 	u32 reg;
3146 
3147 	writel_relaxed(val, smmu->base + reg_off);
3148 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3149 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3150 }
3151 
3152 /* GBPA is "special" */
3153 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3154 {
3155 	int ret;
3156 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3157 
3158 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3159 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3160 	if (ret)
3161 		return ret;
3162 
3163 	reg &= ~clr;
3164 	reg |= set;
3165 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3166 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3167 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3168 
3169 	if (ret)
3170 		dev_err(smmu->dev, "GBPA not responding to update\n");
3171 	return ret;
3172 }
3173 
3174 static void arm_smmu_free_msis(void *data)
3175 {
3176 	struct device *dev = data;
3177 	platform_msi_domain_free_irqs(dev);
3178 }
3179 
3180 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3181 {
3182 	phys_addr_t doorbell;
3183 	struct device *dev = msi_desc_to_dev(desc);
3184 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3185 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3186 
3187 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3188 	doorbell &= MSI_CFG0_ADDR_MASK;
3189 
3190 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3191 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3192 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3193 }
3194 
3195 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3196 {
3197 	struct msi_desc *desc;
3198 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3199 	struct device *dev = smmu->dev;
3200 
3201 	/* Clear the MSI address regs */
3202 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3203 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3204 
3205 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3206 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3207 	else
3208 		nvec--;
3209 
3210 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3211 		return;
3212 
3213 	if (!dev->msi_domain) {
3214 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3215 		return;
3216 	}
3217 
3218 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3219 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3220 	if (ret) {
3221 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3222 		return;
3223 	}
3224 
3225 	for_each_msi_entry(desc, dev) {
3226 		switch (desc->platform.msi_index) {
3227 		case EVTQ_MSI_INDEX:
3228 			smmu->evtq.q.irq = desc->irq;
3229 			break;
3230 		case GERROR_MSI_INDEX:
3231 			smmu->gerr_irq = desc->irq;
3232 			break;
3233 		case PRIQ_MSI_INDEX:
3234 			smmu->priq.q.irq = desc->irq;
3235 			break;
3236 		default:	/* Unknown */
3237 			continue;
3238 		}
3239 	}
3240 
3241 	/* Add callback to free MSIs on teardown */
3242 	devm_add_action(dev, arm_smmu_free_msis, dev);
3243 }
3244 
3245 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3246 {
3247 	int irq, ret;
3248 
3249 	arm_smmu_setup_msis(smmu);
3250 
3251 	/* Request interrupt lines */
3252 	irq = smmu->evtq.q.irq;
3253 	if (irq) {
3254 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3255 						arm_smmu_evtq_thread,
3256 						IRQF_ONESHOT,
3257 						"arm-smmu-v3-evtq", smmu);
3258 		if (ret < 0)
3259 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3260 	} else {
3261 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3262 	}
3263 
3264 	irq = smmu->gerr_irq;
3265 	if (irq) {
3266 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3267 				       0, "arm-smmu-v3-gerror", smmu);
3268 		if (ret < 0)
3269 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3270 	} else {
3271 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3272 	}
3273 
3274 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3275 		irq = smmu->priq.q.irq;
3276 		if (irq) {
3277 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3278 							arm_smmu_priq_thread,
3279 							IRQF_ONESHOT,
3280 							"arm-smmu-v3-priq",
3281 							smmu);
3282 			if (ret < 0)
3283 				dev_warn(smmu->dev,
3284 					 "failed to enable priq irq\n");
3285 		} else {
3286 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3287 		}
3288 	}
3289 }
3290 
3291 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3292 {
3293 	int ret, irq;
3294 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3295 
3296 	/* Disable IRQs first */
3297 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3298 				      ARM_SMMU_IRQ_CTRLACK);
3299 	if (ret) {
3300 		dev_err(smmu->dev, "failed to disable irqs\n");
3301 		return ret;
3302 	}
3303 
3304 	irq = smmu->combined_irq;
3305 	if (irq) {
3306 		/*
3307 		 * Cavium ThunderX2 implementation doesn't support unique irq
3308 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3309 		 */
3310 		ret = devm_request_threaded_irq(smmu->dev, irq,
3311 					arm_smmu_combined_irq_handler,
3312 					arm_smmu_combined_irq_thread,
3313 					IRQF_ONESHOT,
3314 					"arm-smmu-v3-combined-irq", smmu);
3315 		if (ret < 0)
3316 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3317 	} else
3318 		arm_smmu_setup_unique_irqs(smmu);
3319 
3320 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3321 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3322 
3323 	/* Enable interrupt generation on the SMMU */
3324 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3325 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3326 	if (ret)
3327 		dev_warn(smmu->dev, "failed to enable irqs\n");
3328 
3329 	return 0;
3330 }
3331 
3332 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3333 {
3334 	int ret;
3335 
3336 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3337 	if (ret)
3338 		dev_err(smmu->dev, "failed to clear cr0\n");
3339 
3340 	return ret;
3341 }
3342 
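/*
 * Full device bring-up. Table and queue pointers are programmed with
 * relaxed MMIO writes; the individual enable bits are then set one at a
 * time through CR0, each write acknowledged via CR0ACK: command queue
 * first (so that config/TLB invalidation commands can be issued), then
 * the event queue, the PRI queue and ATS checking where present, IRQs,
 * and finally either SMMUEN or, for the bypass case, a GBPA update
 * honouring the disable_bypass policy. A kdump kernel keeps the event
 * and PRI queues disabled.
 */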
3343 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3344 {
3345 	int ret;
3346 	u32 reg, enables;
3347 	struct arm_smmu_cmdq_ent cmd;
3348 
3349 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3350 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3351 	if (reg & CR0_SMMUEN) {
3352 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3353 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3354 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3355 	}
3356 
3357 	ret = arm_smmu_device_disable(smmu);
3358 	if (ret)
3359 		return ret;
3360 
3361 	/* CR1 (table and queue memory attributes) */
3362 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3363 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3364 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3365 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3366 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3367 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3368 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3369 
3370 	/* CR2 (private TLBs, record invalid StreamIDs, E2H if supported) */
3371 	reg = CR2_PTM | CR2_RECINVSID;
3372 
3373 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3374 		reg |= CR2_E2H;
3375 
3376 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3377 
3378 	/* Stream table */
3379 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3380 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3381 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3382 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3383 
3384 	/* Command queue */
3385 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3386 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3387 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3388 
3389 	enables = CR0_CMDQEN;
3390 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3391 				      ARM_SMMU_CR0ACK);
3392 	if (ret) {
3393 		dev_err(smmu->dev, "failed to enable command queue\n");
3394 		return ret;
3395 	}
3396 
3397 	/* Invalidate any cached configuration */
3398 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3399 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3400 
3401 	/* Invalidate any stale TLB entries */
3402 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3403 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3404 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3405 	}
3406 
3407 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3408 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3409 
3410 	/* Event queue */
3411 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3412 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3413 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3414 
3415 	enables |= CR0_EVTQEN;
3416 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3417 				      ARM_SMMU_CR0ACK);
3418 	if (ret) {
3419 		dev_err(smmu->dev, "failed to enable event queue\n");
3420 		return ret;
3421 	}
3422 
3423 	/* PRI queue */
3424 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3425 		writeq_relaxed(smmu->priq.q.q_base,
3426 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3427 		writel_relaxed(smmu->priq.q.llq.prod,
3428 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3429 		writel_relaxed(smmu->priq.q.llq.cons,
3430 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3431 
3432 		enables |= CR0_PRIQEN;
3433 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3434 					      ARM_SMMU_CR0ACK);
3435 		if (ret) {
3436 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3437 			return ret;
3438 		}
3439 	}
3440 
3441 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3442 		enables |= CR0_ATSCHK;
3443 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3444 					      ARM_SMMU_CR0ACK);
3445 		if (ret) {
3446 			dev_err(smmu->dev, "failed to enable ATS check\n");
3447 			return ret;
3448 		}
3449 	}
3450 
3451 	ret = arm_smmu_setup_irqs(smmu);
3452 	if (ret) {
3453 		dev_err(smmu->dev, "failed to setup irqs\n");
3454 		return ret;
3455 	}
3456 
3457 	if (is_kdump_kernel())
3458 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3459 
3460 	/* Enable the SMMU interface, or ensure bypass */
3461 	if (!bypass || disable_bypass) {
3462 		enables |= CR0_SMMUEN;
3463 	} else {
3464 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3465 		if (ret)
3466 			return ret;
3467 	}
3468 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3469 				      ARM_SMMU_CR0ACK);
3470 	if (ret) {
3471 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3472 		return ret;
3473 	}
3474 
3475 	return 0;
3476 }
3477 
3478 #define IIDR_IMPLEMENTER_ARM		0x43b
3479 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
3480 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
3481 
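/*
 * Trim the advertised feature set according to the implementation
 * identified by IIDR, to work around known errata: early MMU-600
 * revisions lose SEV and nested translation, and MMU-700 loses BTM and
 * nesting and gains the CMDQ_FORCE_SYNC option.
 */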
3482 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3483 {
3484 	u32 reg;
3485 	unsigned int implementer, productid, variant, revision;
3486 
3487 	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3488 	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3489 	productid = FIELD_GET(IIDR_PRODUCTID, reg);
3490 	variant = FIELD_GET(IIDR_VARIANT, reg);
3491 	revision = FIELD_GET(IIDR_REVISION, reg);
3492 
3493 	switch (implementer) {
3494 	case IIDR_IMPLEMENTER_ARM:
3495 		switch (productid) {
3496 		case IIDR_PRODUCTID_ARM_MMU_600:
3497 			/* Arm erratum 1076982 */
3498 			if (variant == 0 && revision <= 2)
3499 				smmu->features &= ~ARM_SMMU_FEAT_SEV;
3500 			/* Arm erratum 1209401 */
3501 			if (variant < 2)
3502 				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3503 			break;
3504 		case IIDR_PRODUCTID_ARM_MMU_700:
3505 			/* Arm erratum 2812531 */
3506 			smmu->features &= ~ARM_SMMU_FEAT_BTM;
3507 			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3508 			/* Arm errata 2268618, 2812531 */
3509 			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3510 			break;
3511 		}
3512 		break;
3513 	}
3514 }
3515 
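/*
 * Read the ID registers (IDR0/1/3/5) to discover what the hardware
 * supports: structure formats, translation stages, queue and SID/SSID
 * sizes, page granules and address sizes. Firmware-described coherency
 * takes precedence over IDR0.COHACC, and the probed page sizes are
 * merged into the global arm_smmu_ops.pgsize_bitmap.
 */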
3516 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3517 {
3518 	u32 reg;
3519 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3520 
3521 	/* IDR0 */
3522 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3523 
3524 	/* 2-level structures */
3525 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3526 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3527 
3528 	if (reg & IDR0_CD2L)
3529 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3530 
3531 	/*
3532 	 * Translation table endianness.
3533 	 * We currently require the same endianness as the CPU, but this
3534 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3535 	 */
3536 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3537 	case IDR0_TTENDIAN_MIXED:
3538 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3539 		break;
3540 #ifdef __BIG_ENDIAN
3541 	case IDR0_TTENDIAN_BE:
3542 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3543 		break;
3544 #else
3545 	case IDR0_TTENDIAN_LE:
3546 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3547 		break;
3548 #endif
3549 	default:
3550 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3551 		return -ENXIO;
3552 	}
3553 
3554 	/* Boolean feature flags */
3555 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3556 		smmu->features |= ARM_SMMU_FEAT_PRI;
3557 
3558 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3559 		smmu->features |= ARM_SMMU_FEAT_ATS;
3560 
3561 	if (reg & IDR0_SEV)
3562 		smmu->features |= ARM_SMMU_FEAT_SEV;
3563 
3564 	if (reg & IDR0_MSI) {
3565 		smmu->features |= ARM_SMMU_FEAT_MSI;
3566 		if (coherent && !disable_msipolling)
3567 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3568 	}
3569 
3570 	if (reg & IDR0_HYP) {
3571 		smmu->features |= ARM_SMMU_FEAT_HYP;
3572 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3573 			smmu->features |= ARM_SMMU_FEAT_E2H;
3574 	}
3575 
3576 	/*
3577 	 * The coherency feature as set by FW is used in preference to the ID
3578 	 * register, but warn on mismatch.
3579 	 */
3580 	if (!!(reg & IDR0_COHACC) != coherent)
3581 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3582 			 coherent ? "true" : "false");
3583 
3584 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3585 	case IDR0_STALL_MODEL_FORCE:
3586 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3587 		fallthrough;
3588 	case IDR0_STALL_MODEL_STALL:
3589 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3590 	}
3591 
3592 	if (reg & IDR0_S1P)
3593 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3594 
3595 	if (reg & IDR0_S2P)
3596 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3597 
3598 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3599 		dev_err(smmu->dev, "no translation support!\n");
3600 		return -ENXIO;
3601 	}
3602 
3603 	/* We only support the AArch64 table format at present */
3604 	switch (FIELD_GET(IDR0_TTF, reg)) {
3605 	case IDR0_TTF_AARCH32_64:
3606 		smmu->ias = 40;
3607 		fallthrough;
3608 	case IDR0_TTF_AARCH64:
3609 		break;
3610 	default:
3611 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3612 		return -ENXIO;
3613 	}
3614 
3615 	/* ASID/VMID sizes */
3616 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3617 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3618 
3619 	/* IDR1 */
3620 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3621 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3622 		dev_err(smmu->dev, "embedded implementation not supported\n");
3623 		return -ENXIO;
3624 	}
3625 
3626 	/* Queue sizes, capped to ensure natural alignment */
3627 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3628 					     FIELD_GET(IDR1_CMDQS, reg));
3629 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3630 		/*
3631 		 * We don't support splitting up batches, so one batch of
3632 		 * commands plus an extra sync needs to fit inside the command
3633 		 * queue. There's also no way we can handle the weird alignment
3634 		 * restrictions on the base pointer for a unit-length queue.
3635 		 */
3636 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3637 			CMDQ_BATCH_ENTRIES);
3638 		return -ENXIO;
3639 	}
3640 
3641 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3642 					     FIELD_GET(IDR1_EVTQS, reg));
3643 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3644 					     FIELD_GET(IDR1_PRIQS, reg));
3645 
3646 	/* SID/SSID sizes */
3647 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3648 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3649 
3650 	/*
3651 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3652 	 * table, use a linear table instead.
3653 	 */
3654 	if (smmu->sid_bits <= STRTAB_SPLIT)
3655 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3656 
3657 	/* IDR3 */
3658 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3659 	if (FIELD_GET(IDR3_RIL, reg))
3660 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3661 
3662 	/* IDR5 */
3663 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3664 
3665 	/* Maximum number of outstanding stalls */
3666 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3667 
3668 	/* Page sizes */
3669 	if (reg & IDR5_GRAN64K)
3670 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3671 	if (reg & IDR5_GRAN16K)
3672 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3673 	if (reg & IDR5_GRAN4K)
3674 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3675 
3676 	/* Input address size */
3677 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3678 		smmu->features |= ARM_SMMU_FEAT_VAX;
3679 
3680 	/* Output address size */
3681 	switch (FIELD_GET(IDR5_OAS, reg)) {
3682 	case IDR5_OAS_32_BIT:
3683 		smmu->oas = 32;
3684 		break;
3685 	case IDR5_OAS_36_BIT:
3686 		smmu->oas = 36;
3687 		break;
3688 	case IDR5_OAS_40_BIT:
3689 		smmu->oas = 40;
3690 		break;
3691 	case IDR5_OAS_42_BIT:
3692 		smmu->oas = 42;
3693 		break;
3694 	case IDR5_OAS_44_BIT:
3695 		smmu->oas = 44;
3696 		break;
3697 	case IDR5_OAS_52_BIT:
3698 		smmu->oas = 52;
3699 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3700 		break;
3701 	default:
3702 		dev_info(smmu->dev,
3703 			"unknown output address size. Truncating to 48-bit\n");
3704 		fallthrough;
3705 	case IDR5_OAS_48_BIT:
3706 		smmu->oas = 48;
3707 	}
3708 
3709 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3710 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3711 	else
3712 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3713 
3714 	/* Set the DMA mask for our table walker */
3715 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3716 		dev_warn(smmu->dev,
3717 			 "failed to set DMA mask for table walker\n");
3718 
3719 	smmu->ias = max(smmu->ias, smmu->oas);
3720 
3721 	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3722 	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3723 		smmu->features |= ARM_SMMU_FEAT_NESTING;
3724 
3725 	arm_smmu_device_iidr_probe(smmu);
3726 
3727 	if (arm_smmu_sva_supported(smmu))
3728 		smmu->features |= ARM_SMMU_FEAT_SVA;
3729 
3730 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3731 		 smmu->ias, smmu->oas, smmu->features);
3732 	return 0;
3733 }
3734 
3735 #ifdef CONFIG_ACPI
3736 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3737 {
3738 	switch (model) {
3739 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3740 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3741 		break;
3742 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3743 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3744 		break;
3745 	}
3746 
3747 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3748 }
3749 
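/*
 * ACPI path: the IORT code hands us the SMMUv3 node via platform data;
 * apply any model-specific quirks (see acpi_smmu_get_options() above)
 * and honour the coherent-access override flag.
 */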
3750 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3751 				      struct arm_smmu_device *smmu)
3752 {
3753 	struct acpi_iort_smmu_v3 *iort_smmu;
3754 	struct device *dev = smmu->dev;
3755 	struct acpi_iort_node *node;
3756 
3757 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3758 
3759 	/* Retrieve SMMUv3 specific data */
3760 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3761 
3762 	acpi_smmu_get_options(iort_smmu->model, smmu);
3763 
3764 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3765 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3766 
3767 	return 0;
3768 }
3769 #else
3770 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3771 					     struct arm_smmu_device *smmu)
3772 {
3773 	return -ENODEV;
3774 }
3775 #endif
3776 
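/*
 * Device-tree path: only #iommu-cells = <1> is accepted (the cell being
 * the StreamID), and "dma-coherent" marks table walks as coherent. A
 * purely illustrative fragment -- addresses, sizes and interrupt
 * specifiers are made up; see the arm,smmu-v3 binding for the real
 * schema:
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>;
 *		interrupt-names = "eventq", "gerror", "priq";
 *		dma-coherent;
 *		#iommu-cells = <1>;
 *	};
 */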
3777 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3778 				    struct arm_smmu_device *smmu)
3779 {
3780 	struct device *dev = &pdev->dev;
3781 	u32 cells;
3782 	int ret = -EINVAL;
3783 
3784 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3785 		dev_err(dev, "missing #iommu-cells property\n");
3786 	else if (cells != 1)
3787 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3788 	else
3789 		ret = 0;
3790 
3791 	parse_driver_options(smmu);
3792 
3793 	if (of_dma_is_coherent(dev->of_node))
3794 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3795 
3796 	return ret;
3797 }
3798 
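/*
 * The register space is normally two 64K pages: page 0 with the main
 * control registers and page 1 with the EVTQ/PRIQ PROD/CONS registers.
 * Implementations with the PAGE0_REGS_ONLY quirk expose only the first
 * 64K, and the probe path points smmu->page1 back at page 0.
 */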
3799 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3800 {
3801 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3802 		return SZ_64K;
3803 	else
3804 		return SZ_128K;
3805 }
3806 
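/*
 * Install (or, with ops == NULL, tear down) the IOMMU ops on every bus
 * type this driver can serve: PCI, AMBA and plain platform devices.
 * Partial failures unwind the bus ops that were already set.
 */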
3807 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3808 {
3809 	int err;
3810 
3811 #ifdef CONFIG_PCI
3812 	if (pci_bus_type.iommu_ops != ops) {
3813 		err = bus_set_iommu(&pci_bus_type, ops);
3814 		if (err)
3815 			return err;
3816 	}
3817 #endif
3818 #ifdef CONFIG_ARM_AMBA
3819 	if (amba_bustype.iommu_ops != ops) {
3820 		err = bus_set_iommu(&amba_bustype, ops);
3821 		if (err)
3822 			goto err_reset_pci_ops;
3823 	}
3824 #endif
3825 	if (platform_bus_type.iommu_ops != ops) {
3826 		err = bus_set_iommu(&platform_bus_type, ops);
3827 		if (err)
3828 			goto err_reset_amba_ops;
3829 	}
3830 
3831 	return 0;
3832 
3833 err_reset_amba_ops:
3834 #ifdef CONFIG_ARM_AMBA
3835 	bus_set_iommu(&amba_bustype, NULL);
3836 #endif
3837 err_reset_pci_ops: __maybe_unused;
3838 #ifdef CONFIG_PCI
3839 	bus_set_iommu(&pci_bus_type, NULL);
3840 #endif
3841 	return err;
3842 }
3843 
3844 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3845 				      resource_size_t size)
3846 {
3847 	struct resource res = DEFINE_RES_MEM(start, size);
3848 
3849 	return devm_ioremap_resource(dev, &res);
3850 }
3851 
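/*
 * Main probe path: discover the device via DT or ACPI (falling back to
 * bypass at reset time if that fails), map the two 64K register pages
 * while leaving the IMPLEMENTATION DEFINED area to the PMCG driver,
 * collect any wired interrupt lines, probe the ID registers, set up the
 * in-memory structures, reset the device and finally register with the
 * IOMMU core, sysfs and the bus types.
 */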
3852 static int arm_smmu_device_probe(struct platform_device *pdev)
3853 {
3854 	int irq, ret;
3855 	struct resource *res;
3856 	resource_size_t ioaddr;
3857 	struct arm_smmu_device *smmu;
3858 	struct device *dev = &pdev->dev;
3859 	bool bypass;
3860 
3861 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3862 	if (!smmu)
3863 		return -ENOMEM;
3864 	smmu->dev = dev;
3865 
3866 	if (dev->of_node) {
3867 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3868 	} else {
3869 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3870 		if (ret == -ENODEV)
3871 			return ret;
3872 	}
3873 
3874 	/* Set bypass mode according to firmware probing result */
3875 	bypass = !!ret;
3876 
3877 	/* Base address */
3878 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3879 	if (!res)
3880 		return -EINVAL;
3881 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3882 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3883 		return -EINVAL;
3884 	}
3885 	ioaddr = res->start;
3886 
3887 	/*
3888 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3889 	 * the PMCG registers which are reserved by the PMU driver.
3890 	 */
3891 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3892 	if (IS_ERR(smmu->base))
3893 		return PTR_ERR(smmu->base);
3894 
3895 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3896 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3897 					       ARM_SMMU_REG_SZ);
3898 		if (IS_ERR(smmu->page1))
3899 			return PTR_ERR(smmu->page1);
3900 	} else {
3901 		smmu->page1 = smmu->base;
3902 	}
3903 
3904 	/* Interrupt lines */
3905 
3906 	irq = platform_get_irq_byname_optional(pdev, "combined");
3907 	if (irq > 0)
3908 		smmu->combined_irq = irq;
3909 	else {
3910 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3911 		if (irq > 0)
3912 			smmu->evtq.q.irq = irq;
3913 
3914 		irq = platform_get_irq_byname_optional(pdev, "priq");
3915 		if (irq > 0)
3916 			smmu->priq.q.irq = irq;
3917 
3918 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3919 		if (irq > 0)
3920 			smmu->gerr_irq = irq;
3921 	}
3922 	/* Probe the h/w */
3923 	ret = arm_smmu_device_hw_probe(smmu);
3924 	if (ret)
3925 		return ret;
3926 
3927 	/* Initialise in-memory data structures */
3928 	ret = arm_smmu_init_structures(smmu);
3929 	if (ret)
3930 		return ret;
3931 
3932 	/* Record our private device structure */
3933 	platform_set_drvdata(pdev, smmu);
3934 
3935 	/* Reset the device */
3936 	ret = arm_smmu_device_reset(smmu, bypass);
3937 	if (ret)
3938 		return ret;
3939 
3940 	/* And we're up. Go go go! */
3941 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3942 				     "smmu3.%pa", &ioaddr);
3943 	if (ret)
3944 		return ret;
3945 
3946 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3947 	if (ret) {
3948 		dev_err(dev, "Failed to register iommu\n");
3949 		goto err_sysfs_remove;
3950 	}
3951 
3952 	ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3953 	if (ret)
3954 		goto err_unregister_device;
3955 
3956 	return 0;
3957 
3958 err_unregister_device:
3959 	iommu_device_unregister(&smmu->iommu);
3960 err_sysfs_remove:
3961 	iommu_device_sysfs_remove(&smmu->iommu);
3962 	return ret;
3963 }
3964 
3965 static int arm_smmu_device_remove(struct platform_device *pdev)
3966 {
3967 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3968 
3969 	arm_smmu_set_bus_ops(NULL);
3970 	iommu_device_unregister(&smmu->iommu);
3971 	iommu_device_sysfs_remove(&smmu->iommu);
3972 	arm_smmu_device_disable(smmu);
3973 	iopf_queue_free(smmu->evtq.iopf);
3974 
3975 	return 0;
3976 }
3977 
3978 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3979 {
3980 	arm_smmu_device_remove(pdev);
3981 }
3982 
3983 static const struct of_device_id arm_smmu_of_match[] = {
3984 	{ .compatible = "arm,smmu-v3", },
3985 	{ },
3986 };
3987 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3988 
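/*
 * Wait for any outstanding SVA notifier callbacks to finish before the
 * driver code can go away, then unregister the platform driver as usual.
 */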
3989 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3990 {
3991 	arm_smmu_sva_notifier_synchronize();
3992 	platform_driver_unregister(drv);
3993 }
3994 
3995 static struct platform_driver arm_smmu_driver = {
3996 	.driver	= {
3997 		.name			= "arm-smmu-v3",
3998 		.of_match_table		= arm_smmu_of_match,
3999 		.suppress_bind_attrs	= true,
4000 	},
4001 	.probe	= arm_smmu_device_probe,
4002 	.remove	= arm_smmu_device_remove,
4003 	.shutdown = arm_smmu_device_shutdown,
4004 };
4005 module_driver(arm_smmu_driver, platform_driver_register,
4006 	      arm_smmu_driver_unregister);
4007 
4008 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4009 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4010 MODULE_ALIAS("platform:arm-smmu-v3");
4011 MODULE_LICENSE("GPL v2");
4012