• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi_iort.h>
13 #include <linux/bitops.h>
14 #include <linux/crash_dump.h>
15 #include <linux/err.h>
16 #include <linux/interrupt.h>
17 #include <linux/io-pgtable.h>
18 #include <linux/module.h>
19 #include <linux/msi.h>
20 #include <linux/pci-ats.h>
21 #include <linux/platform_device.h>
22 #include <kunit/visibility.h>
23 #include <uapi/linux/iommufd.h>
24 
25 #include "arm-smmu-v3.h"
26 #include "../../dma-iommu.h"
27 
28 static bool disable_msipolling;
29 module_param(disable_msipolling, bool, 0444);
30 MODULE_PARM_DESC(disable_msipolling,
31 	"Disable MSI-based polling for CMD_SYNC completion.");
32 
33 static struct iommu_ops arm_smmu_ops;
34 static struct iommu_dirty_ops arm_smmu_dirty_ops;
35 
36 #define NUM_ENTRY_QWORDS 8
37 static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
38 static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));
39 
40 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
41 DEFINE_MUTEX(arm_smmu_asid_lock);
42 
43 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
44 				    struct arm_smmu_device *smmu, u32 flags);
45 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
46 
47 /* High-level queue accessors */
arm_smmu_cmdq_build_cmd(u64 * cmd,struct arm_smmu_cmdq_ent * ent)48 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
49 {
50 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
51 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
52 
53 	switch (ent->opcode) {
54 	case CMDQ_OP_TLBI_EL2_ALL:
55 	case CMDQ_OP_TLBI_NSNH_ALL:
56 		break;
57 	case CMDQ_OP_PREFETCH_CFG:
58 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
59 		break;
60 	case CMDQ_OP_CFGI_CD:
61 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
62 		fallthrough;
63 	case CMDQ_OP_CFGI_STE:
64 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
65 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
66 		break;
67 	case CMDQ_OP_CFGI_CD_ALL:
68 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
69 		break;
70 	case CMDQ_OP_CFGI_ALL:
71 		/* Cover the entire SID range */
72 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
73 		break;
74 	case CMDQ_OP_TLBI_NH_VA:
75 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
76 		fallthrough;
77 	case CMDQ_OP_TLBI_EL2_VA:
78 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
79 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
80 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
81 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
82 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
83 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
84 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
85 		break;
86 	case CMDQ_OP_TLBI_S2_IPA:
87 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
88 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
89 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
90 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
91 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
92 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
93 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
94 		break;
95 	case CMDQ_OP_TLBI_NH_ASID:
96 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
97 		fallthrough;
98 	case CMDQ_OP_TLBI_S12_VMALL:
99 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
100 		break;
101 	case CMDQ_OP_TLBI_EL2_ASID:
102 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
103 		break;
104 	case CMDQ_OP_ATC_INV:
105 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
106 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
107 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
108 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
109 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
110 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
111 		break;
112 	case CMDQ_OP_PRI_RESP:
113 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
114 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
115 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
116 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
117 		switch (ent->pri.resp) {
118 		case PRI_RESP_DENY:
119 		case PRI_RESP_FAIL:
120 		case PRI_RESP_SUCC:
121 			break;
122 		default:
123 			return -EINVAL;
124 		}
125 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
126 		break;
127 	case CMDQ_OP_RESUME:
128 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
129 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
130 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
131 		break;
132 	case CMDQ_OP_CMD_SYNC:
133 		if (ent->sync.msiaddr) {
134 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
135 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
136 		} else {
137 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
138 		}
139 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
140 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
141 		break;
142 	default:
143 		return -ENOENT;
144 	}
145 
146 	return 0;
147 }
148 
arm_smmu_get_cmdq(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_ent * ent)149 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
150 					       struct arm_smmu_cmdq_ent *ent)
151 {
152 	struct arm_smmu_cmdq *cmdq = NULL;
153 
154 	if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
155 		cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
156 
157 	return cmdq ?: &smmu->cmdq;
158 }
159 
arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq)160 static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
161 					     struct arm_smmu_cmdq *cmdq)
162 {
163 	if (cmdq == &smmu->cmdq)
164 		return false;
165 
166 	return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
167 }
168 
arm_smmu_cmdq_build_sync_cmd(u64 * cmd,struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq,u32 prod)169 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
170 					 struct arm_smmu_cmdq *cmdq, u32 prod)
171 {
172 	struct arm_smmu_queue *q = &cmdq->q;
173 	struct arm_smmu_cmdq_ent ent = {
174 		.opcode = CMDQ_OP_CMD_SYNC,
175 	};
176 
177 	/*
178 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
179 	 * payload, so the write will zero the entire command on that platform.
180 	 */
181 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
182 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
183 				   q->ent_dwords * 8;
184 	}
185 
186 	arm_smmu_cmdq_build_cmd(cmd, &ent);
187 	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
188 		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
189 }
190 
__arm_smmu_cmdq_skip_err(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq)191 void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
192 			      struct arm_smmu_cmdq *cmdq)
193 {
194 	static const char * const cerror_str[] = {
195 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
196 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
197 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
198 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
199 	};
200 	struct arm_smmu_queue *q = &cmdq->q;
201 
202 	int i;
203 	u64 cmd[CMDQ_ENT_DWORDS];
204 	u32 cons = readl_relaxed(q->cons_reg);
205 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
206 	struct arm_smmu_cmdq_ent cmd_sync = {
207 		.opcode = CMDQ_OP_CMD_SYNC,
208 	};
209 
210 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
211 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
212 
213 	switch (idx) {
214 	case CMDQ_ERR_CERROR_ABT_IDX:
215 		dev_err(smmu->dev, "retrying command fetch\n");
216 		return;
217 	case CMDQ_ERR_CERROR_NONE_IDX:
218 		return;
219 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
220 		/*
221 		 * ATC Invalidation Completion timeout. CONS is still pointing
222 		 * at the CMD_SYNC. Attempt to complete other pending commands
223 		 * by repeating the CMD_SYNC, though we might well end up back
224 		 * here since the ATC invalidation may still be pending.
225 		 */
226 		return;
227 	case CMDQ_ERR_CERROR_ILL_IDX:
228 	default:
229 		break;
230 	}
231 
232 	/*
233 	 * We may have concurrent producers, so we need to be careful
234 	 * not to touch any of the shadow cmdq state.
235 	 */
236 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
237 	dev_err(smmu->dev, "skipping command in error state:\n");
238 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
239 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
240 
241 	/* Convert the erroneous command into a CMD_SYNC */
242 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
243 	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
244 		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
245 
246 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
247 }
248 
arm_smmu_cmdq_skip_err(struct arm_smmu_device * smmu)249 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
250 {
251 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
252 }
253 
254 /*
255  * Command queue locking.
256  * This is a form of bastardised rwlock with the following major changes:
257  *
258  * - The only LOCK routines are exclusive_trylock() and shared_lock().
259  *   Neither have barrier semantics, and instead provide only a control
260  *   dependency.
261  *
262  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
263  *   fails if the caller appears to be the last lock holder (yes, this is
264  *   racy). All successful UNLOCK routines have RELEASE semantics.
265  */
arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq * cmdq)266 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
267 {
268 	int val;
269 
270 	/*
271 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
272 	 * lock counter. When held in exclusive state, the lock counter is set
273 	 * to INT_MIN so these increments won't hurt as the value will remain
274 	 * negative.
275 	 */
276 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
277 		return;
278 
279 	do {
280 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
281 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
282 }
283 
arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq * cmdq)284 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
285 {
286 	(void)atomic_dec_return_release(&cmdq->lock);
287 }
288 
arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq * cmdq)289 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
290 {
291 	if (atomic_read(&cmdq->lock) == 1)
292 		return false;
293 
294 	arm_smmu_cmdq_shared_unlock(cmdq);
295 	return true;
296 }
297 
298 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
299 ({									\
300 	bool __ret;							\
301 	local_irq_save(flags);						\
302 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
303 	if (!__ret)							\
304 		local_irq_restore(flags);				\
305 	__ret;								\
306 })
307 
308 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
309 ({									\
310 	atomic_set_release(&cmdq->lock, 0);				\
311 	local_irq_restore(flags);					\
312 })
313 
314 
315 /*
316  * Command queue insertion.
317  * This is made fiddly by our attempts to achieve some sort of scalability
318  * since there is one queue shared amongst all of the CPUs in the system.  If
319  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
320  * then you'll *love* this monstrosity.
321  *
322  * The basic idea is to split the queue up into ranges of commands that are
323  * owned by a given CPU; the owner may not have written all of the commands
324  * itself, but is responsible for advancing the hardware prod pointer when
325  * the time comes. The algorithm is roughly:
326  *
327  * 	1. Allocate some space in the queue. At this point we also discover
328  *	   whether the head of the queue is currently owned by another CPU,
329  *	   or whether we are the owner.
330  *
331  *	2. Write our commands into our allocated slots in the queue.
332  *
333  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
334  *
335  *	4. If we are an owner:
336  *		a. Wait for the previous owner to finish.
337  *		b. Mark the queue head as unowned, which tells us the range
338  *		   that we are responsible for publishing.
339  *		c. Wait for all commands in our owned range to become valid.
340  *		d. Advance the hardware prod pointer.
341  *		e. Tell the next owner we've finished.
342  *
343  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
344  *	   owner), then we need to stick around until it has completed:
345  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
346  *		   to clear the first 4 bytes.
347  *		b. Otherwise, we spin waiting for the hardware cons pointer to
348  *		   advance past our command.
349  *
350  * The devil is in the details, particularly the use of locking for handling
351  * SYNC completion and freeing up space in the queue before we think that it is
352  * full.
353  */
__arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq * cmdq,u32 sprod,u32 eprod,bool set)354 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
355 					       u32 sprod, u32 eprod, bool set)
356 {
357 	u32 swidx, sbidx, ewidx, ebidx;
358 	struct arm_smmu_ll_queue llq = {
359 		.max_n_shift	= cmdq->q.llq.max_n_shift,
360 		.prod		= sprod,
361 	};
362 
363 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
364 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
365 
366 	while (llq.prod != eprod) {
367 		unsigned long mask;
368 		atomic_long_t *ptr;
369 		u32 limit = BITS_PER_LONG;
370 
371 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
372 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
373 
374 		ptr = &cmdq->valid_map[swidx];
375 
376 		if ((swidx == ewidx) && (sbidx < ebidx))
377 			limit = ebidx;
378 
379 		mask = GENMASK(limit - 1, sbidx);
380 
381 		/*
382 		 * The valid bit is the inverse of the wrap bit. This means
383 		 * that a zero-initialised queue is invalid and, after marking
384 		 * all entries as valid, they become invalid again when we
385 		 * wrap.
386 		 */
387 		if (set) {
388 			atomic_long_xor(mask, ptr);
389 		} else { /* Poll */
390 			unsigned long valid;
391 
392 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
393 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
394 		}
395 
396 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
397 	}
398 }
399 
400 /* Mark all entries in the range [sprod, eprod) as valid */
arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq * cmdq,u32 sprod,u32 eprod)401 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
402 					u32 sprod, u32 eprod)
403 {
404 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
405 }
406 
407 /* Wait for all entries in the range [sprod, eprod) to become valid */
arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq * cmdq,u32 sprod,u32 eprod)408 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
409 					 u32 sprod, u32 eprod)
410 {
411 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
412 }
413 
414 /* Wait for the command queue to become non-full */
arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq,struct arm_smmu_ll_queue * llq)415 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
416 					     struct arm_smmu_cmdq *cmdq,
417 					     struct arm_smmu_ll_queue *llq)
418 {
419 	unsigned long flags;
420 	struct arm_smmu_queue_poll qp;
421 	int ret = 0;
422 
423 	/*
424 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
425 	 * that fails, spin until somebody else updates it for us.
426 	 */
427 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
428 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
429 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
430 		llq->val = READ_ONCE(cmdq->q.llq.val);
431 		return 0;
432 	}
433 
434 	queue_poll_init(smmu, &qp);
435 	do {
436 		llq->val = READ_ONCE(cmdq->q.llq.val);
437 		if (!queue_full(llq))
438 			break;
439 
440 		ret = queue_poll(&qp);
441 	} while (!ret);
442 
443 	return ret;
444 }
445 
446 /*
447  * Wait until the SMMU signals a CMD_SYNC completion MSI.
448  * Must be called with the cmdq lock held in some capacity.
449  */
__arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq,struct arm_smmu_ll_queue * llq)450 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
451 					  struct arm_smmu_cmdq *cmdq,
452 					  struct arm_smmu_ll_queue *llq)
453 {
454 	int ret = 0;
455 	struct arm_smmu_queue_poll qp;
456 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
457 
458 	queue_poll_init(smmu, &qp);
459 
460 	/*
461 	 * The MSI won't generate an event, since it's being written back
462 	 * into the command queue.
463 	 */
464 	qp.wfe = false;
465 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
466 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
467 	return ret;
468 }
469 
470 /*
471  * Wait until the SMMU cons index passes llq->prod.
472  * Must be called with the cmdq lock held in some capacity.
473  */
__arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq,struct arm_smmu_ll_queue * llq)474 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
475 					       struct arm_smmu_cmdq *cmdq,
476 					       struct arm_smmu_ll_queue *llq)
477 {
478 	struct arm_smmu_queue_poll qp;
479 	u32 prod = llq->prod;
480 	int ret = 0;
481 
482 	queue_poll_init(smmu, &qp);
483 	llq->val = READ_ONCE(cmdq->q.llq.val);
484 	do {
485 		if (queue_consumed(llq, prod))
486 			break;
487 
488 		ret = queue_poll(&qp);
489 
490 		/*
491 		 * This needs to be a readl() so that our subsequent call
492 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
493 		 *
494 		 * Specifically, we need to ensure that we observe all
495 		 * shared_lock()s by other CMD_SYNCs that share our owner,
496 		 * so that a failing call to tryunlock() means that we're
497 		 * the last one out and therefore we can safely advance
498 		 * cmdq->q.llq.cons. Roughly speaking:
499 		 *
500 		 * CPU 0		CPU1			CPU2 (us)
501 		 *
502 		 * if (sync)
503 		 * 	shared_lock();
504 		 *
505 		 * dma_wmb();
506 		 * set_valid_map();
507 		 *
508 		 * 			if (owner) {
509 		 *				poll_valid_map();
510 		 *				<control dependency>
511 		 *				writel(prod_reg);
512 		 *
513 		 *						readl(cons_reg);
514 		 *						tryunlock();
515 		 *
516 		 * Requires us to see CPU 0's shared_lock() acquisition.
517 		 */
518 		llq->cons = readl(cmdq->q.cons_reg);
519 	} while (!ret);
520 
521 	return ret;
522 }
523 
arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq,struct arm_smmu_ll_queue * llq)524 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
525 					 struct arm_smmu_cmdq *cmdq,
526 					 struct arm_smmu_ll_queue *llq)
527 {
528 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
529 	    !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
530 		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
531 
532 	return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
533 }
534 
arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq * cmdq,u64 * cmds,u32 prod,int n)535 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
536 					u32 prod, int n)
537 {
538 	int i;
539 	struct arm_smmu_ll_queue llq = {
540 		.max_n_shift	= cmdq->q.llq.max_n_shift,
541 		.prod		= prod,
542 	};
543 
544 	for (i = 0; i < n; ++i) {
545 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
546 
547 		prod = queue_inc_prod_n(&llq, i);
548 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
549 	}
550 }
551 
552 /*
553  * This is the actual insertion function, and provides the following
554  * ordering guarantees to callers:
555  *
556  * - There is a dma_wmb() before publishing any commands to the queue.
557  *   This can be relied upon to order prior writes to data structures
558  *   in memory (such as a CD or an STE) before the command.
559  *
560  * - On completion of a CMD_SYNC, there is a control dependency.
561  *   This can be relied upon to order subsequent writes to memory (e.g.
562  *   freeing an IOVA) after completion of the CMD_SYNC.
563  *
564  * - Command insertion is totally ordered, so if two CPUs each race to
565  *   insert their own list of commands then all of the commands from one
566  *   CPU will appear before any of the commands from the other CPU.
567  */
arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq,u64 * cmds,int n,bool sync)568 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
569 				       struct arm_smmu_cmdq *cmdq,
570 				       u64 *cmds, int n, bool sync)
571 {
572 	u64 cmd_sync[CMDQ_ENT_DWORDS];
573 	u32 prod;
574 	unsigned long flags;
575 	bool owner;
576 	struct arm_smmu_ll_queue llq, head;
577 	int ret = 0;
578 
579 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
580 
581 	/* 1. Allocate some space in the queue */
582 	local_irq_save(flags);
583 	llq.val = READ_ONCE(cmdq->q.llq.val);
584 	do {
585 		u64 old;
586 
587 		while (!queue_has_space(&llq, n + sync)) {
588 			local_irq_restore(flags);
589 			if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
590 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
591 			local_irq_save(flags);
592 		}
593 
594 		head.cons = llq.cons;
595 		head.prod = queue_inc_prod_n(&llq, n + sync) |
596 					     CMDQ_PROD_OWNED_FLAG;
597 
598 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
599 		if (old == llq.val)
600 			break;
601 
602 		llq.val = old;
603 	} while (1);
604 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
605 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
606 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
607 
608 	/*
609 	 * 2. Write our commands into the queue
610 	 * Dependency ordering from the cmpxchg() loop above.
611 	 */
612 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
613 	if (sync) {
614 		prod = queue_inc_prod_n(&llq, n);
615 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
616 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
617 
618 		/*
619 		 * In order to determine completion of our CMD_SYNC, we must
620 		 * ensure that the queue can't wrap twice without us noticing.
621 		 * We achieve that by taking the cmdq lock as shared before
622 		 * marking our slot as valid.
623 		 */
624 		arm_smmu_cmdq_shared_lock(cmdq);
625 	}
626 
627 	/* 3. Mark our slots as valid, ensuring commands are visible first */
628 	dma_wmb();
629 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
630 
631 	/* 4. If we are the owner, take control of the SMMU hardware */
632 	if (owner) {
633 		/* a. Wait for previous owner to finish */
634 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
635 
636 		/* b. Stop gathering work by clearing the owned flag */
637 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
638 						   &cmdq->q.llq.atomic.prod);
639 		prod &= ~CMDQ_PROD_OWNED_FLAG;
640 
641 		/*
642 		 * c. Wait for any gathered work to be written to the queue.
643 		 * Note that we read our own entries so that we have the control
644 		 * dependency required by (d).
645 		 */
646 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
647 
648 		/*
649 		 * d. Advance the hardware prod pointer
650 		 * Control dependency ordering from the entries becoming valid.
651 		 */
652 		writel_relaxed(prod, cmdq->q.prod_reg);
653 
654 		/*
655 		 * e. Tell the next owner we're done
656 		 * Make sure we've updated the hardware first, so that we don't
657 		 * race to update prod and potentially move it backwards.
658 		 */
659 		atomic_set_release(&cmdq->owner_prod, prod);
660 	}
661 
662 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
663 	if (sync) {
664 		llq.prod = queue_inc_prod_n(&llq, n);
665 		ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
666 		if (ret) {
667 			dev_err_ratelimited(smmu->dev,
668 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
669 					    llq.prod,
670 					    readl_relaxed(cmdq->q.prod_reg),
671 					    readl_relaxed(cmdq->q.cons_reg));
672 		}
673 
674 		/*
675 		 * Try to unlock the cmdq lock. This will fail if we're the last
676 		 * reader, in which case we can safely update cmdq->q.llq.cons
677 		 */
678 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
679 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
680 			arm_smmu_cmdq_shared_unlock(cmdq);
681 		}
682 	}
683 
684 	local_irq_restore(flags);
685 	return ret;
686 }
687 
__arm_smmu_cmdq_issue_cmd(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_ent * ent,bool sync)688 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
689 				     struct arm_smmu_cmdq_ent *ent,
690 				     bool sync)
691 {
692 	u64 cmd[CMDQ_ENT_DWORDS];
693 
694 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
695 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
696 			 ent->opcode);
697 		return -EINVAL;
698 	}
699 
700 	return arm_smmu_cmdq_issue_cmdlist(
701 		smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
702 }
703 
arm_smmu_cmdq_issue_cmd(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_ent * ent)704 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
705 				   struct arm_smmu_cmdq_ent *ent)
706 {
707 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
708 }
709 
arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_ent * ent)710 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
711 					     struct arm_smmu_cmdq_ent *ent)
712 {
713 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
714 }
715 
arm_smmu_cmdq_batch_init(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_batch * cmds,struct arm_smmu_cmdq_ent * ent)716 static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
717 				     struct arm_smmu_cmdq_batch *cmds,
718 				     struct arm_smmu_cmdq_ent *ent)
719 {
720 	cmds->num = 0;
721 	cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
722 }
723 
arm_smmu_cmdq_batch_add(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_batch * cmds,struct arm_smmu_cmdq_ent * cmd)724 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
725 				    struct arm_smmu_cmdq_batch *cmds,
726 				    struct arm_smmu_cmdq_ent *cmd)
727 {
728 	bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
729 	bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
730 			  (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
731 	int index;
732 
733 	if (force_sync || unsupported_cmd) {
734 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
735 					    cmds->num, true);
736 		arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
737 	}
738 
739 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
740 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
741 					    cmds->num, false);
742 		arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
743 	}
744 
745 	index = cmds->num * CMDQ_ENT_DWORDS;
746 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
747 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
748 			 cmd->opcode);
749 		return;
750 	}
751 
752 	cmds->num++;
753 }
754 
arm_smmu_cmdq_batch_submit(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_batch * cmds)755 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
756 				      struct arm_smmu_cmdq_batch *cmds)
757 {
758 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
759 					   cmds->num, true);
760 }
761 
arm_smmu_page_response(struct device * dev,struct iopf_fault * unused,struct iommu_page_response * resp)762 static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
763 				   struct iommu_page_response *resp)
764 {
765 	struct arm_smmu_cmdq_ent cmd = {0};
766 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
767 	int sid = master->streams[0].id;
768 
769 	if (WARN_ON(!master->stall_enabled))
770 		return;
771 
772 	cmd.opcode		= CMDQ_OP_RESUME;
773 	cmd.resume.sid		= sid;
774 	cmd.resume.stag		= resp->grpid;
775 	switch (resp->code) {
776 	case IOMMU_PAGE_RESP_INVALID:
777 	case IOMMU_PAGE_RESP_FAILURE:
778 		cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
779 		break;
780 	case IOMMU_PAGE_RESP_SUCCESS:
781 		cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
782 		break;
783 	default:
784 		break;
785 	}
786 
787 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
788 	/*
789 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
790 	 * RESUME consumption guarantees that the stalled transaction will be
791 	 * terminated... at some point in the future. PRI_RESP is fire and
792 	 * forget.
793 	 */
794 }
795 
796 /* Context descriptor manipulation functions */
arm_smmu_tlb_inv_asid(struct arm_smmu_device * smmu,u16 asid)797 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
798 {
799 	struct arm_smmu_cmdq_ent cmd = {
800 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
801 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
802 		.tlbi.asid = asid,
803 	};
804 
805 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
806 }
807 
808 /*
809  * Based on the value of ent report which bits of the STE the HW will access. It
810  * would be nice if this was complete according to the spec, but minimally it
811  * has to capture the bits this driver uses.
812  */
813 VISIBLE_IF_KUNIT
arm_smmu_get_ste_used(const __le64 * ent,__le64 * used_bits)814 void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
815 {
816 	unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
817 
818 	used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
819 	if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
820 		return;
821 
822 	used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
823 
824 	/* S1 translates */
825 	if (cfg & BIT(0)) {
826 		used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
827 					    STRTAB_STE_0_S1CTXPTR_MASK |
828 					    STRTAB_STE_0_S1CDMAX);
829 		used_bits[1] |=
830 			cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
831 				    STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
832 				    STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
833 				    STRTAB_STE_1_EATS);
834 		used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
835 
836 		/*
837 		 * See 13.5 Summary of attribute/permission configuration fields
838 		 * for the SHCFG behavior.
839 		 */
840 		if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
841 		    STRTAB_STE_1_S1DSS_BYPASS)
842 			used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
843 	}
844 
845 	/* S2 translates */
846 	if (cfg & BIT(1)) {
847 		used_bits[1] |=
848 			cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
849 		used_bits[2] |=
850 			cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
851 				    STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
852 				    STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S |
853 				    STRTAB_STE_2_S2R);
854 		used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
855 	}
856 
857 	if (cfg == STRTAB_STE_0_CFG_BYPASS)
858 		used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
859 }
860 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);
861 
862 /*
863  * Figure out if we can do a hitless update of entry to become target. Returns a
864  * bit mask where 1 indicates that qword needs to be set disruptively.
865  * unused_update is an intermediate value of entry that has unused bits set to
866  * their new values.
867  */
arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer * writer,const __le64 * entry,const __le64 * target,__le64 * unused_update)868 static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
869 				    const __le64 *entry, const __le64 *target,
870 				    __le64 *unused_update)
871 {
872 	__le64 target_used[NUM_ENTRY_QWORDS] = {};
873 	__le64 cur_used[NUM_ENTRY_QWORDS] = {};
874 	u8 used_qword_diff = 0;
875 	unsigned int i;
876 
877 	writer->ops->get_used(entry, cur_used);
878 	writer->ops->get_used(target, target_used);
879 
880 	for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
881 		/*
882 		 * Check that masks are up to date, the make functions are not
883 		 * allowed to set a bit to 1 if the used function doesn't say it
884 		 * is used.
885 		 */
886 		WARN_ON_ONCE(target[i] & ~target_used[i]);
887 
888 		/* Bits can change because they are not currently being used */
889 		unused_update[i] = (entry[i] & cur_used[i]) |
890 				   (target[i] & ~cur_used[i]);
891 		/*
892 		 * Each bit indicates that a used bit in a qword needs to be
893 		 * changed after unused_update is applied.
894 		 */
895 		if ((unused_update[i] & target_used[i]) != target[i])
896 			used_qword_diff |= 1 << i;
897 	}
898 	return used_qword_diff;
899 }
900 
entry_set(struct arm_smmu_entry_writer * writer,__le64 * entry,const __le64 * target,unsigned int start,unsigned int len)901 static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
902 		      const __le64 *target, unsigned int start,
903 		      unsigned int len)
904 {
905 	bool changed = false;
906 	unsigned int i;
907 
908 	for (i = start; len != 0; len--, i++) {
909 		if (entry[i] != target[i]) {
910 			WRITE_ONCE(entry[i], target[i]);
911 			changed = true;
912 		}
913 	}
914 
915 	if (changed)
916 		writer->ops->sync(writer);
917 	return changed;
918 }
919 
920 /*
921  * Update the STE/CD to the target configuration. The transition from the
922  * current entry to the target entry takes place over multiple steps that
923  * attempts to make the transition hitless if possible. This function takes care
924  * not to create a situation where the HW can perceive a corrupted entry. HW is
925  * only required to have a 64 bit atomicity with stores from the CPU, while
926  * entries are many 64 bit values big.
927  *
928  * The difference between the current value and the target value is analyzed to
929  * determine which of three updates are required - disruptive, hitless or no
930  * change.
931  *
932  * In the most general disruptive case we can make any update in three steps:
933  *  - Disrupting the entry (V=0)
934  *  - Fill now unused qwords, execpt qword 0 which contains V
935  *  - Make qword 0 have the final value and valid (V=1) with a single 64
936  *    bit store
937  *
938  * However this disrupts the HW while it is happening. There are several
939  * interesting cases where a STE/CD can be updated without disturbing the HW
940  * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
941  * because the used bits don't intersect. We can detect this by calculating how
942  * many 64 bit values need update after adjusting the unused bits and skip the
943  * V=0 process. This relies on the IGNORED behavior described in the
944  * specification.
945  */
946 VISIBLE_IF_KUNIT
arm_smmu_write_entry(struct arm_smmu_entry_writer * writer,__le64 * entry,const __le64 * target)947 void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
948 			  const __le64 *target)
949 {
950 	__le64 unused_update[NUM_ENTRY_QWORDS];
951 	u8 used_qword_diff;
952 
953 	used_qword_diff =
954 		arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
955 	if (hweight8(used_qword_diff) == 1) {
956 		/*
957 		 * Only one qword needs its used bits to be changed. This is a
958 		 * hitless update, update all bits the current STE/CD is
959 		 * ignoring to their new values, then update a single "critical
960 		 * qword" to change the STE/CD and finally 0 out any bits that
961 		 * are now unused in the target configuration.
962 		 */
963 		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
964 
965 		/*
966 		 * Skip writing unused bits in the critical qword since we'll be
967 		 * writing it in the next step anyways. This can save a sync
968 		 * when the only change is in that qword.
969 		 */
970 		unused_update[critical_qword_index] =
971 			entry[critical_qword_index];
972 		entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
973 		entry_set(writer, entry, target, critical_qword_index, 1);
974 		entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
975 	} else if (used_qword_diff) {
976 		/*
977 		 * At least two qwords need their inuse bits to be changed. This
978 		 * requires a breaking update, zero the V bit, write all qwords
979 		 * but 0, then set qword 0
980 		 */
981 		unused_update[0] = 0;
982 		entry_set(writer, entry, unused_update, 0, 1);
983 		entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
984 		entry_set(writer, entry, target, 0, 1);
985 	} else {
986 		/*
987 		 * No inuse bit changed. Sanity check that all unused bits are 0
988 		 * in the entry. The target was already sanity checked by
989 		 * compute_qword_diff().
990 		 */
991 		WARN_ON_ONCE(
992 			entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
993 	}
994 }
995 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
996 
arm_smmu_sync_cd(struct arm_smmu_master * master,int ssid,bool leaf)997 static void arm_smmu_sync_cd(struct arm_smmu_master *master,
998 			     int ssid, bool leaf)
999 {
1000 	size_t i;
1001 	struct arm_smmu_cmdq_batch cmds;
1002 	struct arm_smmu_device *smmu = master->smmu;
1003 	struct arm_smmu_cmdq_ent cmd = {
1004 		.opcode	= CMDQ_OP_CFGI_CD,
1005 		.cfgi	= {
1006 			.ssid	= ssid,
1007 			.leaf	= leaf,
1008 		},
1009 	};
1010 
1011 	arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
1012 	for (i = 0; i < master->num_streams; i++) {
1013 		cmd.cfgi.sid = master->streams[i].id;
1014 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1015 	}
1016 
1017 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1018 }
1019 
arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 * dst,dma_addr_t l2ptr_dma)1020 static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
1021 				      dma_addr_t l2ptr_dma)
1022 {
1023 	u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V;
1024 
1025 	/* The HW has 64 bit atomicity with stores to the L2 CD table */
1026 	WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
1027 }
1028 
arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 * src)1029 static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src)
1030 {
1031 	return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK;
1032 }
1033 
arm_smmu_get_cd_ptr(struct arm_smmu_master * master,u32 ssid)1034 struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
1035 					u32 ssid)
1036 {
1037 	struct arm_smmu_cdtab_l2 *l2;
1038 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1039 
1040 	if (!arm_smmu_cdtab_allocated(cd_table))
1041 		return NULL;
1042 
1043 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1044 		return &cd_table->linear.table[ssid];
1045 
1046 	l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
1047 	if (!l2)
1048 		return NULL;
1049 	return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)];
1050 }
1051 
arm_smmu_alloc_cd_ptr(struct arm_smmu_master * master,u32 ssid)1052 static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
1053 						 u32 ssid)
1054 {
1055 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1056 	struct arm_smmu_device *smmu = master->smmu;
1057 
1058 	might_sleep();
1059 	iommu_group_mutex_assert(master->dev);
1060 
1061 	if (!arm_smmu_cdtab_allocated(cd_table)) {
1062 		if (arm_smmu_alloc_cd_tables(master))
1063 			return NULL;
1064 	}
1065 
1066 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
1067 		unsigned int idx = arm_smmu_cdtab_l1_idx(ssid);
1068 		struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx];
1069 
1070 		if (!*l2ptr) {
1071 			dma_addr_t l2ptr_dma;
1072 
1073 			*l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr),
1074 						    &l2ptr_dma, GFP_KERNEL);
1075 			if (!*l2ptr)
1076 				return NULL;
1077 
1078 			arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx],
1079 						  l2ptr_dma);
1080 			/* An invalid L1CD can be cached */
1081 			arm_smmu_sync_cd(master, ssid, false);
1082 		}
1083 	}
1084 	return arm_smmu_get_cd_ptr(master, ssid);
1085 }
1086 
1087 struct arm_smmu_cd_writer {
1088 	struct arm_smmu_entry_writer writer;
1089 	unsigned int ssid;
1090 };
1091 
1092 VISIBLE_IF_KUNIT
arm_smmu_get_cd_used(const __le64 * ent,__le64 * used_bits)1093 void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
1094 {
1095 	used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
1096 	if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
1097 		return;
1098 	memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
1099 
1100 	/*
1101 	 * If EPD0 is set by the make function it means
1102 	 * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
1103 	 */
1104 	if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
1105 		used_bits[0] &= ~cpu_to_le64(
1106 			CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
1107 			CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
1108 			CTXDESC_CD_0_TCR_SH0);
1109 		used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
1110 	}
1111 }
1112 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);
1113 
arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer * writer)1114 static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1115 {
1116 	struct arm_smmu_cd_writer *cd_writer =
1117 		container_of(writer, struct arm_smmu_cd_writer, writer);
1118 
1119 	arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
1120 }
1121 
1122 static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
1123 	.sync = arm_smmu_cd_writer_sync_entry,
1124 	.get_used = arm_smmu_get_cd_used,
1125 };
1126 
arm_smmu_write_cd_entry(struct arm_smmu_master * master,int ssid,struct arm_smmu_cd * cdptr,const struct arm_smmu_cd * target)1127 void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
1128 			     struct arm_smmu_cd *cdptr,
1129 			     const struct arm_smmu_cd *target)
1130 {
1131 	bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1132 	bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1133 	struct arm_smmu_cd_writer cd_writer = {
1134 		.writer = {
1135 			.ops = &arm_smmu_cd_writer_ops,
1136 			.master = master,
1137 		},
1138 		.ssid = ssid,
1139 	};
1140 
1141 	if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) {
1142 		if (cur_valid)
1143 			master->cd_table.used_ssids--;
1144 		else
1145 			master->cd_table.used_ssids++;
1146 	}
1147 
1148 	arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
1149 }
1150 
arm_smmu_make_s1_cd(struct arm_smmu_cd * target,struct arm_smmu_master * master,struct arm_smmu_domain * smmu_domain)1151 void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
1152 			 struct arm_smmu_master *master,
1153 			 struct arm_smmu_domain *smmu_domain)
1154 {
1155 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
1156 	const struct io_pgtable_cfg *pgtbl_cfg =
1157 		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1158 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
1159 		&pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1160 
1161 	memset(target, 0, sizeof(*target));
1162 
1163 	target->data[0] = cpu_to_le64(
1164 		FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1165 		FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1166 		FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1167 		FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1168 		FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1169 #ifdef __BIG_ENDIAN
1170 		CTXDESC_CD_0_ENDI |
1171 #endif
1172 		CTXDESC_CD_0_TCR_EPD1 |
1173 		CTXDESC_CD_0_V |
1174 		FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1175 		CTXDESC_CD_0_AA64 |
1176 		(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
1177 		CTXDESC_CD_0_R |
1178 		CTXDESC_CD_0_A |
1179 		CTXDESC_CD_0_ASET |
1180 		FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
1181 		);
1182 
1183 	/* To enable dirty flag update, set both Access flag and dirty state update */
1184 	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
1185 		target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
1186 					       CTXDESC_CD_0_TCR_HD);
1187 
1188 	target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
1189 				      CTXDESC_CD_1_TTB0_MASK);
1190 	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
1191 }
1192 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);
1193 
arm_smmu_clear_cd(struct arm_smmu_master * master,ioasid_t ssid)1194 void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
1195 {
1196 	struct arm_smmu_cd target = {};
1197 	struct arm_smmu_cd *cdptr;
1198 
1199 	if (!arm_smmu_cdtab_allocated(&master->cd_table))
1200 		return;
1201 	cdptr = arm_smmu_get_cd_ptr(master, ssid);
1202 	if (WARN_ON(!cdptr))
1203 		return;
1204 	arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
1205 }
1206 
arm_smmu_alloc_cd_tables(struct arm_smmu_master * master)1207 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1208 {
1209 	int ret;
1210 	size_t l1size;
1211 	size_t max_contexts;
1212 	struct arm_smmu_device *smmu = master->smmu;
1213 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1214 
1215 	cd_table->s1cdmax = master->ssid_bits;
1216 	max_contexts = 1 << cd_table->s1cdmax;
1217 
1218 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1219 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1220 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1221 		cd_table->linear.num_ents = max_contexts;
1222 
1223 		l1size = max_contexts * sizeof(struct arm_smmu_cd);
1224 		cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size,
1225 							    &cd_table->cdtab_dma,
1226 							    GFP_KERNEL);
1227 		if (!cd_table->linear.table)
1228 			return -ENOMEM;
1229 	} else {
1230 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1231 		cd_table->l2.num_l1_ents =
1232 			DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES);
1233 
1234 		cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents,
1235 					     sizeof(*cd_table->l2.l2ptrs),
1236 					     GFP_KERNEL);
1237 		if (!cd_table->l2.l2ptrs)
1238 			return -ENOMEM;
1239 
1240 		l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1);
1241 		cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
1242 							&cd_table->cdtab_dma,
1243 							GFP_KERNEL);
1244 		if (!cd_table->l2.l2ptrs) {
1245 			ret = -ENOMEM;
1246 			goto err_free_l2ptrs;
1247 		}
1248 	}
1249 	return 0;
1250 
1251 err_free_l2ptrs:
1252 	kfree(cd_table->l2.l2ptrs);
1253 	cd_table->l2.l2ptrs = NULL;
1254 	return ret;
1255 }
1256 
arm_smmu_free_cd_tables(struct arm_smmu_master * master)1257 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1258 {
1259 	int i;
1260 	struct arm_smmu_device *smmu = master->smmu;
1261 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1262 
1263 	if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
1264 		for (i = 0; i < cd_table->l2.num_l1_ents; i++) {
1265 			if (!cd_table->l2.l2ptrs[i])
1266 				continue;
1267 
1268 			dma_free_coherent(smmu->dev,
1269 					  sizeof(*cd_table->l2.l2ptrs[i]),
1270 					  cd_table->l2.l2ptrs[i],
1271 					  arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i]));
1272 		}
1273 		kfree(cd_table->l2.l2ptrs);
1274 
1275 		dma_free_coherent(smmu->dev,
1276 				  cd_table->l2.num_l1_ents *
1277 					  sizeof(struct arm_smmu_cdtab_l1),
1278 				  cd_table->l2.l1tab, cd_table->cdtab_dma);
1279 	} else {
1280 		dma_free_coherent(smmu->dev,
1281 				  cd_table->linear.num_ents *
1282 					  sizeof(struct arm_smmu_cd),
1283 				  cd_table->linear.table, cd_table->cdtab_dma);
1284 	}
1285 }
1286 
1287 /* Stream table manipulation functions */
1288 struct arm_smmu_ste_writer {
1289 	struct arm_smmu_entry_writer writer;
1290 	u32 sid;
1291 };
1292 
arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer * writer)1293 static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1294 {
1295 	struct arm_smmu_ste_writer *ste_writer =
1296 		container_of(writer, struct arm_smmu_ste_writer, writer);
1297 	struct arm_smmu_cmdq_ent cmd = {
1298 		.opcode	= CMDQ_OP_CFGI_STE,
1299 		.cfgi	= {
1300 			.sid	= ste_writer->sid,
1301 			.leaf	= true,
1302 		},
1303 	};
1304 
1305 	arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
1306 }
1307 
1308 static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
1309 	.sync = arm_smmu_ste_writer_sync_entry,
1310 	.get_used = arm_smmu_get_ste_used,
1311 };
1312 
arm_smmu_write_ste(struct arm_smmu_master * master,u32 sid,struct arm_smmu_ste * ste,const struct arm_smmu_ste * target)1313 static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
1314 			       struct arm_smmu_ste *ste,
1315 			       const struct arm_smmu_ste *target)
1316 {
1317 	struct arm_smmu_device *smmu = master->smmu;
1318 	struct arm_smmu_ste_writer ste_writer = {
1319 		.writer = {
1320 			.ops = &arm_smmu_ste_writer_ops,
1321 			.master = master,
1322 		},
1323 		.sid = sid,
1324 	};
1325 
1326 	arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);
1327 
1328 	/* It's likely that we'll want to use the new STE soon */
1329 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
1330 		struct arm_smmu_cmdq_ent
1331 			prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
1332 					 .prefetch = {
1333 						 .sid = sid,
1334 					 } };
1335 
1336 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1337 	}
1338 }
1339 
1340 VISIBLE_IF_KUNIT
arm_smmu_make_abort_ste(struct arm_smmu_ste * target)1341 void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
1342 {
1343 	memset(target, 0, sizeof(*target));
1344 	target->data[0] = cpu_to_le64(
1345 		STRTAB_STE_0_V |
1346 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
1347 }
1348 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);
1349 
1350 VISIBLE_IF_KUNIT
arm_smmu_make_bypass_ste(struct arm_smmu_device * smmu,struct arm_smmu_ste * target)1351 void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
1352 			      struct arm_smmu_ste *target)
1353 {
1354 	memset(target, 0, sizeof(*target));
1355 	target->data[0] = cpu_to_le64(
1356 		STRTAB_STE_0_V |
1357 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
1358 
1359 	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1360 		target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1361 							 STRTAB_STE_1_SHCFG_INCOMING));
1362 }
1363 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);
1364 
1365 VISIBLE_IF_KUNIT
arm_smmu_make_cdtable_ste(struct arm_smmu_ste * target,struct arm_smmu_master * master,bool ats_enabled,unsigned int s1dss)1366 void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
1367 			       struct arm_smmu_master *master, bool ats_enabled,
1368 			       unsigned int s1dss)
1369 {
1370 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1371 	struct arm_smmu_device *smmu = master->smmu;
1372 
1373 	memset(target, 0, sizeof(*target));
1374 	target->data[0] = cpu_to_le64(
1375 		STRTAB_STE_0_V |
1376 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1377 		FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
1378 		(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1379 		FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
1380 
1381 	target->data[1] = cpu_to_le64(
1382 		FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
1383 		FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1384 		FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1385 		FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1386 		((smmu->features & ARM_SMMU_FEAT_STALLS &&
1387 		  !master->stall_enabled) ?
1388 			 STRTAB_STE_1_S1STALLD :
1389 			 0) |
1390 		FIELD_PREP(STRTAB_STE_1_EATS,
1391 			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1392 
1393 	if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) &&
1394 	    s1dss == STRTAB_STE_1_S1DSS_BYPASS)
1395 		target->data[1] |= cpu_to_le64(FIELD_PREP(
1396 			STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
1397 
1398 	if (smmu->features & ARM_SMMU_FEAT_E2H) {
1399 		/*
1400 		 * To support BTM the streamworld needs to match the
1401 		 * configuration of the CPU so that the ASID broadcasts are
1402 		 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
1403 		 * or NS-EL1 (guest). Since an SVA domain can be installed in a
1404 		 * PASID this should always use a BTM compatible configuration
1405 		 * if the HW supports it.
1406 		 */
1407 		target->data[1] |= cpu_to_le64(
1408 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
1409 	} else {
1410 		target->data[1] |= cpu_to_le64(
1411 			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1412 
1413 		/*
1414 		 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
1415 		 * arm_smmu_domain_alloc_id()
1416 		 */
1417 		target->data[2] =
1418 			cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
1419 	}
1420 }
1421 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
1422 
1423 VISIBLE_IF_KUNIT
arm_smmu_make_s2_domain_ste(struct arm_smmu_ste * target,struct arm_smmu_master * master,struct arm_smmu_domain * smmu_domain,bool ats_enabled)1424 void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
1425 				 struct arm_smmu_master *master,
1426 				 struct arm_smmu_domain *smmu_domain,
1427 				 bool ats_enabled)
1428 {
1429 	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
1430 	const struct io_pgtable_cfg *pgtbl_cfg =
1431 		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1432 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
1433 		&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1434 	u64 vtcr_val;
1435 	struct arm_smmu_device *smmu = master->smmu;
1436 
1437 	memset(target, 0, sizeof(*target));
1438 	target->data[0] = cpu_to_le64(
1439 		STRTAB_STE_0_V |
1440 		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
1441 
1442 	target->data[1] = cpu_to_le64(
1443 		FIELD_PREP(STRTAB_STE_1_EATS,
1444 			   ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1445 
1446 	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1447 		target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1448 							  STRTAB_STE_1_SHCFG_INCOMING));
1449 
1450 	vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1451 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1452 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1453 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1454 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1455 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1456 		   FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1457 	target->data[2] = cpu_to_le64(
1458 		FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1459 		FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
1460 		STRTAB_STE_2_S2AA64 |
1461 #ifdef __BIG_ENDIAN
1462 		STRTAB_STE_2_S2ENDI |
1463 #endif
1464 		STRTAB_STE_2_S2PTW |
1465 		(master->stall_enabled ? STRTAB_STE_2_S2S : 0) |
1466 		STRTAB_STE_2_S2R);
1467 
1468 	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
1469 				      STRTAB_STE_3_S2TTB_MASK);
1470 }
1471 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);
1472 
1473 /*
1474  * This can safely directly manipulate the STE memory without a sync sequence
1475  * because the STE table has not been installed in the SMMU yet.
1476  */
arm_smmu_init_initial_stes(struct arm_smmu_ste * strtab,unsigned int nent)1477 static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
1478 				       unsigned int nent)
1479 {
1480 	unsigned int i;
1481 
1482 	for (i = 0; i < nent; ++i) {
1483 		arm_smmu_make_abort_ste(strtab);
1484 		strtab++;
1485 	}
1486 }
1487 
arm_smmu_init_l2_strtab(struct arm_smmu_device * smmu,u32 sid)1488 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1489 {
1490 	dma_addr_t l2ptr_dma;
1491 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1492 	struct arm_smmu_strtab_l2 **l2table;
1493 
1494 	l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)];
1495 	if (*l2table)
1496 		return 0;
1497 
1498 	*l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table),
1499 				       &l2ptr_dma, GFP_KERNEL);
1500 	if (!*l2table) {
1501 		dev_err(smmu->dev,
1502 			"failed to allocate l2 stream table for SID %u\n",
1503 			sid);
1504 		return -ENOMEM;
1505 	}
1506 
1507 	arm_smmu_init_initial_stes((*l2table)->stes,
1508 				   ARRAY_SIZE((*l2table)->stes));
1509 	arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)],
1510 				      l2ptr_dma);
1511 	return 0;
1512 }
1513 
arm_smmu_streams_cmp_key(const void * lhs,const struct rb_node * rhs)1514 static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
1515 {
1516 	struct arm_smmu_stream *stream_rhs =
1517 		rb_entry(rhs, struct arm_smmu_stream, node);
1518 	const u32 *sid_lhs = lhs;
1519 
1520 	if (*sid_lhs < stream_rhs->id)
1521 		return -1;
1522 	if (*sid_lhs > stream_rhs->id)
1523 		return 1;
1524 	return 0;
1525 }
1526 
arm_smmu_streams_cmp_node(struct rb_node * lhs,const struct rb_node * rhs)1527 static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
1528 				     const struct rb_node *rhs)
1529 {
1530 	return arm_smmu_streams_cmp_key(
1531 		&rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
1532 }
1533 
1534 static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device * smmu,u32 sid)1535 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1536 {
1537 	struct rb_node *node;
1538 
1539 	lockdep_assert_held(&smmu->streams_mutex);
1540 
1541 	node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
1542 	if (!node)
1543 		return NULL;
1544 	return rb_entry(node, struct arm_smmu_stream, node)->master;
1545 }
1546 
1547 /* IRQ and event handlers */
arm_smmu_handle_evt(struct arm_smmu_device * smmu,u64 * evt)1548 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1549 {
1550 	int ret = 0;
1551 	u32 perm = 0;
1552 	struct arm_smmu_master *master;
1553 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1554 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1555 	struct iopf_fault fault_evt = { };
1556 	struct iommu_fault *flt = &fault_evt.fault;
1557 
1558 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1559 	case EVT_ID_TRANSLATION_FAULT:
1560 	case EVT_ID_ADDR_SIZE_FAULT:
1561 	case EVT_ID_ACCESS_FAULT:
1562 	case EVT_ID_PERMISSION_FAULT:
1563 		break;
1564 	default:
1565 		return -EOPNOTSUPP;
1566 	}
1567 
1568 	if (!(evt[1] & EVTQ_1_STALL))
1569 		return -EOPNOTSUPP;
1570 
1571 	if (evt[1] & EVTQ_1_RnW)
1572 		perm |= IOMMU_FAULT_PERM_READ;
1573 	else
1574 		perm |= IOMMU_FAULT_PERM_WRITE;
1575 
1576 	if (evt[1] & EVTQ_1_InD)
1577 		perm |= IOMMU_FAULT_PERM_EXEC;
1578 
1579 	if (evt[1] & EVTQ_1_PnU)
1580 		perm |= IOMMU_FAULT_PERM_PRIV;
1581 
1582 	flt->type = IOMMU_FAULT_PAGE_REQ;
1583 	flt->prm = (struct iommu_fault_page_request) {
1584 		.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1585 		.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1586 		.perm = perm,
1587 		.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1588 	};
1589 
1590 	if (ssid_valid) {
1591 		flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1592 		flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1593 	}
1594 
1595 	mutex_lock(&smmu->streams_mutex);
1596 	master = arm_smmu_find_master(smmu, sid);
1597 	if (!master) {
1598 		ret = -EINVAL;
1599 		goto out_unlock;
1600 	}
1601 
1602 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1603 out_unlock:
1604 	mutex_unlock(&smmu->streams_mutex);
1605 	return ret;
1606 }
1607 
arm_smmu_evtq_thread(int irq,void * dev)1608 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1609 {
1610 	int i, ret;
1611 	struct arm_smmu_device *smmu = dev;
1612 	struct arm_smmu_queue *q = &smmu->evtq.q;
1613 	struct arm_smmu_ll_queue *llq = &q->llq;
1614 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1615 				      DEFAULT_RATELIMIT_BURST);
1616 	u64 evt[EVTQ_ENT_DWORDS];
1617 
1618 	do {
1619 		while (!queue_remove_raw(q, evt)) {
1620 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1621 
1622 			ret = arm_smmu_handle_evt(smmu, evt);
1623 			if (!ret || !__ratelimit(&rs))
1624 				continue;
1625 
1626 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1627 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1628 				dev_info(smmu->dev, "\t0x%016llx\n",
1629 					 (unsigned long long)evt[i]);
1630 
1631 			cond_resched();
1632 		}
1633 
1634 		/*
1635 		 * Not much we can do on overflow, so scream and pretend we're
1636 		 * trying harder.
1637 		 */
1638 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1639 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1640 	} while (!queue_empty(llq));
1641 
1642 	/* Sync our overflow flag, as we believe we're up to speed */
1643 	queue_sync_cons_ovf(q);
1644 	return IRQ_HANDLED;
1645 }
1646 
arm_smmu_handle_ppr(struct arm_smmu_device * smmu,u64 * evt)1647 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1648 {
1649 	u32 sid, ssid;
1650 	u16 grpid;
1651 	bool ssv, last;
1652 
1653 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1654 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1655 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1656 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1657 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1658 
1659 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1660 	dev_info(smmu->dev,
1661 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1662 		 sid, ssid, grpid, last ? "L" : "",
1663 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1664 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1665 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1666 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1667 		 evt[1] & PRIQ_1_ADDR_MASK);
1668 
1669 	if (last) {
1670 		struct arm_smmu_cmdq_ent cmd = {
1671 			.opcode			= CMDQ_OP_PRI_RESP,
1672 			.substream_valid	= ssv,
1673 			.pri			= {
1674 				.sid	= sid,
1675 				.ssid	= ssid,
1676 				.grpid	= grpid,
1677 				.resp	= PRI_RESP_DENY,
1678 			},
1679 		};
1680 
1681 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1682 	}
1683 }
1684 
arm_smmu_priq_thread(int irq,void * dev)1685 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1686 {
1687 	struct arm_smmu_device *smmu = dev;
1688 	struct arm_smmu_queue *q = &smmu->priq.q;
1689 	struct arm_smmu_ll_queue *llq = &q->llq;
1690 	u64 evt[PRIQ_ENT_DWORDS];
1691 
1692 	do {
1693 		while (!queue_remove_raw(q, evt))
1694 			arm_smmu_handle_ppr(smmu, evt);
1695 
1696 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1697 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1698 	} while (!queue_empty(llq));
1699 
1700 	/* Sync our overflow flag, as we believe we're up to speed */
1701 	queue_sync_cons_ovf(q);
1702 	return IRQ_HANDLED;
1703 }
1704 
arm_smmu_gerror_handler(int irq,void * dev)1705 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1706 {
1707 	u32 gerror, gerrorn, active;
1708 	struct arm_smmu_device *smmu = dev;
1709 
1710 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1711 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1712 
1713 	active = gerror ^ gerrorn;
1714 	if (!(active & GERROR_ERR_MASK))
1715 		return IRQ_NONE; /* No errors pending */
1716 
1717 	dev_warn(smmu->dev,
1718 		 "unexpected global error reported (0x%08x), this could be serious\n",
1719 		 active);
1720 
1721 	if (active & GERROR_SFM_ERR) {
1722 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1723 		arm_smmu_device_disable(smmu);
1724 	}
1725 
1726 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1727 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1728 
1729 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1730 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1731 
1732 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1733 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1734 
1735 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1736 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1737 
1738 	if (active & GERROR_PRIQ_ABT_ERR)
1739 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1740 
1741 	if (active & GERROR_EVTQ_ABT_ERR)
1742 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1743 
1744 	if (active & GERROR_CMDQ_ERR)
1745 		arm_smmu_cmdq_skip_err(smmu);
1746 
1747 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1748 	return IRQ_HANDLED;
1749 }
1750 
arm_smmu_combined_irq_thread(int irq,void * dev)1751 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1752 {
1753 	struct arm_smmu_device *smmu = dev;
1754 
1755 	arm_smmu_evtq_thread(irq, dev);
1756 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1757 		arm_smmu_priq_thread(irq, dev);
1758 
1759 	return IRQ_HANDLED;
1760 }
1761 
arm_smmu_combined_irq_handler(int irq,void * dev)1762 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1763 {
1764 	arm_smmu_gerror_handler(irq, dev);
1765 	return IRQ_WAKE_THREAD;
1766 }
1767 
1768 static void
arm_smmu_atc_inv_to_cmd(int ssid,unsigned long iova,size_t size,struct arm_smmu_cmdq_ent * cmd)1769 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1770 			struct arm_smmu_cmdq_ent *cmd)
1771 {
1772 	size_t log2_span;
1773 	size_t span_mask;
1774 	/* ATC invalidates are always on 4096-bytes pages */
1775 	size_t inval_grain_shift = 12;
1776 	unsigned long page_start, page_end;
1777 
1778 	/*
1779 	 * ATS and PASID:
1780 	 *
1781 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1782 	 * prefix. In that case all ATC entries within the address range are
1783 	 * invalidated, including those that were requested with a PASID! There
1784 	 * is no way to invalidate only entries without PASID.
1785 	 *
1786 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1787 	 * traffic), translation requests without PASID create ATC entries
1788 	 * without PASID, which must be invalidated with substream_valid clear.
1789 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1790 	 * ATC entries within the address range.
1791 	 */
1792 	*cmd = (struct arm_smmu_cmdq_ent) {
1793 		.opcode			= CMDQ_OP_ATC_INV,
1794 		.substream_valid	= (ssid != IOMMU_NO_PASID),
1795 		.atc.ssid		= ssid,
1796 	};
1797 
1798 	if (!size) {
1799 		cmd->atc.size = ATC_INV_SIZE_ALL;
1800 		return;
1801 	}
1802 
1803 	page_start	= iova >> inval_grain_shift;
1804 	page_end	= (iova + size - 1) >> inval_grain_shift;
1805 
1806 	/*
1807 	 * In an ATS Invalidate Request, the address must be aligned on the
1808 	 * range size, which must be a power of two number of page sizes. We
1809 	 * thus have to choose between grossly over-invalidating the region, or
1810 	 * splitting the invalidation into multiple commands. For simplicity
1811 	 * we'll go with the first solution, but should refine it in the future
1812 	 * if multiple commands are shown to be more efficient.
1813 	 *
1814 	 * Find the smallest power of two that covers the range. The most
1815 	 * significant differing bit between the start and end addresses,
1816 	 * fls(start ^ end), indicates the required span. For example:
1817 	 *
1818 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1819 	 *		x = 0b1000 ^ 0b1011 = 0b11
1820 	 *		span = 1 << fls(x) = 4
1821 	 *
1822 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1823 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1824 	 *		span = 1 << fls(x) = 16
1825 	 */
1826 	log2_span	= fls_long(page_start ^ page_end);
1827 	span_mask	= (1ULL << log2_span) - 1;
1828 
1829 	page_start	&= ~span_mask;
1830 
1831 	cmd->atc.addr	= page_start << inval_grain_shift;
1832 	cmd->atc.size	= log2_span;
1833 }
1834 
arm_smmu_atc_inv_master(struct arm_smmu_master * master,ioasid_t ssid)1835 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
1836 				   ioasid_t ssid)
1837 {
1838 	int i;
1839 	struct arm_smmu_cmdq_ent cmd;
1840 	struct arm_smmu_cmdq_batch cmds;
1841 
1842 	arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
1843 
1844 	arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
1845 	for (i = 0; i < master->num_streams; i++) {
1846 		cmd.atc.sid = master->streams[i].id;
1847 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1848 	}
1849 
1850 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1851 }
1852 
arm_smmu_atc_inv_domain(struct arm_smmu_domain * smmu_domain,unsigned long iova,size_t size)1853 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1854 			    unsigned long iova, size_t size)
1855 {
1856 	struct arm_smmu_master_domain *master_domain;
1857 	int i;
1858 	unsigned long flags;
1859 	struct arm_smmu_cmdq_ent cmd = {
1860 		.opcode = CMDQ_OP_ATC_INV,
1861 	};
1862 	struct arm_smmu_cmdq_batch cmds;
1863 
1864 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1865 		return 0;
1866 
1867 	/*
1868 	 * Ensure that we've completed prior invalidation of the main TLBs
1869 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1870 	 * arm_smmu_enable_ats():
1871 	 *
1872 	 *	// unmap()			// arm_smmu_enable_ats()
1873 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1874 	 *	smp_mb();			[...]
1875 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1876 	 *
1877 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1878 	 * ATS was enabled at the PCI device before completion of the TLBI.
1879 	 */
1880 	smp_mb();
1881 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1882 		return 0;
1883 
1884 	arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
1885 
1886 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1887 	list_for_each_entry(master_domain, &smmu_domain->devices,
1888 			    devices_elm) {
1889 		struct arm_smmu_master *master = master_domain->master;
1890 
1891 		if (!master->ats_enabled)
1892 			continue;
1893 
1894 		arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, &cmd);
1895 
1896 		for (i = 0; i < master->num_streams; i++) {
1897 			cmd.atc.sid = master->streams[i].id;
1898 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1899 		}
1900 	}
1901 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1902 
1903 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1904 }
1905 
1906 /* IO_PGTABLE API */
arm_smmu_tlb_inv_context(void * cookie)1907 static void arm_smmu_tlb_inv_context(void *cookie)
1908 {
1909 	struct arm_smmu_domain *smmu_domain = cookie;
1910 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1911 	struct arm_smmu_cmdq_ent cmd;
1912 
1913 	/*
1914 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1915 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1916 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1917 	 * insertion to guarantee those are observed before the TLBI. Do be
1918 	 * careful, 007.
1919 	 */
1920 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1921 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
1922 	} else {
1923 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1924 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1925 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1926 	}
1927 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
1928 }
1929 
__arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent * cmd,unsigned long iova,size_t size,size_t granule,struct arm_smmu_domain * smmu_domain)1930 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1931 				     unsigned long iova, size_t size,
1932 				     size_t granule,
1933 				     struct arm_smmu_domain *smmu_domain)
1934 {
1935 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1936 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1937 	size_t inv_range = granule;
1938 	struct arm_smmu_cmdq_batch cmds;
1939 
1940 	if (!size)
1941 		return;
1942 
1943 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1944 		/* Get the leaf page size */
1945 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1946 
1947 		num_pages = size >> tg;
1948 
1949 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1950 		cmd->tlbi.tg = (tg - 10) / 2;
1951 
1952 		/*
1953 		 * Determine what level the granule is at. For non-leaf, both
1954 		 * io-pgtable and SVA pass a nominal last-level granule because
1955 		 * they don't know what level(s) actually apply, so ignore that
1956 		 * and leave TTL=0. However for various errata reasons we still
1957 		 * want to use a range command, so avoid the SVA corner case
1958 		 * where both scale and num could be 0 as well.
1959 		 */
1960 		if (cmd->tlbi.leaf)
1961 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1962 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
1963 			num_pages++;
1964 	}
1965 
1966 	arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
1967 
1968 	while (iova < end) {
1969 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1970 			/*
1971 			 * On each iteration of the loop, the range is 5 bits
1972 			 * worth of the aligned size remaining.
1973 			 * The range in pages is:
1974 			 *
1975 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1976 			 */
1977 			unsigned long scale, num;
1978 
1979 			/* Determine the power of 2 multiple number of pages */
1980 			scale = __ffs(num_pages);
1981 			cmd->tlbi.scale = scale;
1982 
1983 			/* Determine how many chunks of 2^scale size we have */
1984 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1985 			cmd->tlbi.num = num - 1;
1986 
1987 			/* range is num * 2^scale * pgsize */
1988 			inv_range = num << (scale + tg);
1989 
1990 			/* Clear out the lower order bits for the next iteration */
1991 			num_pages -= num << scale;
1992 		}
1993 
1994 		cmd->tlbi.addr = iova;
1995 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1996 		iova += inv_range;
1997 	}
1998 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1999 }
2000 
arm_smmu_tlb_inv_range_domain(unsigned long iova,size_t size,size_t granule,bool leaf,struct arm_smmu_domain * smmu_domain)2001 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
2002 					  size_t granule, bool leaf,
2003 					  struct arm_smmu_domain *smmu_domain)
2004 {
2005 	struct arm_smmu_cmdq_ent cmd = {
2006 		.tlbi = {
2007 			.leaf	= leaf,
2008 		},
2009 	};
2010 
2011 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2012 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2013 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
2014 		cmd.tlbi.asid	= smmu_domain->cd.asid;
2015 	} else {
2016 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
2017 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2018 	}
2019 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2020 
2021 	/*
2022 	 * Unfortunately, this can't be leaf-only since we may have
2023 	 * zapped an entire table.
2024 	 */
2025 	arm_smmu_atc_inv_domain(smmu_domain, iova, size);
2026 }
2027 
arm_smmu_tlb_inv_range_asid(unsigned long iova,size_t size,int asid,size_t granule,bool leaf,struct arm_smmu_domain * smmu_domain)2028 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
2029 				 size_t granule, bool leaf,
2030 				 struct arm_smmu_domain *smmu_domain)
2031 {
2032 	struct arm_smmu_cmdq_ent cmd = {
2033 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2034 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
2035 		.tlbi = {
2036 			.asid	= asid,
2037 			.leaf	= leaf,
2038 		},
2039 	};
2040 
2041 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2042 }
2043 
arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather * gather,unsigned long iova,size_t granule,void * cookie)2044 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2045 					 unsigned long iova, size_t granule,
2046 					 void *cookie)
2047 {
2048 	struct arm_smmu_domain *smmu_domain = cookie;
2049 	struct iommu_domain *domain = &smmu_domain->domain;
2050 
2051 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2052 }
2053 
arm_smmu_tlb_inv_walk(unsigned long iova,size_t size,size_t granule,void * cookie)2054 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2055 				  size_t granule, void *cookie)
2056 {
2057 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2058 }
2059 
2060 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2061 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2062 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2063 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2064 };
2065 
arm_smmu_dbm_capable(struct arm_smmu_device * smmu)2066 static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
2067 {
2068 	u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
2069 
2070 	return (smmu->features & features) == features;
2071 }
2072 
2073 /* IOMMU API */
arm_smmu_capable(struct device * dev,enum iommu_cap cap)2074 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2075 {
2076 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2077 
2078 	switch (cap) {
2079 	case IOMMU_CAP_CACHE_COHERENCY:
2080 		/* Assume that a coherent TCU implies coherent TBUs */
2081 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2082 	case IOMMU_CAP_NOEXEC:
2083 	case IOMMU_CAP_DEFERRED_FLUSH:
2084 		return true;
2085 	case IOMMU_CAP_DIRTY_TRACKING:
2086 		return arm_smmu_dbm_capable(master->smmu);
2087 	default:
2088 		return false;
2089 	}
2090 }
2091 
arm_smmu_domain_alloc(void)2092 struct arm_smmu_domain *arm_smmu_domain_alloc(void)
2093 {
2094 	struct arm_smmu_domain *smmu_domain;
2095 
2096 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2097 	if (!smmu_domain)
2098 		return ERR_PTR(-ENOMEM);
2099 
2100 	mutex_init(&smmu_domain->init_mutex);
2101 	INIT_LIST_HEAD(&smmu_domain->devices);
2102 	spin_lock_init(&smmu_domain->devices_lock);
2103 
2104 	return smmu_domain;
2105 }
2106 
arm_smmu_domain_alloc_paging(struct device * dev)2107 static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
2108 {
2109 	struct arm_smmu_domain *smmu_domain;
2110 
2111 	/*
2112 	 * Allocate the domain and initialise some of its data structures.
2113 	 * We can't really do anything meaningful until we've added a
2114 	 * master.
2115 	 */
2116 	smmu_domain = arm_smmu_domain_alloc();
2117 	if (IS_ERR(smmu_domain))
2118 		return ERR_CAST(smmu_domain);
2119 
2120 	if (dev) {
2121 		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2122 		int ret;
2123 
2124 		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0);
2125 		if (ret) {
2126 			kfree(smmu_domain);
2127 			return ERR_PTR(ret);
2128 		}
2129 	}
2130 	return &smmu_domain->domain;
2131 }
2132 
arm_smmu_domain_free_paging(struct iommu_domain * domain)2133 static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
2134 {
2135 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2136 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2137 
2138 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2139 
2140 	/* Free the ASID or VMID */
2141 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2142 		/* Prevent SVA from touching the CD while we're freeing it */
2143 		mutex_lock(&arm_smmu_asid_lock);
2144 		xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
2145 		mutex_unlock(&arm_smmu_asid_lock);
2146 	} else {
2147 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2148 		if (cfg->vmid)
2149 			ida_free(&smmu->vmid_map, cfg->vmid);
2150 	}
2151 
2152 	kfree(smmu_domain);
2153 }
2154 
arm_smmu_domain_finalise_s1(struct arm_smmu_device * smmu,struct arm_smmu_domain * smmu_domain)2155 static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
2156 				       struct arm_smmu_domain *smmu_domain)
2157 {
2158 	int ret;
2159 	u32 asid = 0;
2160 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2161 
2162 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2163 	mutex_lock(&arm_smmu_asid_lock);
2164 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
2165 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2166 	cd->asid	= (u16)asid;
2167 	mutex_unlock(&arm_smmu_asid_lock);
2168 	return ret;
2169 }
2170 
arm_smmu_domain_finalise_s2(struct arm_smmu_device * smmu,struct arm_smmu_domain * smmu_domain)2171 static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
2172 				       struct arm_smmu_domain *smmu_domain)
2173 {
2174 	int vmid;
2175 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2176 
2177 	/* Reserve VMID 0 for stage-2 bypass STEs */
2178 	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2179 			       GFP_KERNEL);
2180 	if (vmid < 0)
2181 		return vmid;
2182 
2183 	cfg->vmid	= (u16)vmid;
2184 	return 0;
2185 }
2186 
arm_smmu_domain_finalise(struct arm_smmu_domain * smmu_domain,struct arm_smmu_device * smmu,u32 flags)2187 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
2188 				    struct arm_smmu_device *smmu, u32 flags)
2189 {
2190 	int ret;
2191 	enum io_pgtable_fmt fmt;
2192 	struct io_pgtable_cfg pgtbl_cfg;
2193 	struct io_pgtable_ops *pgtbl_ops;
2194 	int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
2195 				 struct arm_smmu_domain *smmu_domain);
2196 	bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
2197 
2198 	/* Restrict the stage to what we can actually support */
2199 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2200 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2201 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2202 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2203 
2204 	pgtbl_cfg = (struct io_pgtable_cfg) {
2205 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2206 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2207 		.tlb		= &arm_smmu_flush_ops,
2208 		.iommu_dev	= smmu->dev,
2209 	};
2210 
2211 	switch (smmu_domain->stage) {
2212 	case ARM_SMMU_DOMAIN_S1: {
2213 		unsigned long ias = (smmu->features &
2214 				     ARM_SMMU_FEAT_VAX) ? 52 : 48;
2215 
2216 		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
2217 		pgtbl_cfg.oas = smmu->ias;
2218 		if (enable_dirty)
2219 			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
2220 		fmt = ARM_64_LPAE_S1;
2221 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2222 		break;
2223 	}
2224 	case ARM_SMMU_DOMAIN_S2:
2225 		if (enable_dirty)
2226 			return -EOPNOTSUPP;
2227 		pgtbl_cfg.ias = smmu->ias;
2228 		pgtbl_cfg.oas = smmu->oas;
2229 		fmt = ARM_64_LPAE_S2;
2230 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2231 		break;
2232 	default:
2233 		return -EINVAL;
2234 	}
2235 
2236 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2237 	if (!pgtbl_ops)
2238 		return -ENOMEM;
2239 
2240 	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2241 	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2242 	smmu_domain->domain.geometry.force_aperture = true;
2243 	if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
2244 		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
2245 
2246 	ret = finalise_stage_fn(smmu, smmu_domain);
2247 	if (ret < 0) {
2248 		free_io_pgtable_ops(pgtbl_ops);
2249 		return ret;
2250 	}
2251 
2252 	smmu_domain->pgtbl_ops = pgtbl_ops;
2253 	smmu_domain->smmu = smmu;
2254 	return 0;
2255 }
2256 
2257 static struct arm_smmu_ste *
arm_smmu_get_step_for_sid(struct arm_smmu_device * smmu,u32 sid)2258 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2259 {
2260 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2261 
2262 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2263 		/* Two-level walk */
2264 		return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]
2265 				->stes[arm_smmu_strtab_l2_idx(sid)];
2266 	} else {
2267 		/* Simple linear lookup */
2268 		return &cfg->linear.table[sid];
2269 	}
2270 }
2271 
arm_smmu_install_ste_for_dev(struct arm_smmu_master * master,const struct arm_smmu_ste * target)2272 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
2273 					 const struct arm_smmu_ste *target)
2274 {
2275 	int i, j;
2276 	struct arm_smmu_device *smmu = master->smmu;
2277 
2278 	master->cd_table.in_ste =
2279 		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) ==
2280 		STRTAB_STE_0_CFG_S1_TRANS;
2281 	master->ste_ats_enabled =
2282 		FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) ==
2283 		STRTAB_STE_1_EATS_TRANS;
2284 
2285 	for (i = 0; i < master->num_streams; ++i) {
2286 		u32 sid = master->streams[i].id;
2287 		struct arm_smmu_ste *step =
2288 			arm_smmu_get_step_for_sid(smmu, sid);
2289 
2290 		/* Bridged PCI devices may end up with duplicated IDs */
2291 		for (j = 0; j < i; j++)
2292 			if (master->streams[j].id == sid)
2293 				break;
2294 		if (j < i)
2295 			continue;
2296 
2297 		arm_smmu_write_ste(master, sid, step, target);
2298 	}
2299 }
2300 
arm_smmu_ats_supported(struct arm_smmu_master * master)2301 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2302 {
2303 	struct device *dev = master->dev;
2304 	struct arm_smmu_device *smmu = master->smmu;
2305 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2306 
2307 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2308 		return false;
2309 
2310 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2311 		return false;
2312 
2313 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2314 }
2315 
arm_smmu_enable_ats(struct arm_smmu_master * master)2316 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2317 {
2318 	size_t stu;
2319 	struct pci_dev *pdev;
2320 	struct arm_smmu_device *smmu = master->smmu;
2321 
2322 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2323 	stu = __ffs(smmu->pgsize_bitmap);
2324 	pdev = to_pci_dev(master->dev);
2325 
2326 	/*
2327 	 * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
2328 	 */
2329 	arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2330 	if (pci_enable_ats(pdev, stu))
2331 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2332 }
2333 
arm_smmu_enable_pasid(struct arm_smmu_master * master)2334 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2335 {
2336 	int ret;
2337 	int features;
2338 	int num_pasids;
2339 	struct pci_dev *pdev;
2340 
2341 	if (!dev_is_pci(master->dev))
2342 		return -ENODEV;
2343 
2344 	pdev = to_pci_dev(master->dev);
2345 
2346 	features = pci_pasid_features(pdev);
2347 	if (features < 0)
2348 		return features;
2349 
2350 	num_pasids = pci_max_pasids(pdev);
2351 	if (num_pasids <= 0)
2352 		return num_pasids;
2353 
2354 	ret = pci_enable_pasid(pdev, features);
2355 	if (ret) {
2356 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2357 		return ret;
2358 	}
2359 
2360 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2361 				  master->smmu->ssid_bits);
2362 	return 0;
2363 }
2364 
arm_smmu_disable_pasid(struct arm_smmu_master * master)2365 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2366 {
2367 	struct pci_dev *pdev;
2368 
2369 	if (!dev_is_pci(master->dev))
2370 		return;
2371 
2372 	pdev = to_pci_dev(master->dev);
2373 
2374 	if (!pdev->pasid_enabled)
2375 		return;
2376 
2377 	master->ssid_bits = 0;
2378 	pci_disable_pasid(pdev);
2379 }
2380 
2381 static struct arm_smmu_master_domain *
arm_smmu_find_master_domain(struct arm_smmu_domain * smmu_domain,struct arm_smmu_master * master,ioasid_t ssid)2382 arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
2383 			    struct arm_smmu_master *master,
2384 			    ioasid_t ssid)
2385 {
2386 	struct arm_smmu_master_domain *master_domain;
2387 
2388 	lockdep_assert_held(&smmu_domain->devices_lock);
2389 
2390 	list_for_each_entry(master_domain, &smmu_domain->devices,
2391 			    devices_elm) {
2392 		if (master_domain->master == master &&
2393 		    master_domain->ssid == ssid)
2394 			return master_domain;
2395 	}
2396 	return NULL;
2397 }
2398 
2399 /*
2400  * If the domain uses the smmu_domain->devices list return the arm_smmu_domain
2401  * structure, otherwise NULL. These domains track attached devices so they can
2402  * issue invalidations.
2403  */
2404 static struct arm_smmu_domain *
to_smmu_domain_devices(struct iommu_domain * domain)2405 to_smmu_domain_devices(struct iommu_domain *domain)
2406 {
2407 	/* The domain can be NULL only when processing the first attach */
2408 	if (!domain)
2409 		return NULL;
2410 	if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
2411 	    domain->type == IOMMU_DOMAIN_SVA)
2412 		return to_smmu_domain(domain);
2413 	return NULL;
2414 }
2415 
arm_smmu_remove_master_domain(struct arm_smmu_master * master,struct iommu_domain * domain,ioasid_t ssid)2416 static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
2417 					  struct iommu_domain *domain,
2418 					  ioasid_t ssid)
2419 {
2420 	struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain);
2421 	struct arm_smmu_master_domain *master_domain;
2422 	unsigned long flags;
2423 
2424 	if (!smmu_domain)
2425 		return;
2426 
2427 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2428 	master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid);
2429 	if (master_domain) {
2430 		list_del(&master_domain->devices_elm);
2431 		kfree(master_domain);
2432 		if (master->ats_enabled)
2433 			atomic_dec(&smmu_domain->nr_ats_masters);
2434 	}
2435 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2436 }
2437 
2438 struct arm_smmu_attach_state {
2439 	/* Inputs */
2440 	struct iommu_domain *old_domain;
2441 	struct arm_smmu_master *master;
2442 	bool cd_needs_ats;
2443 	ioasid_t ssid;
2444 	/* Resulting state */
2445 	bool ats_enabled;
2446 };
2447 
2448 /*
2449  * Start the sequence to attach a domain to a master. The sequence contains three
2450  * steps:
2451  *  arm_smmu_attach_prepare()
2452  *  arm_smmu_install_ste_for_dev()
2453  *  arm_smmu_attach_commit()
2454  *
2455  * If prepare succeeds then the sequence must be completed. The STE installed
2456  * must set the STE.EATS field according to state.ats_enabled.
2457  *
2458  * If the device supports ATS then this determines if EATS should be enabled
2459  * in the STE, and starts sequencing EATS disable if required.
2460  *
2461  * The change of the EATS in the STE and the PCI ATS config space is managed by
2462  * this sequence to be in the right order so that if PCI ATS is enabled then
2463  * STE.ETAS is enabled.
2464  *
2465  * new_domain can be a non-paging domain. In this case ATS will not be enabled,
2466  * and invalidations won't be tracked.
2467  */
arm_smmu_attach_prepare(struct arm_smmu_attach_state * state,struct iommu_domain * new_domain)2468 static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
2469 				   struct iommu_domain *new_domain)
2470 {
2471 	struct arm_smmu_master *master = state->master;
2472 	struct arm_smmu_master_domain *master_domain;
2473 	struct arm_smmu_domain *smmu_domain =
2474 		to_smmu_domain_devices(new_domain);
2475 	unsigned long flags;
2476 
2477 	/*
2478 	 * arm_smmu_share_asid() must not see two domains pointing to the same
2479 	 * arm_smmu_master_domain contents otherwise it could randomly write one
2480 	 * or the other to the CD.
2481 	 */
2482 	lockdep_assert_held(&arm_smmu_asid_lock);
2483 
2484 	if (smmu_domain || state->cd_needs_ats) {
2485 		/*
2486 		 * The SMMU does not support enabling ATS with bypass/abort.
2487 		 * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
2488 		 * Translation Requests and Translated transactions are denied
2489 		 * as though ATS is disabled for the stream (STE.EATS == 0b00),
2490 		 * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
2491 		 * (IHI0070Ea 5.2 Stream Table Entry).
2492 		 *
2493 		 * However, if we have installed a CD table and are using S1DSS
2494 		 * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS
2495 		 * skipping stage 1".
2496 		 *
2497 		 * Disable ATS if we are going to create a normal 0b100 bypass
2498 		 * STE.
2499 		 */
2500 		state->ats_enabled = arm_smmu_ats_supported(master);
2501 	}
2502 
2503 	if (smmu_domain) {
2504 		master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
2505 		if (!master_domain)
2506 			return -ENOMEM;
2507 		master_domain->master = master;
2508 		master_domain->ssid = state->ssid;
2509 
2510 		/*
2511 		 * During prepare we want the current smmu_domain and new
2512 		 * smmu_domain to be in the devices list before we change any
2513 		 * HW. This ensures that both domains will send ATS
2514 		 * invalidations to the master until we are done.
2515 		 *
2516 		 * It is tempting to make this list only track masters that are
2517 		 * using ATS, but arm_smmu_share_asid() also uses this to change
2518 		 * the ASID of a domain, unrelated to ATS.
2519 		 *
2520 		 * Notice if we are re-attaching the same domain then the list
2521 		 * will have two identical entries and commit will remove only
2522 		 * one of them.
2523 		 */
2524 		spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2525 		if (state->ats_enabled)
2526 			atomic_inc(&smmu_domain->nr_ats_masters);
2527 		list_add(&master_domain->devices_elm, &smmu_domain->devices);
2528 		spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2529 	}
2530 
2531 	if (!state->ats_enabled && master->ats_enabled) {
2532 		pci_disable_ats(to_pci_dev(master->dev));
2533 		/*
2534 		 * This is probably overkill, but the config write for disabling
2535 		 * ATS should complete before the STE is configured to generate
2536 		 * UR to avoid AER noise.
2537 		 */
2538 		wmb();
2539 	}
2540 	return 0;
2541 }
2542 
2543 /*
2544  * Commit is done after the STE/CD are configured with the EATS setting. It
2545  * completes synchronizing the PCI device's ATC and finishes manipulating the
2546  * smmu_domain->devices list.
2547  */
arm_smmu_attach_commit(struct arm_smmu_attach_state * state)2548 static void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
2549 {
2550 	struct arm_smmu_master *master = state->master;
2551 
2552 	lockdep_assert_held(&arm_smmu_asid_lock);
2553 
2554 	if (state->ats_enabled && !master->ats_enabled) {
2555 		arm_smmu_enable_ats(master);
2556 	} else if (state->ats_enabled && master->ats_enabled) {
2557 		/*
2558 		 * The translation has changed, flush the ATC. At this point the
2559 		 * SMMU is translating for the new domain and both the old&new
2560 		 * domain will issue invalidations.
2561 		 */
2562 		arm_smmu_atc_inv_master(master, state->ssid);
2563 	} else if (!state->ats_enabled && master->ats_enabled) {
2564 		/* ATS is being switched off, invalidate the entire ATC */
2565 		arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2566 	}
2567 
2568 	arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);
2569 	master->ats_enabled = state->ats_enabled;
2570 }
2571 
arm_smmu_attach_dev(struct iommu_domain * domain,struct device * dev)2572 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2573 {
2574 	int ret = 0;
2575 	struct arm_smmu_ste target;
2576 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2577 	struct arm_smmu_device *smmu;
2578 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2579 	struct arm_smmu_attach_state state = {
2580 		.old_domain = iommu_get_domain_for_dev(dev),
2581 		.ssid = IOMMU_NO_PASID,
2582 	};
2583 	struct arm_smmu_master *master;
2584 	struct arm_smmu_cd *cdptr;
2585 
2586 	if (!fwspec)
2587 		return -ENOENT;
2588 
2589 	state.master = master = dev_iommu_priv_get(dev);
2590 	smmu = master->smmu;
2591 
2592 	mutex_lock(&smmu_domain->init_mutex);
2593 
2594 	if (!smmu_domain->smmu) {
2595 		ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
2596 	} else if (smmu_domain->smmu != smmu)
2597 		ret = -EINVAL;
2598 
2599 	mutex_unlock(&smmu_domain->init_mutex);
2600 	if (ret)
2601 		return ret;
2602 
2603 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2604 		cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
2605 		if (!cdptr)
2606 			return -ENOMEM;
2607 	} else if (arm_smmu_ssids_in_use(&master->cd_table))
2608 		return -EBUSY;
2609 
2610 	/*
2611 	 * Prevent arm_smmu_share_asid() from trying to change the ASID
2612 	 * of either the old or new domain while we are working on it.
2613 	 * This allows the STE and the smmu_domain->devices list to
2614 	 * be inconsistent during this routine.
2615 	 */
2616 	mutex_lock(&arm_smmu_asid_lock);
2617 
2618 	ret = arm_smmu_attach_prepare(&state, domain);
2619 	if (ret) {
2620 		mutex_unlock(&arm_smmu_asid_lock);
2621 		return ret;
2622 	}
2623 
2624 	switch (smmu_domain->stage) {
2625 	case ARM_SMMU_DOMAIN_S1: {
2626 		struct arm_smmu_cd target_cd;
2627 
2628 		arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
2629 		arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
2630 					&target_cd);
2631 		arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled,
2632 					  STRTAB_STE_1_S1DSS_SSID0);
2633 		arm_smmu_install_ste_for_dev(master, &target);
2634 		break;
2635 	}
2636 	case ARM_SMMU_DOMAIN_S2:
2637 		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain,
2638 					    state.ats_enabled);
2639 		arm_smmu_install_ste_for_dev(master, &target);
2640 		arm_smmu_clear_cd(master, IOMMU_NO_PASID);
2641 		break;
2642 	}
2643 
2644 	arm_smmu_attach_commit(&state);
2645 	mutex_unlock(&arm_smmu_asid_lock);
2646 	return 0;
2647 }
2648 
arm_smmu_s1_set_dev_pasid(struct iommu_domain * domain,struct device * dev,ioasid_t id)2649 static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
2650 				      struct device *dev, ioasid_t id)
2651 {
2652 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2653 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2654 	struct arm_smmu_device *smmu = master->smmu;
2655 	struct arm_smmu_cd target_cd;
2656 	int ret = 0;
2657 
2658 	mutex_lock(&smmu_domain->init_mutex);
2659 	if (!smmu_domain->smmu)
2660 		ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
2661 	else if (smmu_domain->smmu != smmu)
2662 		ret = -EINVAL;
2663 	mutex_unlock(&smmu_domain->init_mutex);
2664 	if (ret)
2665 		return ret;
2666 
2667 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
2668 		return -EINVAL;
2669 
2670 	/*
2671 	 * We can read cd.asid outside the lock because arm_smmu_set_pasid()
2672 	 * will fix it
2673 	 */
2674 	arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
2675 	return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
2676 				  &target_cd);
2677 }
2678 
arm_smmu_update_ste(struct arm_smmu_master * master,struct iommu_domain * sid_domain,bool ats_enabled)2679 static void arm_smmu_update_ste(struct arm_smmu_master *master,
2680 				struct iommu_domain *sid_domain,
2681 				bool ats_enabled)
2682 {
2683 	unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE;
2684 	struct arm_smmu_ste ste;
2685 
2686 	if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled)
2687 		return;
2688 
2689 	if (sid_domain->type == IOMMU_DOMAIN_IDENTITY)
2690 		s1dss = STRTAB_STE_1_S1DSS_BYPASS;
2691 	else
2692 		WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED);
2693 
2694 	/*
2695 	 * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior
2696 	 * using s1dss if necessary. If the cd_table is already installed then
2697 	 * the S1DSS is correct and this will just update the EATS. Otherwise it
2698 	 * installs the entire thing. This will be hitless.
2699 	 */
2700 	arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
2701 	arm_smmu_install_ste_for_dev(master, &ste);
2702 }
2703 
arm_smmu_set_pasid(struct arm_smmu_master * master,struct arm_smmu_domain * smmu_domain,ioasid_t pasid,struct arm_smmu_cd * cd)2704 int arm_smmu_set_pasid(struct arm_smmu_master *master,
2705 		       struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
2706 		       struct arm_smmu_cd *cd)
2707 {
2708 	struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
2709 	struct arm_smmu_attach_state state = {
2710 		.master = master,
2711 		/*
2712 		 * For now the core code prevents calling this when a domain is
2713 		 * already attached, no need to set old_domain.
2714 		 */
2715 		.ssid = pasid,
2716 	};
2717 	struct arm_smmu_cd *cdptr;
2718 	int ret;
2719 
2720 	/* The core code validates pasid */
2721 
2722 	if (smmu_domain->smmu != master->smmu)
2723 		return -EINVAL;
2724 
2725 	if (!master->cd_table.in_ste &&
2726 	    sid_domain->type != IOMMU_DOMAIN_IDENTITY &&
2727 	    sid_domain->type != IOMMU_DOMAIN_BLOCKED)
2728 		return -EINVAL;
2729 
2730 	cdptr = arm_smmu_alloc_cd_ptr(master, pasid);
2731 	if (!cdptr)
2732 		return -ENOMEM;
2733 
2734 	mutex_lock(&arm_smmu_asid_lock);
2735 	ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain);
2736 	if (ret)
2737 		goto out_unlock;
2738 
2739 	/*
2740 	 * We don't want to obtain to the asid_lock too early, so fix up the
2741 	 * caller set ASID under the lock in case it changed.
2742 	 */
2743 	cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
2744 	cd->data[0] |= cpu_to_le64(
2745 		FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));
2746 
2747 	arm_smmu_write_cd_entry(master, pasid, cdptr, cd);
2748 	arm_smmu_update_ste(master, sid_domain, state.ats_enabled);
2749 
2750 	arm_smmu_attach_commit(&state);
2751 
2752 out_unlock:
2753 	mutex_unlock(&arm_smmu_asid_lock);
2754 	return ret;
2755 }
2756 
arm_smmu_remove_dev_pasid(struct device * dev,ioasid_t pasid,struct iommu_domain * domain)2757 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
2758 				      struct iommu_domain *domain)
2759 {
2760 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2761 	struct arm_smmu_domain *smmu_domain;
2762 
2763 	smmu_domain = to_smmu_domain(domain);
2764 
2765 	mutex_lock(&arm_smmu_asid_lock);
2766 	arm_smmu_clear_cd(master, pasid);
2767 	if (master->ats_enabled)
2768 		arm_smmu_atc_inv_master(master, pasid);
2769 	arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);
2770 	mutex_unlock(&arm_smmu_asid_lock);
2771 
2772 	/*
2773 	 * When the last user of the CD table goes away downgrade the STE back
2774 	 * to a non-cd_table one.
2775 	 */
2776 	if (!arm_smmu_ssids_in_use(&master->cd_table)) {
2777 		struct iommu_domain *sid_domain =
2778 			iommu_get_domain_for_dev(master->dev);
2779 
2780 		if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
2781 		    sid_domain->type == IOMMU_DOMAIN_BLOCKED)
2782 			sid_domain->ops->attach_dev(sid_domain, dev);
2783 	}
2784 }
2785 
arm_smmu_attach_dev_ste(struct iommu_domain * domain,struct device * dev,struct arm_smmu_ste * ste,unsigned int s1dss)2786 static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
2787 				    struct device *dev,
2788 				    struct arm_smmu_ste *ste,
2789 				    unsigned int s1dss)
2790 {
2791 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2792 	struct arm_smmu_attach_state state = {
2793 		.master = master,
2794 		.old_domain = iommu_get_domain_for_dev(dev),
2795 		.ssid = IOMMU_NO_PASID,
2796 	};
2797 
2798 	/*
2799 	 * Do not allow any ASID to be changed while are working on the STE,
2800 	 * otherwise we could miss invalidations.
2801 	 */
2802 	mutex_lock(&arm_smmu_asid_lock);
2803 
2804 	/*
2805 	 * If the CD table is not in use we can use the provided STE, otherwise
2806 	 * we use a cdtable STE with the provided S1DSS.
2807 	 */
2808 	if (arm_smmu_ssids_in_use(&master->cd_table)) {
2809 		/*
2810 		 * If a CD table has to be present then we need to run with ATS
2811 		 * on because we have to assume a PASID is using ATS. For
2812 		 * IDENTITY this will setup things so that S1DSS=bypass which
2813 		 * follows the explanation in "13.6.4 Full ATS skipping stage 1"
2814 		 * and allows for ATS on the RID to work.
2815 		 */
2816 		state.cd_needs_ats = true;
2817 		arm_smmu_attach_prepare(&state, domain);
2818 		arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss);
2819 	} else {
2820 		arm_smmu_attach_prepare(&state, domain);
2821 	}
2822 	arm_smmu_install_ste_for_dev(master, ste);
2823 	arm_smmu_attach_commit(&state);
2824 	mutex_unlock(&arm_smmu_asid_lock);
2825 
2826 	/*
2827 	 * This has to be done after removing the master from the
2828 	 * arm_smmu_domain->devices to avoid races updating the same context
2829 	 * descriptor from arm_smmu_share_asid().
2830 	 */
2831 	arm_smmu_clear_cd(master, IOMMU_NO_PASID);
2832 }
2833 
arm_smmu_attach_dev_identity(struct iommu_domain * domain,struct device * dev)2834 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
2835 					struct device *dev)
2836 {
2837 	struct arm_smmu_ste ste;
2838 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2839 
2840 	arm_smmu_make_bypass_ste(master->smmu, &ste);
2841 	arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
2842 	return 0;
2843 }
2844 
2845 static const struct iommu_domain_ops arm_smmu_identity_ops = {
2846 	.attach_dev = arm_smmu_attach_dev_identity,
2847 };
2848 
2849 static struct iommu_domain arm_smmu_identity_domain = {
2850 	.type = IOMMU_DOMAIN_IDENTITY,
2851 	.ops = &arm_smmu_identity_ops,
2852 };
2853 
arm_smmu_attach_dev_blocked(struct iommu_domain * domain,struct device * dev)2854 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
2855 					struct device *dev)
2856 {
2857 	struct arm_smmu_ste ste;
2858 
2859 	arm_smmu_make_abort_ste(&ste);
2860 	arm_smmu_attach_dev_ste(domain, dev, &ste,
2861 				STRTAB_STE_1_S1DSS_TERMINATE);
2862 	return 0;
2863 }
2864 
2865 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
2866 	.attach_dev = arm_smmu_attach_dev_blocked,
2867 };
2868 
2869 static struct iommu_domain arm_smmu_blocked_domain = {
2870 	.type = IOMMU_DOMAIN_BLOCKED,
2871 	.ops = &arm_smmu_blocked_ops,
2872 };
2873 
2874 static struct iommu_domain *
arm_smmu_domain_alloc_user(struct device * dev,u32 flags,struct iommu_domain * parent,const struct iommu_user_data * user_data)2875 arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
2876 			   struct iommu_domain *parent,
2877 			   const struct iommu_user_data *user_data)
2878 {
2879 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2880 	const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
2881 	struct arm_smmu_domain *smmu_domain;
2882 	int ret;
2883 
2884 	if (flags & ~PAGING_FLAGS)
2885 		return ERR_PTR(-EOPNOTSUPP);
2886 	if (parent || user_data)
2887 		return ERR_PTR(-EOPNOTSUPP);
2888 
2889 	smmu_domain = arm_smmu_domain_alloc();
2890 	if (IS_ERR(smmu_domain))
2891 		return ERR_CAST(smmu_domain);
2892 
2893 	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
2894 	smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
2895 	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags);
2896 	if (ret)
2897 		goto err_free;
2898 	return &smmu_domain->domain;
2899 
2900 err_free:
2901 	kfree(smmu_domain);
2902 	return ERR_PTR(ret);
2903 }
2904 
arm_smmu_map_pages(struct iommu_domain * domain,unsigned long iova,phys_addr_t paddr,size_t pgsize,size_t pgcount,int prot,gfp_t gfp,size_t * mapped)2905 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2906 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2907 			      int prot, gfp_t gfp, size_t *mapped)
2908 {
2909 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2910 
2911 	if (!ops)
2912 		return -ENODEV;
2913 
2914 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2915 }
2916 
arm_smmu_unmap_pages(struct iommu_domain * domain,unsigned long iova,size_t pgsize,size_t pgcount,struct iommu_iotlb_gather * gather)2917 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2918 				   size_t pgsize, size_t pgcount,
2919 				   struct iommu_iotlb_gather *gather)
2920 {
2921 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2922 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2923 
2924 	if (!ops)
2925 		return 0;
2926 
2927 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2928 }
2929 
arm_smmu_flush_iotlb_all(struct iommu_domain * domain)2930 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2931 {
2932 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2933 
2934 	if (smmu_domain->smmu)
2935 		arm_smmu_tlb_inv_context(smmu_domain);
2936 }
2937 
arm_smmu_iotlb_sync(struct iommu_domain * domain,struct iommu_iotlb_gather * gather)2938 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2939 				struct iommu_iotlb_gather *gather)
2940 {
2941 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2942 
2943 	if (!gather->pgsize)
2944 		return;
2945 
2946 	arm_smmu_tlb_inv_range_domain(gather->start,
2947 				      gather->end - gather->start + 1,
2948 				      gather->pgsize, true, smmu_domain);
2949 }
2950 
2951 static phys_addr_t
arm_smmu_iova_to_phys(struct iommu_domain * domain,dma_addr_t iova)2952 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2953 {
2954 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2955 
2956 	if (!ops)
2957 		return 0;
2958 
2959 	return ops->iova_to_phys(ops, iova);
2960 }
2961 
2962 static struct platform_driver arm_smmu_driver;
2963 
2964 static
arm_smmu_get_by_fwnode(struct fwnode_handle * fwnode)2965 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2966 {
2967 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2968 							  fwnode);
2969 	put_device(dev);
2970 	return dev ? dev_get_drvdata(dev) : NULL;
2971 }
2972 
arm_smmu_sid_in_range(struct arm_smmu_device * smmu,u32 sid)2973 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2974 {
2975 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2976 		return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents;
2977 	return sid < smmu->strtab_cfg.linear.num_ents;
2978 }
2979 
arm_smmu_init_sid_strtab(struct arm_smmu_device * smmu,u32 sid)2980 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2981 {
2982 	/* Check the SIDs are in range of the SMMU and our stream table */
2983 	if (!arm_smmu_sid_in_range(smmu, sid))
2984 		return -ERANGE;
2985 
2986 	/* Ensure l2 strtab is initialised */
2987 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2988 		return arm_smmu_init_l2_strtab(smmu, sid);
2989 
2990 	return 0;
2991 }
2992 
arm_smmu_insert_master(struct arm_smmu_device * smmu,struct arm_smmu_master * master)2993 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2994 				  struct arm_smmu_master *master)
2995 {
2996 	int i;
2997 	int ret = 0;
2998 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2999 
3000 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
3001 				  GFP_KERNEL);
3002 	if (!master->streams)
3003 		return -ENOMEM;
3004 	master->num_streams = fwspec->num_ids;
3005 
3006 	mutex_lock(&smmu->streams_mutex);
3007 	for (i = 0; i < fwspec->num_ids; i++) {
3008 		struct arm_smmu_stream *new_stream = &master->streams[i];
3009 		struct rb_node *existing;
3010 		u32 sid = fwspec->ids[i];
3011 
3012 		new_stream->id = sid;
3013 		new_stream->master = master;
3014 
3015 		ret = arm_smmu_init_sid_strtab(smmu, sid);
3016 		if (ret)
3017 			break;
3018 
3019 		/* Insert into SID tree */
3020 		existing = rb_find_add(&new_stream->node, &smmu->streams,
3021 				       arm_smmu_streams_cmp_node);
3022 		if (existing) {
3023 			struct arm_smmu_master *existing_master =
3024 				rb_entry(existing, struct arm_smmu_stream, node)
3025 					->master;
3026 
3027 			/* Bridged PCI devices may end up with duplicated IDs */
3028 			if (existing_master == master)
3029 				continue;
3030 
3031 			dev_warn(master->dev,
3032 				 "stream %u already in tree from dev %s\n", sid,
3033 				 dev_name(existing_master->dev));
3034 			ret = -EINVAL;
3035 			break;
3036 		}
3037 	}
3038 
3039 	if (ret) {
3040 		for (i--; i >= 0; i--)
3041 			rb_erase(&master->streams[i].node, &smmu->streams);
3042 		kfree(master->streams);
3043 	}
3044 	mutex_unlock(&smmu->streams_mutex);
3045 
3046 	return ret;
3047 }
3048 
arm_smmu_remove_master(struct arm_smmu_master * master)3049 static void arm_smmu_remove_master(struct arm_smmu_master *master)
3050 {
3051 	int i;
3052 	struct arm_smmu_device *smmu = master->smmu;
3053 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
3054 
3055 	if (!smmu || !master->streams)
3056 		return;
3057 
3058 	mutex_lock(&smmu->streams_mutex);
3059 	for (i = 0; i < fwspec->num_ids; i++)
3060 		rb_erase(&master->streams[i].node, &smmu->streams);
3061 	mutex_unlock(&smmu->streams_mutex);
3062 
3063 	kfree(master->streams);
3064 }
3065 
arm_smmu_probe_device(struct device * dev)3066 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
3067 {
3068 	int ret;
3069 	struct arm_smmu_device *smmu;
3070 	struct arm_smmu_master *master;
3071 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
3072 
3073 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
3074 		return ERR_PTR(-EBUSY);
3075 
3076 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
3077 	if (!smmu)
3078 		return ERR_PTR(-ENODEV);
3079 
3080 	master = kzalloc(sizeof(*master), GFP_KERNEL);
3081 	if (!master)
3082 		return ERR_PTR(-ENOMEM);
3083 
3084 	master->dev = dev;
3085 	master->smmu = smmu;
3086 	dev_iommu_priv_set(dev, master);
3087 
3088 	ret = arm_smmu_insert_master(smmu, master);
3089 	if (ret)
3090 		goto err_free_master;
3091 
3092 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
3093 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
3094 
3095 	/*
3096 	 * Note that PASID must be enabled before, and disabled after ATS:
3097 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
3098 	 *
3099 	 *   Behavior is undefined if this bit is Set and the value of the PASID
3100 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
3101 	 *   are changed.
3102 	 */
3103 	arm_smmu_enable_pasid(master);
3104 
3105 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
3106 		master->ssid_bits = min_t(u8, master->ssid_bits,
3107 					  CTXDESC_LINEAR_CDMAX);
3108 
3109 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
3110 	     device_property_read_bool(dev, "dma-can-stall")) ||
3111 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
3112 		master->stall_enabled = true;
3113 
3114 	if (dev_is_pci(dev)) {
3115 		unsigned int stu = __ffs(smmu->pgsize_bitmap);
3116 
3117 		pci_prepare_ats(to_pci_dev(dev), stu);
3118 	}
3119 
3120 	return &smmu->iommu;
3121 
3122 err_free_master:
3123 	kfree(master);
3124 	return ERR_PTR(ret);
3125 }
3126 
arm_smmu_release_device(struct device * dev)3127 static void arm_smmu_release_device(struct device *dev)
3128 {
3129 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3130 
3131 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
3132 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
3133 
3134 	/* Put the STE back to what arm_smmu_init_strtab() sets */
3135 	if (dev->iommu->require_direct)
3136 		arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
3137 	else
3138 		arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
3139 
3140 	arm_smmu_disable_pasid(master);
3141 	arm_smmu_remove_master(master);
3142 	if (arm_smmu_cdtab_allocated(&master->cd_table))
3143 		arm_smmu_free_cd_tables(master);
3144 	kfree(master);
3145 }
3146 
arm_smmu_read_and_clear_dirty(struct iommu_domain * domain,unsigned long iova,size_t size,unsigned long flags,struct iommu_dirty_bitmap * dirty)3147 static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
3148 					 unsigned long iova, size_t size,
3149 					 unsigned long flags,
3150 					 struct iommu_dirty_bitmap *dirty)
3151 {
3152 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3153 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
3154 
3155 	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
3156 }
3157 
arm_smmu_set_dirty_tracking(struct iommu_domain * domain,bool enabled)3158 static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
3159 				       bool enabled)
3160 {
3161 	/*
3162 	 * Always enabled and the dirty bitmap is cleared prior to
3163 	 * set_dirty_tracking().
3164 	 */
3165 	return 0;
3166 }
3167 
arm_smmu_enable_nesting(struct iommu_domain * domain)3168 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
3169 {
3170 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3171 	int ret = 0;
3172 
3173 	mutex_lock(&smmu_domain->init_mutex);
3174 	if (smmu_domain->smmu)
3175 		ret = -EPERM;
3176 	else
3177 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
3178 	mutex_unlock(&smmu_domain->init_mutex);
3179 
3180 	return ret;
3181 }
3182 
arm_smmu_dev_enable_feature(struct device * dev,enum iommu_dev_features feat)3183 static int arm_smmu_dev_enable_feature(struct device *dev,
3184 				       enum iommu_dev_features feat)
3185 {
3186 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3187 
3188 	if (!master)
3189 		return -ENODEV;
3190 
3191 	switch (feat) {
3192 	case IOMMU_DEV_FEAT_IOPF:
3193 		if (!arm_smmu_master_iopf_supported(master))
3194 			return -EINVAL;
3195 		if (master->iopf_enabled)
3196 			return -EBUSY;
3197 		master->iopf_enabled = true;
3198 		return 0;
3199 	case IOMMU_DEV_FEAT_SVA:
3200 		if (!arm_smmu_master_sva_supported(master))
3201 			return -EINVAL;
3202 		if (arm_smmu_master_sva_enabled(master))
3203 			return -EBUSY;
3204 		return arm_smmu_master_enable_sva(master);
3205 	default:
3206 		return -EINVAL;
3207 	}
3208 }
3209 
arm_smmu_dev_disable_feature(struct device * dev,enum iommu_dev_features feat)3210 static int arm_smmu_dev_disable_feature(struct device *dev,
3211 					enum iommu_dev_features feat)
3212 {
3213 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3214 
3215 	if (!master)
3216 		return -EINVAL;
3217 
3218 	switch (feat) {
3219 	case IOMMU_DEV_FEAT_IOPF:
3220 		if (!master->iopf_enabled)
3221 			return -EINVAL;
3222 		if (master->sva_enabled)
3223 			return -EBUSY;
3224 		master->iopf_enabled = false;
3225 		return 0;
3226 	case IOMMU_DEV_FEAT_SVA:
3227 		if (!arm_smmu_master_sva_enabled(master))
3228 			return -EINVAL;
3229 		return arm_smmu_master_disable_sva(master);
3230 	default:
3231 		return -EINVAL;
3232 	}
3233 }
3234 
3235 /*
3236  * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
3237  * PCIe link and save the data to memory by DMA. The hardware is restricted to
3238  * use identity mapping only.
3239  */
3240 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
3241 					 (pdev)->device == 0xa12e)
3242 
arm_smmu_def_domain_type(struct device * dev)3243 static int arm_smmu_def_domain_type(struct device *dev)
3244 {
3245 	if (dev_is_pci(dev)) {
3246 		struct pci_dev *pdev = to_pci_dev(dev);
3247 
3248 		if (IS_HISI_PTT_DEVICE(pdev))
3249 			return IOMMU_DOMAIN_IDENTITY;
3250 	}
3251 
3252 	return 0;
3253 }
3254 
3255 static struct iommu_ops arm_smmu_ops = {
3256 	.identity_domain	= &arm_smmu_identity_domain,
3257 	.blocked_domain		= &arm_smmu_blocked_domain,
3258 	.capable		= arm_smmu_capable,
3259 	.domain_alloc_paging    = arm_smmu_domain_alloc_paging,
3260 	.domain_alloc_sva       = arm_smmu_sva_domain_alloc,
3261 	.domain_alloc_user	= arm_smmu_domain_alloc_user,
3262 	.probe_device		= arm_smmu_probe_device,
3263 	.release_device		= arm_smmu_release_device,
3264 	.device_group		= arm_smmu_device_group,
3265 	.of_xlate		= arm_smmu_of_xlate,
3266 	.get_resv_regions	= arm_smmu_get_resv_regions,
3267 	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
3268 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
3269 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
3270 	.page_response		= arm_smmu_page_response,
3271 	.def_domain_type	= arm_smmu_def_domain_type,
3272 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
3273 	.owner			= THIS_MODULE,
3274 	.default_domain_ops = &(const struct iommu_domain_ops) {
3275 		.attach_dev		= arm_smmu_attach_dev,
3276 		.set_dev_pasid		= arm_smmu_s1_set_dev_pasid,
3277 		.map_pages		= arm_smmu_map_pages,
3278 		.unmap_pages		= arm_smmu_unmap_pages,
3279 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
3280 		.iotlb_sync		= arm_smmu_iotlb_sync,
3281 		.iova_to_phys		= arm_smmu_iova_to_phys,
3282 		.enable_nesting		= arm_smmu_enable_nesting,
3283 		.free			= arm_smmu_domain_free_paging,
3284 	}
3285 };
3286 
3287 static struct iommu_dirty_ops arm_smmu_dirty_ops = {
3288 	.read_and_clear_dirty	= arm_smmu_read_and_clear_dirty,
3289 	.set_dirty_tracking     = arm_smmu_set_dirty_tracking,
3290 };
3291 
3292 /* Probing and initialisation functions */
arm_smmu_cmdq_init(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq)3293 int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
3294 		       struct arm_smmu_cmdq *cmdq)
3295 {
3296 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3297 
3298 	atomic_set(&cmdq->owner_prod, 0);
3299 	atomic_set(&cmdq->lock, 0);
3300 
3301 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
3302 							      GFP_KERNEL);
3303 	if (!cmdq->valid_map)
3304 		return -ENOMEM;
3305 
3306 	return 0;
3307 }
3308 
arm_smmu_init_queues(struct arm_smmu_device * smmu)3309 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3310 {
3311 	int ret;
3312 
3313 	/* cmdq */
3314 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
3315 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
3316 				      CMDQ_ENT_DWORDS, "cmdq");
3317 	if (ret)
3318 		return ret;
3319 
3320 	ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
3321 	if (ret)
3322 		return ret;
3323 
3324 	/* evtq */
3325 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
3326 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
3327 				      EVTQ_ENT_DWORDS, "evtq");
3328 	if (ret)
3329 		return ret;
3330 
3331 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
3332 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
3333 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
3334 		if (!smmu->evtq.iopf)
3335 			return -ENOMEM;
3336 	}
3337 
3338 	/* priq */
3339 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3340 		return 0;
3341 
3342 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
3343 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
3344 				       PRIQ_ENT_DWORDS, "priq");
3345 }
3346 
arm_smmu_init_structures(struct arm_smmu_device * smmu)3347 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3348 {
3349 	int ret;
3350 
3351 	mutex_init(&smmu->streams_mutex);
3352 	smmu->streams = RB_ROOT;
3353 
3354 	ret = arm_smmu_init_queues(smmu);
3355 	if (ret)
3356 		return ret;
3357 
3358 	ret = arm_smmu_init_strtab(smmu);
3359 	if (ret)
3360 		return ret;
3361 
3362 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB))
3363 		arm_smmu_init_initial_stes(smmu->strtab_cfg.linear.table,
3364 					   smmu->strtab_cfg.linear.num_ents);
3365 
3366 	if (smmu->impl_ops && smmu->impl_ops->init_structures)
3367 		return smmu->impl_ops->init_structures(smmu);
3368 
3369 	return 0;
3370 }
3371 
arm_smmu_setup_irqs(struct arm_smmu_device * smmu)3372 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3373 {
3374 	int ret, irq;
3375 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3376 
3377 	/* Disable IRQs first */
3378 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3379 				      ARM_SMMU_IRQ_CTRLACK);
3380 	if (ret) {
3381 		dev_err(smmu->dev, "failed to disable irqs\n");
3382 		return ret;
3383 	}
3384 
3385 	irq = smmu->combined_irq;
3386 	if (irq) {
3387 		/*
3388 		 * Cavium ThunderX2 implementation doesn't support unique irq
3389 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3390 		 */
3391 		ret = devm_request_threaded_irq(smmu->dev, irq,
3392 					arm_smmu_combined_irq_handler,
3393 					arm_smmu_combined_irq_thread,
3394 					IRQF_ONESHOT,
3395 					"arm-smmu-v3-combined-irq", smmu);
3396 		if (ret < 0)
3397 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3398 	} else
3399 		arm_smmu_setup_unique_irqs(smmu, arm_smmu_evtq_thread,
3400 					   arm_smmu_gerror_handler, arm_smmu_priq_thread);
3401 
3402 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3403 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3404 
3405 	/* Enable interrupt generation on the SMMU */
3406 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3407 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3408 	if (ret)
3409 		dev_warn(smmu->dev, "failed to enable irqs\n");
3410 
3411 	return 0;
3412 }
3413 
arm_smmu_device_reset(struct arm_smmu_device * smmu)3414 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
3415 {
3416 	int ret;
3417 	u32 reg, enables;
3418 	struct arm_smmu_cmdq_ent cmd;
3419 
3420 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3421 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3422 	if (reg & CR0_SMMUEN) {
3423 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3424 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3425 	}
3426 
3427 	ret = arm_smmu_device_disable(smmu);
3428 	if (ret)
3429 		return ret;
3430 
3431 	/* CR1 (table and queue memory attributes) */
3432 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3433 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3434 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3435 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3436 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3437 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3438 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3439 
3440 	/* CR2 (random crap) */
3441 	reg = CR2_PTM | CR2_RECINVSID;
3442 
3443 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3444 		reg |= CR2_E2H;
3445 
3446 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3447 
3448 	/* Stream table */
3449 	arm_smmu_write_strtab(smmu);
3450 
3451 	/* Command queue */
3452 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3453 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3454 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3455 
3456 	enables = CR0_CMDQEN;
3457 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3458 				      ARM_SMMU_CR0ACK);
3459 	if (ret) {
3460 		dev_err(smmu->dev, "failed to enable command queue\n");
3461 		return ret;
3462 	}
3463 
3464 	/* Invalidate any cached configuration */
3465 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3466 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3467 
3468 	/* Invalidate any stale TLB entries */
3469 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3470 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3471 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3472 	}
3473 
3474 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3475 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3476 
3477 	/* Event queue */
3478 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3479 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3480 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3481 
3482 	enables |= CR0_EVTQEN;
3483 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3484 				      ARM_SMMU_CR0ACK);
3485 	if (ret) {
3486 		dev_err(smmu->dev, "failed to enable event queue\n");
3487 		return ret;
3488 	}
3489 
3490 	/* PRI queue */
3491 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3492 		writeq_relaxed(smmu->priq.q.q_base,
3493 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3494 		writel_relaxed(smmu->priq.q.llq.prod,
3495 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3496 		writel_relaxed(smmu->priq.q.llq.cons,
3497 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3498 
3499 		enables |= CR0_PRIQEN;
3500 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3501 					      ARM_SMMU_CR0ACK);
3502 		if (ret) {
3503 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3504 			return ret;
3505 		}
3506 	}
3507 
3508 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3509 		enables |= CR0_ATSCHK;
3510 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3511 					      ARM_SMMU_CR0ACK);
3512 		if (ret) {
3513 			dev_err(smmu->dev, "failed to enable ATS check\n");
3514 			return ret;
3515 		}
3516 	}
3517 
3518 	ret = arm_smmu_setup_irqs(smmu);
3519 	if (ret) {
3520 		dev_err(smmu->dev, "failed to setup irqs\n");
3521 		return ret;
3522 	}
3523 
3524 	if (is_kdump_kernel())
3525 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3526 
3527 	/* Enable the SMMU interface */
3528 	enables |= CR0_SMMUEN;
3529 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3530 				      ARM_SMMU_CR0ACK);
3531 	if (ret) {
3532 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3533 		return ret;
3534 	}
3535 
3536 	if (smmu->impl_ops && smmu->impl_ops->device_reset) {
3537 		ret = smmu->impl_ops->device_reset(smmu);
3538 		if (ret) {
3539 			dev_err(smmu->dev, "failed to reset impl\n");
3540 			return ret;
3541 		}
3542 	}
3543 
3544 	return 0;
3545 }
3546 
arm_smmu_resource_size(struct arm_smmu_device * smmu)3547 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3548 {
3549 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3550 		return SZ_64K;
3551 	else
3552 		return SZ_128K;
3553 }
3554 
arm_smmu_ioremap(struct device * dev,resource_size_t start,resource_size_t size)3555 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3556 				      resource_size_t size)
3557 {
3558 	struct resource res = DEFINE_RES_MEM(start, size);
3559 
3560 	return devm_ioremap_resource(dev, &res);
3561 }
3562 
arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device * smmu)3563 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3564 {
3565 	struct list_head rmr_list;
3566 	struct iommu_resv_region *e;
3567 
3568 	INIT_LIST_HEAD(&rmr_list);
3569 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3570 
3571 	list_for_each_entry(e, &rmr_list, list) {
3572 		struct iommu_iort_rmr_data *rmr;
3573 		int ret, i;
3574 
3575 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3576 		for (i = 0; i < rmr->num_sids; i++) {
3577 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3578 			if (ret) {
3579 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3580 					rmr->sids[i]);
3581 				continue;
3582 			}
3583 
3584 			/*
3585 			 * STE table is not programmed to HW, see
3586 			 * arm_smmu_initial_bypass_stes()
3587 			 */
3588 			arm_smmu_make_bypass_ste(smmu,
3589 				arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
3590 		}
3591 	}
3592 
3593 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3594 }
3595 
arm_smmu_impl_remove(void * data)3596 static void arm_smmu_impl_remove(void *data)
3597 {
3598 	struct arm_smmu_device *smmu = data;
3599 
3600 	if (smmu->impl_ops && smmu->impl_ops->device_remove)
3601 		smmu->impl_ops->device_remove(smmu);
3602 }
3603 
3604 /*
3605  * Probe all the compiled in implementations. Each one checks to see if it
3606  * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which
3607  * replaces the callers. Otherwise the original is returned or ERR_PTR.
3608  */
arm_smmu_impl_probe(struct arm_smmu_device * smmu)3609 static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
3610 {
3611 	struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
3612 	int ret;
3613 
3614 	if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
3615 		new_smmu = tegra241_cmdqv_probe(smmu);
3616 
3617 	if (new_smmu == ERR_PTR(-ENODEV))
3618 		return smmu;
3619 	if (IS_ERR(new_smmu))
3620 		return new_smmu;
3621 
3622 	ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
3623 				       new_smmu);
3624 	if (ret)
3625 		return ERR_PTR(ret);
3626 	return new_smmu;
3627 }
3628 
arm_smmu_device_probe(struct platform_device * pdev)3629 static int arm_smmu_device_probe(struct platform_device *pdev)
3630 {
3631 	int ret;
3632 	struct resource *res;
3633 	resource_size_t ioaddr;
3634 	struct arm_smmu_device *smmu;
3635 	struct device *dev = &pdev->dev;
3636 
3637 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3638 	if (!smmu)
3639 		return -ENOMEM;
3640 	smmu->dev = dev;
3641 
3642 	ret = arm_smmu_fw_probe(pdev, smmu);
3643 	if (ret)
3644 		return ret;
3645 
3646 	smmu = arm_smmu_impl_probe(smmu);
3647 	if (IS_ERR(smmu))
3648 		return PTR_ERR(smmu);
3649 
3650 	/* Base address */
3651 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3652 	if (!res)
3653 		return -EINVAL;
3654 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3655 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3656 		return -EINVAL;
3657 	}
3658 	ioaddr = res->start;
3659 
3660 	/*
3661 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3662 	 * the PMCG registers which are reserved by the PMU driver.
3663 	 */
3664 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3665 	if (IS_ERR(smmu->base))
3666 		return PTR_ERR(smmu->base);
3667 
3668 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3669 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3670 					       ARM_SMMU_REG_SZ);
3671 		if (IS_ERR(smmu->page1))
3672 			return PTR_ERR(smmu->page1);
3673 	} else {
3674 		smmu->page1 = smmu->base;
3675 	}
3676 
3677 	arm_smmu_probe_irq(pdev, smmu);
3678 
3679 	/* Probe the h/w */
3680 	ret = arm_smmu_device_hw_probe(smmu);
3681 	if (ret)
3682 		return ret;
3683 
3684 	if (arm_smmu_sva_supported(smmu))
3685 		smmu->features |= ARM_SMMU_FEAT_SVA;
3686 
3687 	if (disable_msipolling)
3688 		smmu->options &= ~ARM_SMMU_OPT_MSIPOLL;
3689 
3690 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3691 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3692 	else
3693 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3694 
3695 	/* Initialise in-memory data structures */
3696 	ret = arm_smmu_init_structures(smmu);
3697 	if (ret)
3698 		goto err_free_iopf;
3699 
3700 	/* Record our private device structure */
3701 	platform_set_drvdata(pdev, smmu);
3702 
3703 	/* Check for RMRs and install bypass STEs if any */
3704 	arm_smmu_rmr_install_bypass_ste(smmu);
3705 
3706 	/* Reset the device */
3707 	ret = arm_smmu_device_reset(smmu);
3708 	if (ret)
3709 		goto err_disable;
3710 
3711 	/* And we're up. Go go go! */
3712 	return arm_smmu_register_iommu(smmu, &arm_smmu_ops, ioaddr);
3713 
3714 err_disable:
3715 	arm_smmu_device_disable(smmu);
3716 err_free_iopf:
3717 	iopf_queue_free(smmu->evtq.iopf);
3718 	return ret;
3719 }
3720 
arm_smmu_device_remove(struct platform_device * pdev)3721 static void arm_smmu_device_remove(struct platform_device *pdev)
3722 {
3723 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3724 
3725 	arm_smmu_unregister_iommu(smmu);
3726 	arm_smmu_device_disable(smmu);
3727 	iopf_queue_free(smmu->evtq.iopf);
3728 	ida_destroy(&smmu->vmid_map);
3729 }
3730 
arm_smmu_device_shutdown(struct platform_device * pdev)3731 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3732 {
3733 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3734 
3735 	arm_smmu_device_disable(smmu);
3736 }
3737 
3738 static const struct of_device_id arm_smmu_of_match[] = {
3739 	{ .compatible = "arm,smmu-v3", },
3740 	{ },
3741 };
3742 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3743 
arm_smmu_driver_unregister(struct platform_driver * drv)3744 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3745 {
3746 	arm_smmu_sva_notifier_synchronize();
3747 	platform_driver_unregister(drv);
3748 }
3749 
3750 static struct platform_driver arm_smmu_driver = {
3751 	.driver	= {
3752 		.name			= "arm-smmu-v3",
3753 		.of_match_table		= arm_smmu_of_match,
3754 		.suppress_bind_attrs	= true,
3755 	},
3756 	.probe	= arm_smmu_device_probe,
3757 	.remove_new = arm_smmu_device_remove,
3758 	.shutdown = arm_smmu_device_shutdown,
3759 };
3760 module_driver(arm_smmu_driver, platform_driver_register,
3761 	      arm_smmu_driver_unregister);
3762 
3763 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3764 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3765 MODULE_ALIAS("platform:arm-smmu-v3");
3766 MODULE_LICENSE("GPL v2");
3767