1 /*
2  * IOMMU API for ARM architected SMMUv3 implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright (C) 2015 ARM Limited
17  *
18  * Author: Will Deacon <will.deacon@arm.com>
19  *
20  * This driver is powered by bad coffee and bombay mix.
21  */
22 
23 #include <linux/delay.h>
24 #include <linux/err.h>
25 #include <linux/interrupt.h>
26 #include <linux/iommu.h>
27 #include <linux/iopoll.h>
28 #include <linux/module.h>
29 #include <linux/msi.h>
30 #include <linux/of.h>
31 #include <linux/of_address.h>
32 #include <linux/of_platform.h>
33 #include <linux/pci.h>
34 #include <linux/platform_device.h>
35 
36 #include "io-pgtable.h"
37 
38 /* MMIO registers */
39 #define ARM_SMMU_IDR0			0x0
40 #define IDR0_ST_LVL_SHIFT		27
41 #define IDR0_ST_LVL_MASK		0x3
42 #define IDR0_ST_LVL_2LVL		(1 << IDR0_ST_LVL_SHIFT)
43 #define IDR0_STALL_MODEL		(3 << 24)
44 #define IDR0_TTENDIAN_SHIFT		21
45 #define IDR0_TTENDIAN_MASK		0x3
46 #define IDR0_TTENDIAN_LE		(2 << IDR0_TTENDIAN_SHIFT)
47 #define IDR0_TTENDIAN_BE		(3 << IDR0_TTENDIAN_SHIFT)
48 #define IDR0_TTENDIAN_MIXED		(0 << IDR0_TTENDIAN_SHIFT)
49 #define IDR0_CD2L			(1 << 19)
50 #define IDR0_VMID16			(1 << 18)
51 #define IDR0_PRI			(1 << 16)
52 #define IDR0_SEV			(1 << 14)
53 #define IDR0_MSI			(1 << 13)
54 #define IDR0_ASID16			(1 << 12)
55 #define IDR0_ATS			(1 << 10)
56 #define IDR0_HYP			(1 << 9)
57 #define IDR0_COHACC			(1 << 4)
58 #define IDR0_TTF_SHIFT			2
59 #define IDR0_TTF_MASK			0x3
60 #define IDR0_TTF_AARCH64		(2 << IDR0_TTF_SHIFT)
61 #define IDR0_TTF_AARCH32_64		(3 << IDR0_TTF_SHIFT)
62 #define IDR0_S1P			(1 << 1)
63 #define IDR0_S2P			(1 << 0)
64 
65 #define ARM_SMMU_IDR1			0x4
66 #define IDR1_TABLES_PRESET		(1 << 30)
67 #define IDR1_QUEUES_PRESET		(1 << 29)
68 #define IDR1_REL			(1 << 28)
69 #define IDR1_CMDQ_SHIFT			21
70 #define IDR1_CMDQ_MASK			0x1f
71 #define IDR1_EVTQ_SHIFT			16
72 #define IDR1_EVTQ_MASK			0x1f
73 #define IDR1_PRIQ_SHIFT			11
74 #define IDR1_PRIQ_MASK			0x1f
75 #define IDR1_SSID_SHIFT			6
76 #define IDR1_SSID_MASK			0x1f
77 #define IDR1_SID_SHIFT			0
78 #define IDR1_SID_MASK			0x3f
79 
80 #define ARM_SMMU_IDR5			0x14
81 #define IDR5_STALL_MAX_SHIFT		16
82 #define IDR5_STALL_MAX_MASK		0xffff
83 #define IDR5_GRAN64K			(1 << 6)
84 #define IDR5_GRAN16K			(1 << 5)
85 #define IDR5_GRAN4K			(1 << 4)
86 #define IDR5_OAS_SHIFT			0
87 #define IDR5_OAS_MASK			0x7
88 #define IDR5_OAS_32_BIT			(0 << IDR5_OAS_SHIFT)
89 #define IDR5_OAS_36_BIT			(1 << IDR5_OAS_SHIFT)
90 #define IDR5_OAS_40_BIT			(2 << IDR5_OAS_SHIFT)
91 #define IDR5_OAS_42_BIT			(3 << IDR5_OAS_SHIFT)
92 #define IDR5_OAS_44_BIT			(4 << IDR5_OAS_SHIFT)
93 #define IDR5_OAS_48_BIT			(5 << IDR5_OAS_SHIFT)
94 
95 #define ARM_SMMU_CR0			0x20
96 #define CR0_CMDQEN			(1 << 3)
97 #define CR0_EVTQEN			(1 << 2)
98 #define CR0_PRIQEN			(1 << 1)
99 #define CR0_SMMUEN			(1 << 0)
100 
101 #define ARM_SMMU_CR0ACK			0x24
102 
103 #define ARM_SMMU_CR1			0x28
104 #define CR1_SH_NSH			0
105 #define CR1_SH_OSH			2
106 #define CR1_SH_ISH			3
107 #define CR1_CACHE_NC			0
108 #define CR1_CACHE_WB			1
109 #define CR1_CACHE_WT			2
110 #define CR1_TABLE_SH_SHIFT		10
111 #define CR1_TABLE_OC_SHIFT		8
112 #define CR1_TABLE_IC_SHIFT		6
113 #define CR1_QUEUE_SH_SHIFT		4
114 #define CR1_QUEUE_OC_SHIFT		2
115 #define CR1_QUEUE_IC_SHIFT		0
116 
117 #define ARM_SMMU_CR2			0x2c
118 #define CR2_PTM				(1 << 2)
119 #define CR2_RECINVSID			(1 << 1)
120 #define CR2_E2H				(1 << 0)
121 
122 #define ARM_SMMU_IRQ_CTRL		0x50
123 #define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
124 #define IRQ_CTRL_PRIQ_IRQEN		(1 << 1)
125 #define IRQ_CTRL_GERROR_IRQEN		(1 << 0)
126 
127 #define ARM_SMMU_IRQ_CTRLACK		0x54
128 
129 #define ARM_SMMU_GERROR			0x60
130 #define GERROR_SFM_ERR			(1 << 8)
131 #define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
132 #define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
133 #define GERROR_MSI_EVTQ_ABT_ERR		(1 << 5)
134 #define GERROR_MSI_CMDQ_ABT_ERR		(1 << 4)
135 #define GERROR_PRIQ_ABT_ERR		(1 << 3)
136 #define GERROR_EVTQ_ABT_ERR		(1 << 2)
137 #define GERROR_CMDQ_ERR			(1 << 0)
138 #define GERROR_ERR_MASK			0xfd
139 
140 #define ARM_SMMU_GERRORN		0x64
141 
142 #define ARM_SMMU_GERROR_IRQ_CFG0	0x68
143 #define ARM_SMMU_GERROR_IRQ_CFG1	0x70
144 #define ARM_SMMU_GERROR_IRQ_CFG2	0x74
145 
146 #define ARM_SMMU_STRTAB_BASE		0x80
147 #define STRTAB_BASE_RA			(1UL << 62)
148 #define STRTAB_BASE_ADDR_SHIFT		6
149 #define STRTAB_BASE_ADDR_MASK		0x3ffffffffffUL
150 
151 #define ARM_SMMU_STRTAB_BASE_CFG	0x88
152 #define STRTAB_BASE_CFG_LOG2SIZE_SHIFT	0
153 #define STRTAB_BASE_CFG_LOG2SIZE_MASK	0x3f
154 #define STRTAB_BASE_CFG_SPLIT_SHIFT	6
155 #define STRTAB_BASE_CFG_SPLIT_MASK	0x1f
156 #define STRTAB_BASE_CFG_FMT_SHIFT	16
157 #define STRTAB_BASE_CFG_FMT_MASK	0x3
158 #define STRTAB_BASE_CFG_FMT_LINEAR	(0 << STRTAB_BASE_CFG_FMT_SHIFT)
159 #define STRTAB_BASE_CFG_FMT_2LVL	(1 << STRTAB_BASE_CFG_FMT_SHIFT)
160 
161 #define ARM_SMMU_CMDQ_BASE		0x90
162 #define ARM_SMMU_CMDQ_PROD		0x98
163 #define ARM_SMMU_CMDQ_CONS		0x9c
164 
165 #define ARM_SMMU_EVTQ_BASE		0xa0
166 #define ARM_SMMU_EVTQ_PROD		0x100a8
167 #define ARM_SMMU_EVTQ_CONS		0x100ac
168 #define ARM_SMMU_EVTQ_IRQ_CFG0		0xb0
169 #define ARM_SMMU_EVTQ_IRQ_CFG1		0xb8
170 #define ARM_SMMU_EVTQ_IRQ_CFG2		0xbc
171 
172 #define ARM_SMMU_PRIQ_BASE		0xc0
173 #define ARM_SMMU_PRIQ_PROD		0x100c8
174 #define ARM_SMMU_PRIQ_CONS		0x100cc
175 #define ARM_SMMU_PRIQ_IRQ_CFG0		0xd0
176 #define ARM_SMMU_PRIQ_IRQ_CFG1		0xd8
177 #define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc
178 
179 /* Common MSI config fields */
180 #define MSI_CFG0_ADDR_SHIFT		2
181 #define MSI_CFG0_ADDR_MASK		0x3fffffffffffUL
182 #define MSI_CFG2_SH_SHIFT		4
183 #define MSI_CFG2_SH_NSH			(0UL << MSI_CFG2_SH_SHIFT)
184 #define MSI_CFG2_SH_OSH			(2UL << MSI_CFG2_SH_SHIFT)
185 #define MSI_CFG2_SH_ISH			(3UL << MSI_CFG2_SH_SHIFT)
186 #define MSI_CFG2_MEMATTR_SHIFT		0
187 #define MSI_CFG2_MEMATTR_DEVICE_nGnRE	(0x1 << MSI_CFG2_MEMATTR_SHIFT)
188 
189 #define Q_IDX(q, p)			((p) & ((1 << (q)->max_n_shift) - 1))
190 #define Q_WRP(q, p)			((p) & (1 << (q)->max_n_shift))
191 #define Q_OVERFLOW_FLAG			(1 << 31)
192 #define Q_OVF(q, p)			((p) & Q_OVERFLOW_FLAG)
193 #define Q_ENT(q, p)			((q)->base +			\
194 					 Q_IDX(q, p) * (q)->ent_dwords)
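/*
 * Example: for a queue with max_n_shift == 8, bits [7:0] of prod/cons
 * hold the entry index, bit 8 is the wrap flag and bit 31 the overflow
 * flag. The queue is full when the indices match but the wrap flags
 * differ, and empty when both index and wrap flag match (see
 * queue_full()/queue_empty() below).
 */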
195 
196 #define Q_BASE_RWA			(1UL << 62)
197 #define Q_BASE_ADDR_SHIFT		5
198 #define Q_BASE_ADDR_MASK		0xfffffffffffUL
199 #define Q_BASE_LOG2SIZE_SHIFT		0
200 #define Q_BASE_LOG2SIZE_MASK		0x1fUL
201 
202 /*
203  * Stream table.
204  *
205  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
206  * 2lvl: 128k L1 entries,
207  *       256 lazy entries per table (each table covers a PCI bus)
208  */
209 #define STRTAB_L1_SZ_SHIFT		20
210 #define STRTAB_SPLIT			8
211 
212 #define STRTAB_L1_DESC_DWORDS		1
213 #define STRTAB_L1_DESC_SPAN_SHIFT	0
214 #define STRTAB_L1_DESC_SPAN_MASK	0x1fUL
215 #define STRTAB_L1_DESC_L2PTR_SHIFT	6
216 #define STRTAB_L1_DESC_L2PTR_MASK	0x3ffffffffffUL
217 
218 #define STRTAB_STE_DWORDS		8
219 #define STRTAB_STE_0_V			(1UL << 0)
220 #define STRTAB_STE_0_CFG_SHIFT		1
221 #define STRTAB_STE_0_CFG_MASK		0x7UL
222 #define STRTAB_STE_0_CFG_ABORT		(0UL << STRTAB_STE_0_CFG_SHIFT)
223 #define STRTAB_STE_0_CFG_BYPASS		(4UL << STRTAB_STE_0_CFG_SHIFT)
224 #define STRTAB_STE_0_CFG_S1_TRANS	(5UL << STRTAB_STE_0_CFG_SHIFT)
225 #define STRTAB_STE_0_CFG_S2_TRANS	(6UL << STRTAB_STE_0_CFG_SHIFT)
226 
227 #define STRTAB_STE_0_S1FMT_SHIFT	4
228 #define STRTAB_STE_0_S1FMT_LINEAR	(0UL << STRTAB_STE_0_S1FMT_SHIFT)
229 #define STRTAB_STE_0_S1CTXPTR_SHIFT	6
230 #define STRTAB_STE_0_S1CTXPTR_MASK	0x3ffffffffffUL
231 #define STRTAB_STE_0_S1CDMAX_SHIFT	59
232 #define STRTAB_STE_0_S1CDMAX_MASK	0x1fUL
233 
234 #define STRTAB_STE_1_S1C_CACHE_NC	0UL
235 #define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
236 #define STRTAB_STE_1_S1C_CACHE_WT	2UL
237 #define STRTAB_STE_1_S1C_CACHE_WB	3UL
238 #define STRTAB_STE_1_S1C_SH_NSH		0UL
239 #define STRTAB_STE_1_S1C_SH_OSH		2UL
240 #define STRTAB_STE_1_S1C_SH_ISH		3UL
241 #define STRTAB_STE_1_S1CIR_SHIFT	2
242 #define STRTAB_STE_1_S1COR_SHIFT	4
243 #define STRTAB_STE_1_S1CSH_SHIFT	6
244 
245 #define STRTAB_STE_1_S1STALLD		(1UL << 27)
246 
247 #define STRTAB_STE_1_EATS_ABT		0UL
248 #define STRTAB_STE_1_EATS_TRANS		1UL
249 #define STRTAB_STE_1_EATS_S1CHK		2UL
250 #define STRTAB_STE_1_EATS_SHIFT		28
251 
252 #define STRTAB_STE_1_STRW_NSEL1		0UL
253 #define STRTAB_STE_1_STRW_EL2		2UL
254 #define STRTAB_STE_1_STRW_SHIFT		30
255 
256 #define STRTAB_STE_2_S2VMID_SHIFT	0
257 #define STRTAB_STE_2_S2VMID_MASK	0xffffUL
258 #define STRTAB_STE_2_VTCR_SHIFT		32
259 #define STRTAB_STE_2_VTCR_MASK		0x7ffffUL
260 #define STRTAB_STE_2_S2AA64		(1UL << 51)
261 #define STRTAB_STE_2_S2ENDI		(1UL << 52)
262 #define STRTAB_STE_2_S2PTW		(1UL << 54)
263 #define STRTAB_STE_2_S2R		(1UL << 58)
264 
265 #define STRTAB_STE_3_S2TTB_SHIFT	4
266 #define STRTAB_STE_3_S2TTB_MASK		0xfffffffffffUL
267 
268 /* Context descriptor (stage-1 only) */
269 #define CTXDESC_CD_DWORDS		8
270 #define CTXDESC_CD_0_TCR_T0SZ_SHIFT	0
271 #define ARM64_TCR_T0SZ_SHIFT		0
272 #define ARM64_TCR_T0SZ_MASK		0x1fUL
273 #define CTXDESC_CD_0_TCR_TG0_SHIFT	6
274 #define ARM64_TCR_TG0_SHIFT		14
275 #define ARM64_TCR_TG0_MASK		0x3UL
276 #define CTXDESC_CD_0_TCR_IRGN0_SHIFT	8
277 #define ARM64_TCR_IRGN0_SHIFT		8
278 #define ARM64_TCR_IRGN0_MASK		0x3UL
279 #define CTXDESC_CD_0_TCR_ORGN0_SHIFT	10
280 #define ARM64_TCR_ORGN0_SHIFT		10
281 #define ARM64_TCR_ORGN0_MASK		0x3UL
282 #define CTXDESC_CD_0_TCR_SH0_SHIFT	12
283 #define ARM64_TCR_SH0_SHIFT		12
284 #define ARM64_TCR_SH0_MASK		0x3UL
285 #define CTXDESC_CD_0_TCR_EPD0_SHIFT	14
286 #define ARM64_TCR_EPD0_SHIFT		7
287 #define ARM64_TCR_EPD0_MASK		0x1UL
288 #define CTXDESC_CD_0_TCR_EPD1_SHIFT	30
289 #define ARM64_TCR_EPD1_SHIFT		23
290 #define ARM64_TCR_EPD1_MASK		0x1UL
291 
292 #define CTXDESC_CD_0_ENDI		(1UL << 15)
293 #define CTXDESC_CD_0_V			(1UL << 31)
294 
295 #define CTXDESC_CD_0_TCR_IPS_SHIFT	32
296 #define ARM64_TCR_IPS_SHIFT		32
297 #define ARM64_TCR_IPS_MASK		0x7UL
298 #define CTXDESC_CD_0_TCR_TBI0_SHIFT	38
299 #define ARM64_TCR_TBI0_SHIFT		37
300 #define ARM64_TCR_TBI0_MASK		0x1UL
301 
302 #define CTXDESC_CD_0_AA64		(1UL << 41)
303 #define CTXDESC_CD_0_R			(1UL << 45)
304 #define CTXDESC_CD_0_A			(1UL << 46)
305 #define CTXDESC_CD_0_ASET_SHIFT		47
306 #define CTXDESC_CD_0_ASET_SHARED	(0UL << CTXDESC_CD_0_ASET_SHIFT)
307 #define CTXDESC_CD_0_ASET_PRIVATE	(1UL << CTXDESC_CD_0_ASET_SHIFT)
308 #define CTXDESC_CD_0_ASID_SHIFT		48
309 #define CTXDESC_CD_0_ASID_MASK		0xffffUL
310 
311 #define CTXDESC_CD_1_TTB0_SHIFT		4
312 #define CTXDESC_CD_1_TTB0_MASK		0xfffffffffffUL
313 
314 #define CTXDESC_CD_3_MAIR_SHIFT		0
315 
316 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
317 #define ARM_SMMU_TCR2CD(tcr, fld)					\
318 	(((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)	\
319 	 << CTXDESC_CD_0_TCR_##fld##_SHIFT)
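/*
 * For example, ARM_SMMU_TCR2CD(tcr, TG0) pulls TG0 out of its AArch64
 * position (bits [15:14] of the TCR) and re-packs it at bits [7:6] of
 * CD word 0, per the CTXDESC_CD_0_TCR_* definitions above.
 */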
320 
321 /* Command queue */
322 #define CMDQ_ENT_DWORDS			2
323 #define CMDQ_MAX_SZ_SHIFT		8
324 
325 #define CMDQ_ERR_SHIFT			24
326 #define CMDQ_ERR_MASK			0x7f
327 #define CMDQ_ERR_CERROR_NONE_IDX	0
328 #define CMDQ_ERR_CERROR_ILL_IDX		1
329 #define CMDQ_ERR_CERROR_ABT_IDX		2
330 
331 #define CMDQ_0_OP_SHIFT			0
332 #define CMDQ_0_OP_MASK			0xffUL
333 #define CMDQ_0_SSV			(1UL << 11)
334 
335 #define CMDQ_PREFETCH_0_SID_SHIFT	32
336 #define CMDQ_PREFETCH_1_SIZE_SHIFT	0
337 #define CMDQ_PREFETCH_1_ADDR_MASK	~0xfffUL
338 
339 #define CMDQ_CFGI_0_SID_SHIFT		32
340 #define CMDQ_CFGI_0_SID_MASK		0xffffffffUL
341 #define CMDQ_CFGI_1_LEAF		(1UL << 0)
342 #define CMDQ_CFGI_1_RANGE_SHIFT		0
343 #define CMDQ_CFGI_1_RANGE_MASK		0x1fUL
344 
345 #define CMDQ_TLBI_0_VMID_SHIFT		32
346 #define CMDQ_TLBI_0_ASID_SHIFT		48
347 #define CMDQ_TLBI_1_LEAF		(1UL << 0)
348 #define CMDQ_TLBI_1_VA_MASK		~0xfffUL
349 #define CMDQ_TLBI_1_IPA_MASK		0xfffffffff000UL
350 
351 #define CMDQ_PRI_0_SSID_SHIFT		12
352 #define CMDQ_PRI_0_SSID_MASK		0xfffffUL
353 #define CMDQ_PRI_0_SID_SHIFT		32
354 #define CMDQ_PRI_0_SID_MASK		0xffffffffUL
355 #define CMDQ_PRI_1_GRPID_SHIFT		0
356 #define CMDQ_PRI_1_GRPID_MASK		0x1ffUL
357 #define CMDQ_PRI_1_RESP_SHIFT		12
358 #define CMDQ_PRI_1_RESP_DENY		(0UL << CMDQ_PRI_1_RESP_SHIFT)
359 #define CMDQ_PRI_1_RESP_FAIL		(1UL << CMDQ_PRI_1_RESP_SHIFT)
360 #define CMDQ_PRI_1_RESP_SUCC		(2UL << CMDQ_PRI_1_RESP_SHIFT)
361 
362 #define CMDQ_SYNC_0_CS_SHIFT		12
363 #define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
364 #define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
365 
366 /* Event queue */
367 #define EVTQ_ENT_DWORDS			4
368 #define EVTQ_MAX_SZ_SHIFT		7
369 
370 #define EVTQ_0_ID_SHIFT			0
371 #define EVTQ_0_ID_MASK			0xffUL
372 
373 /* PRI queue */
374 #define PRIQ_ENT_DWORDS			2
375 #define PRIQ_MAX_SZ_SHIFT		8
376 
377 #define PRIQ_0_SID_SHIFT		0
378 #define PRIQ_0_SID_MASK			0xffffffffUL
379 #define PRIQ_0_SSID_SHIFT		32
380 #define PRIQ_0_SSID_MASK		0xfffffUL
381 #define PRIQ_0_OF			(1UL << 57)
382 #define PRIQ_0_PERM_PRIV		(1UL << 58)
383 #define PRIQ_0_PERM_EXEC		(1UL << 59)
384 #define PRIQ_0_PERM_READ		(1UL << 60)
385 #define PRIQ_0_PERM_WRITE		(1UL << 61)
386 #define PRIQ_0_PRG_LAST			(1UL << 62)
387 #define PRIQ_0_SSID_V			(1UL << 63)
388 
389 #define PRIQ_1_PRG_IDX_SHIFT		0
390 #define PRIQ_1_PRG_IDX_MASK		0x1ffUL
391 #define PRIQ_1_ADDR_SHIFT		12
392 #define PRIQ_1_ADDR_MASK		0xfffffffffffffUL
393 
394 /* High-level queue structures */
395 #define ARM_SMMU_POLL_TIMEOUT_US	100
396 
397 static bool disable_bypass;
398 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
399 MODULE_PARM_DESC(disable_bypass,
400 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
401 
402 enum pri_resp {
403 	PRI_RESP_DENY,
404 	PRI_RESP_FAIL,
405 	PRI_RESP_SUCC,
406 };
407 
408 enum arm_smmu_msi_index {
409 	EVTQ_MSI_INDEX,
410 	GERROR_MSI_INDEX,
411 	PRIQ_MSI_INDEX,
412 	ARM_SMMU_MAX_MSIS,
413 };
414 
415 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
416 	[EVTQ_MSI_INDEX] = {
417 		ARM_SMMU_EVTQ_IRQ_CFG0,
418 		ARM_SMMU_EVTQ_IRQ_CFG1,
419 		ARM_SMMU_EVTQ_IRQ_CFG2,
420 	},
421 	[GERROR_MSI_INDEX] = {
422 		ARM_SMMU_GERROR_IRQ_CFG0,
423 		ARM_SMMU_GERROR_IRQ_CFG1,
424 		ARM_SMMU_GERROR_IRQ_CFG2,
425 	},
426 	[PRIQ_MSI_INDEX] = {
427 		ARM_SMMU_PRIQ_IRQ_CFG0,
428 		ARM_SMMU_PRIQ_IRQ_CFG1,
429 		ARM_SMMU_PRIQ_IRQ_CFG2,
430 	},
431 };
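/*
 * Each row above names the three registers used to steer one MSI: CFG0
 * takes the doorbell address and CFG2 the shareability/memory
 * attributes (see the MSI_CFG* field definitions above).
 */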
432 
433 struct arm_smmu_cmdq_ent {
434 	/* Common fields */
435 	u8				opcode;
436 	bool				substream_valid;
437 
438 	/* Command-specific fields */
439 	union {
440 		#define CMDQ_OP_PREFETCH_CFG	0x1
441 		struct {
442 			u32			sid;
443 			u8			size;
444 			u64			addr;
445 		} prefetch;
446 
447 		#define CMDQ_OP_CFGI_STE	0x3
448 		#define CMDQ_OP_CFGI_ALL	0x4
449 		struct {
450 			u32			sid;
451 			union {
452 				bool		leaf;
453 				u8		span;
454 			};
455 		} cfgi;
456 
457 		#define CMDQ_OP_TLBI_NH_ASID	0x11
458 		#define CMDQ_OP_TLBI_NH_VA	0x12
459 		#define CMDQ_OP_TLBI_EL2_ALL	0x20
460 		#define CMDQ_OP_TLBI_S12_VMALL	0x28
461 		#define CMDQ_OP_TLBI_S2_IPA	0x2a
462 		#define CMDQ_OP_TLBI_NSNH_ALL	0x30
463 		struct {
464 			u16			asid;
465 			u16			vmid;
466 			bool			leaf;
467 			u64			addr;
468 		} tlbi;
469 
470 		#define CMDQ_OP_PRI_RESP	0x41
471 		struct {
472 			u32			sid;
473 			u32			ssid;
474 			u16			grpid;
475 			enum pri_resp		resp;
476 		} pri;
477 
478 		#define CMDQ_OP_CMD_SYNC	0x46
479 	};
480 };
481 
482 struct arm_smmu_queue {
483 	int				irq; /* Wired interrupt */
484 
485 	__le64				*base;
486 	dma_addr_t			base_dma;
487 	u64				q_base;
488 
489 	size_t				ent_dwords;
490 	u32				max_n_shift;
491 	u32				prod;
492 	u32				cons;
493 
494 	u32 __iomem			*prod_reg;
495 	u32 __iomem			*cons_reg;
496 };
497 
498 struct arm_smmu_cmdq {
499 	struct arm_smmu_queue		q;
500 	spinlock_t			lock;
501 };
502 
503 struct arm_smmu_evtq {
504 	struct arm_smmu_queue		q;
505 	u32				max_stalls;
506 };
507 
508 struct arm_smmu_priq {
509 	struct arm_smmu_queue		q;
510 };
511 
512 /* High-level stream table and context descriptor structures */
513 struct arm_smmu_strtab_l1_desc {
514 	u8				span;
515 
516 	__le64				*l2ptr;
517 	dma_addr_t			l2ptr_dma;
518 };
519 
520 struct arm_smmu_s1_cfg {
521 	__le64				*cdptr;
522 	dma_addr_t			cdptr_dma;
523 
524 	struct arm_smmu_ctx_desc {
525 		u16	asid;
526 		u64	ttbr;
527 		u64	tcr;
528 		u64	mair;
529 	}				cd;
530 };
531 
532 struct arm_smmu_s2_cfg {
533 	u16				vmid;
534 	u64				vttbr;
535 	u64				vtcr;
536 };
537 
538 struct arm_smmu_strtab_ent {
539 	bool				valid;
540 
541 	bool				bypass;	/* Overrides s1/s2 config */
542 	struct arm_smmu_s1_cfg		*s1_cfg;
543 	struct arm_smmu_s2_cfg		*s2_cfg;
544 };
545 
546 struct arm_smmu_strtab_cfg {
547 	__le64				*strtab;
548 	dma_addr_t			strtab_dma;
549 	struct arm_smmu_strtab_l1_desc	*l1_desc;
550 	unsigned int			num_l1_ents;
551 
552 	u64				strtab_base;
553 	u32				strtab_base_cfg;
554 };
555 
556 /* An SMMUv3 instance */
557 struct arm_smmu_device {
558 	struct device			*dev;
559 	void __iomem			*base;
560 
561 #define ARM_SMMU_FEAT_2_LVL_STRTAB	(1 << 0)
562 #define ARM_SMMU_FEAT_2_LVL_CDTAB	(1 << 1)
563 #define ARM_SMMU_FEAT_TT_LE		(1 << 2)
564 #define ARM_SMMU_FEAT_TT_BE		(1 << 3)
565 #define ARM_SMMU_FEAT_PRI		(1 << 4)
566 #define ARM_SMMU_FEAT_ATS		(1 << 5)
567 #define ARM_SMMU_FEAT_SEV		(1 << 6)
568 #define ARM_SMMU_FEAT_MSI		(1 << 7)
569 #define ARM_SMMU_FEAT_COHERENCY		(1 << 8)
570 #define ARM_SMMU_FEAT_TRANS_S1		(1 << 9)
571 #define ARM_SMMU_FEAT_TRANS_S2		(1 << 10)
572 #define ARM_SMMU_FEAT_STALLS		(1 << 11)
573 #define ARM_SMMU_FEAT_HYP		(1 << 12)
574 	u32				features;
575 
576 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
577 	u32				options;
578 
579 	struct arm_smmu_cmdq		cmdq;
580 	struct arm_smmu_evtq		evtq;
581 	struct arm_smmu_priq		priq;
582 
583 	int				gerr_irq;
584 
585 	unsigned long			ias; /* IPA */
586 	unsigned long			oas; /* PA */
587 
588 #define ARM_SMMU_MAX_ASIDS		(1 << 16)
589 	unsigned int			asid_bits;
590 	DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
591 
592 #define ARM_SMMU_MAX_VMIDS		(1 << 16)
593 	unsigned int			vmid_bits;
594 	DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
595 
596 	unsigned int			ssid_bits;
597 	unsigned int			sid_bits;
598 
599 	struct arm_smmu_strtab_cfg	strtab_cfg;
600 };
601 
602 /* SMMU private data for an IOMMU group */
603 struct arm_smmu_group {
604 	struct arm_smmu_device		*smmu;
605 	struct arm_smmu_domain		*domain;
606 	int				num_sids;
607 	u32				*sids;
608 	struct arm_smmu_strtab_ent	ste;
609 };
610 
611 /* SMMU private data for an IOMMU domain */
612 enum arm_smmu_domain_stage {
613 	ARM_SMMU_DOMAIN_S1 = 0,
614 	ARM_SMMU_DOMAIN_S2,
615 	ARM_SMMU_DOMAIN_NESTED,
616 };
617 
618 struct arm_smmu_domain {
619 	struct arm_smmu_device		*smmu;
620 	struct mutex			init_mutex; /* Protects smmu pointer */
621 
622 	struct io_pgtable_ops		*pgtbl_ops;
623 	spinlock_t			pgtbl_lock;
624 
625 	enum arm_smmu_domain_stage	stage;
626 	union {
627 		struct arm_smmu_s1_cfg	s1_cfg;
628 		struct arm_smmu_s2_cfg	s2_cfg;
629 	};
630 
631 	struct iommu_domain		domain;
632 };
633 
634 struct arm_smmu_option_prop {
635 	u32 opt;
636 	const char *prop;
637 };
638 
639 static struct arm_smmu_option_prop arm_smmu_options[] = {
640 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
641 	{ 0, NULL},
642 };
643 
644 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
645 {
646 	return container_of(dom, struct arm_smmu_domain, domain);
647 }
648 
649 static void parse_driver_options(struct arm_smmu_device *smmu)
650 {
651 	int i = 0;
652 
653 	do {
654 		if (of_property_read_bool(smmu->dev->of_node,
655 						arm_smmu_options[i].prop)) {
656 			smmu->options |= arm_smmu_options[i].opt;
657 			dev_notice(smmu->dev, "option %s\n",
658 				arm_smmu_options[i].prop);
659 		}
660 	} while (arm_smmu_options[++i].opt);
661 }
662 
663 /* Low-level queue manipulation functions */
664 static bool queue_full(struct arm_smmu_queue *q)
665 {
666 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
667 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
668 }
669 
670 static bool queue_empty(struct arm_smmu_queue *q)
671 {
672 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
673 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
674 }
675 
676 static void queue_sync_cons(struct arm_smmu_queue *q)
677 {
678 	q->cons = readl_relaxed(q->cons_reg);
679 }
680 
681 static void queue_inc_cons(struct arm_smmu_queue *q)
682 {
683 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
684 
685 	q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
686 
687 	/*
688 	 * Ensure that all CPU accesses (reads and writes) to the queue
689 	 * are complete before we update the cons pointer.
690 	 */
691 	mb();
692 	writel_relaxed(q->cons, q->cons_reg);
693 }
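/*
 * Note for queue_inc_cons() above and queue_inc_prod() below: adding 1
 * to the combined (wrap | index) value flips the wrap flag
 * automatically when the index rolls over, e.g. with max_n_shift == 8,
 * 0xff + 1 == 0x100 (index 0, wrap bit set).
 */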
694 
695 static int queue_sync_prod(struct arm_smmu_queue *q)
696 {
697 	int ret = 0;
698 	u32 prod = readl_relaxed(q->prod_reg);
699 
700 	if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
701 		ret = -EOVERFLOW;
702 
703 	q->prod = prod;
704 	return ret;
705 }
706 
707 static void queue_inc_prod(struct arm_smmu_queue *q)
708 {
709 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
710 
711 	q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
712 	writel(q->prod, q->prod_reg);
713 }
714 
715 static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
716 {
717 	if (Q_WRP(q, q->cons) == Q_WRP(q, until))
718 		return Q_IDX(q, q->cons) < Q_IDX(q, until);
719 
720 	return Q_IDX(q, q->cons) >= Q_IDX(q, until);
721 }
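/*
 * "Before" here means the consumer has not yet passed "until": with
 * matching wrap flags that is a plain index comparison; with differing
 * wrap flags the consumer is still on the previous lap and is treated
 * as behind while its index is >= Q_IDX(until).
 */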
722 
723 static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
724 {
725 	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
726 
727 	while (queue_sync_cons(q), __queue_cons_before(q, until)) {
728 		if (ktime_compare(ktime_get(), timeout) > 0)
729 			return -ETIMEDOUT;
730 
731 		if (wfe) {
732 			wfe();
733 		} else {
734 			cpu_relax();
735 			udelay(1);
736 		}
737 	}
738 
739 	return 0;
740 }
741 
742 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
743 {
744 	int i;
745 
746 	for (i = 0; i < n_dwords; ++i)
747 		*dst++ = cpu_to_le64(*src++);
748 }
749 
750 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
751 {
752 	if (queue_full(q))
753 		return -ENOSPC;
754 
755 	queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
756 	queue_inc_prod(q);
757 	return 0;
758 }
759 
760 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
761 {
762 	int i;
763 
764 	for (i = 0; i < n_dwords; ++i)
765 		*dst++ = le64_to_cpu(*src++);
766 }
767 
768 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
769 {
770 	if (queue_empty(q))
771 		return -EAGAIN;
772 
773 	queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
774 	queue_inc_cons(q);
775 	return 0;
776 }
777 
778 /* High-level queue accessors */
779 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
780 {
781 	memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
782 	cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
783 
784 	switch (ent->opcode) {
785 	case CMDQ_OP_TLBI_EL2_ALL:
786 	case CMDQ_OP_TLBI_NSNH_ALL:
787 		break;
788 	case CMDQ_OP_PREFETCH_CFG:
789 		cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
790 		cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
791 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
792 		break;
793 	case CMDQ_OP_CFGI_STE:
794 		cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
795 		cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
796 		break;
797 	case CMDQ_OP_CFGI_ALL:
798 		/* Cover the entire SID range */
799 		cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
800 		break;
801 	case CMDQ_OP_TLBI_NH_VA:
802 		cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
803 		cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
804 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
805 		break;
806 	case CMDQ_OP_TLBI_S2_IPA:
807 		cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
808 		cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
809 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
810 		break;
811 	case CMDQ_OP_TLBI_NH_ASID:
812 		cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
813 		/* Fallthrough */
814 	case CMDQ_OP_TLBI_S12_VMALL:
815 		cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
816 		break;
817 	case CMDQ_OP_PRI_RESP:
818 		cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
819 		cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
820 		cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
821 		cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
822 		switch (ent->pri.resp) {
823 		case PRI_RESP_DENY:
824 			cmd[1] |= CMDQ_PRI_1_RESP_DENY;
825 			break;
826 		case PRI_RESP_FAIL:
827 			cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
828 			break;
829 		case PRI_RESP_SUCC:
830 			cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
831 			break;
832 		default:
833 			return -EINVAL;
834 		}
835 		break;
836 	case CMDQ_OP_CMD_SYNC:
837 		cmd[0] |= CMDQ_SYNC_0_CS_SEV;
838 		break;
839 	default:
840 		return -ENOENT;
841 	}
842 
843 	return 0;
844 }
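/*
 * Typical usage (see arm_smmu_sync_ste_for_sid() below): fill in a
 * struct arm_smmu_cmdq_ent such as
 *
 *	struct arm_smmu_cmdq_ent cmd = {
 *		.opcode	= CMDQ_OP_CFGI_STE,
 *		.cfgi	= { .sid = sid, .leaf = true },
 *	};
 *
 * and pass it to arm_smmu_cmdq_issue_cmd(), which invokes this builder
 * before inserting the two resulting dwords into the command queue.
 */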
845 
846 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
847 {
848 	static const char *cerror_str[] = {
849 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
850 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
851 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
852 	};
853 
854 	int i;
855 	u64 cmd[CMDQ_ENT_DWORDS];
856 	struct arm_smmu_queue *q = &smmu->cmdq.q;
857 	u32 cons = readl_relaxed(q->cons_reg);
858 	u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
859 	struct arm_smmu_cmdq_ent cmd_sync = {
860 		.opcode = CMDQ_OP_CMD_SYNC,
861 	};
862 
863 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
864 		cerror_str[idx]);
865 
866 	switch (idx) {
867 	case CMDQ_ERR_CERROR_ILL_IDX:
868 		break;
869 	case CMDQ_ERR_CERROR_ABT_IDX:
870 		dev_err(smmu->dev, "retrying command fetch\n");
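		/* Fallthrough */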
871 	case CMDQ_ERR_CERROR_NONE_IDX:
872 		return;
873 	}
874 
875 	/*
876 	 * We may have concurrent producers, so we need to be careful
877 	 * not to touch any of the shadow cmdq state.
878 	 */
879 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
880 	dev_err(smmu->dev, "skipping command in error state:\n");
881 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
882 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
883 
884 	/* Convert the erroneous command into a CMD_SYNC */
885 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
886 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
887 		return;
888 	}
889 
890 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
891 }
892 
893 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
894 				    struct arm_smmu_cmdq_ent *ent)
895 {
896 	u32 until;
897 	u64 cmd[CMDQ_ENT_DWORDS];
898 	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
899 	struct arm_smmu_queue *q = &smmu->cmdq.q;
900 
901 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
902 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
903 			 ent->opcode);
904 		return;
905 	}
906 
907 	spin_lock(&smmu->cmdq.lock);
908 	while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
909 		/*
910 		 * Keep the queue locked, otherwise the producer could wrap
911 		 * twice and we could see a future consumer pointer that looks
912 		 * like it's behind us.
913 		 */
914 		if (queue_poll_cons(q, until, wfe))
915 			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
916 	}
917 
918 	if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
919 		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
920 	spin_unlock(&smmu->cmdq.lock);
921 }
922 
923 /* Context descriptor manipulation functions */
924 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
925 {
926 	u64 val = 0;
927 
928 	/* Repack the TCR. Just care about TTBR0 for now */
929 	val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
930 	val |= ARM_SMMU_TCR2CD(tcr, TG0);
931 	val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
932 	val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
933 	val |= ARM_SMMU_TCR2CD(tcr, SH0);
934 	val |= ARM_SMMU_TCR2CD(tcr, EPD0);
935 	val |= ARM_SMMU_TCR2CD(tcr, EPD1);
936 	val |= ARM_SMMU_TCR2CD(tcr, IPS);
937 	val |= ARM_SMMU_TCR2CD(tcr, TBI0);
938 
939 	return val;
940 }
941 
942 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
943 				    struct arm_smmu_s1_cfg *cfg)
944 {
945 	u64 val;
946 
947 	/*
948 	 * We don't need to issue any invalidation here, as we'll invalidate
949 	 * the STE when installing the new entry anyway.
950 	 */
951 	val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
952 #ifdef __BIG_ENDIAN
953 	      CTXDESC_CD_0_ENDI |
954 #endif
955 	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
956 	      CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
957 	      CTXDESC_CD_0_V;
958 	cfg->cdptr[0] = cpu_to_le64(val);
959 
960 	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
961 	cfg->cdptr[1] = cpu_to_le64(val);
962 
963 	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
964 }
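/*
 * The context descriptor written above spans CTXDESC_CD_DWORDS (8)
 * 64-bit words: word 0 carries the repacked TCR, ASID and control bits,
 * word 1 the TTBR0 and word 3 the MAIR; the remaining words are left
 * zero.
 */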
965 
966 /* Stream table manipulation functions */
967 static void
968 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
969 {
970 	u64 val = 0;
971 
972 	val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
973 		<< STRTAB_L1_DESC_SPAN_SHIFT;
974 	val |= desc->l2ptr_dma &
975 	       STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
976 
977 	*dst = cpu_to_le64(val);
978 }
979 
980 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
981 {
982 	struct arm_smmu_cmdq_ent cmd = {
983 		.opcode	= CMDQ_OP_CFGI_STE,
984 		.cfgi	= {
985 			.sid	= sid,
986 			.leaf	= true,
987 		},
988 	};
989 
990 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
991 	cmd.opcode = CMDQ_OP_CMD_SYNC;
992 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
993 }
994 
995 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
996 				      __le64 *dst, struct arm_smmu_strtab_ent *ste)
997 {
998 	/*
999 	 * This is hideously complicated, but we only really care about
1000 	 * three cases at the moment:
1001 	 *
1002 	 * 1. Invalid (all zero) -> bypass  (init)
1003 	 * 2. Bypass -> translation (attach)
1004 	 * 3. Translation -> bypass (detach)
1005 	 *
1006 	 * Given that we can't update the STE atomically and the SMMU
1007 	 * doesn't read the thing in a defined order, that leaves us
1008 	 * with the following maintenance requirements:
1009 	 *
1010 	 * 1. Update Config, return (init time STEs aren't live)
1011 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1012 	 * 3. Update Config, sync
1013 	 */
1014 	u64 val = le64_to_cpu(dst[0]);
1015 	bool ste_live = false;
1016 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1017 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1018 		.prefetch	= {
1019 			.sid	= sid,
1020 		},
1021 	};
1022 
1023 	if (val & STRTAB_STE_0_V) {
1024 		u64 cfg;
1025 
1026 		cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
1027 		switch (cfg) {
1028 		case STRTAB_STE_0_CFG_BYPASS:
1029 			break;
1030 		case STRTAB_STE_0_CFG_S1_TRANS:
1031 		case STRTAB_STE_0_CFG_S2_TRANS:
1032 			ste_live = true;
1033 			break;
1034 		case STRTAB_STE_0_CFG_ABORT:
1035 			if (disable_bypass)
1036 				break;
1037 		default:
1038 			BUG(); /* STE corruption */
1039 		}
1040 	}
1041 
1042 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1043 	val = ste->valid ? STRTAB_STE_0_V : 0;
1044 
1045 	if (ste->bypass) {
1046 		val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
1047 				      : STRTAB_STE_0_CFG_BYPASS;
1048 		dst[0] = cpu_to_le64(val);
1049 		dst[2] = 0; /* Nuke the VMID */
1050 		if (ste_live)
1051 			arm_smmu_sync_ste_for_sid(smmu, sid);
1052 		return;
1053 	}
1054 
1055 	if (ste->s1_cfg) {
1056 		BUG_ON(ste_live);
1057 		dst[1] = cpu_to_le64(
1058 			 STRTAB_STE_1_S1C_CACHE_WBRA
1059 			 << STRTAB_STE_1_S1CIR_SHIFT |
1060 			 STRTAB_STE_1_S1C_CACHE_WBRA
1061 			 << STRTAB_STE_1_S1COR_SHIFT |
1062 			 STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
1063 			 STRTAB_STE_1_S1STALLD |
1064 #ifdef CONFIG_PCI_ATS
1065 			 STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
1066 #endif
1067 			 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
1068 
1069 		val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
1070 		        << STRTAB_STE_0_S1CTXPTR_SHIFT) |
1071 			STRTAB_STE_0_CFG_S1_TRANS;
1072 	}
1073 
1074 	if (ste->s2_cfg) {
1075 		BUG_ON(ste_live);
1076 		dst[2] = cpu_to_le64(
1077 			 ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
1078 			 (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
1079 			  << STRTAB_STE_2_VTCR_SHIFT |
1080 #ifdef __BIG_ENDIAN
1081 			 STRTAB_STE_2_S2ENDI |
1082 #endif
1083 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1084 			 STRTAB_STE_2_S2R);
1085 
1086 		dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
1087 			 STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
1088 
1089 		val |= STRTAB_STE_0_CFG_S2_TRANS;
1090 	}
1091 
1092 	arm_smmu_sync_ste_for_sid(smmu, sid);
1093 	/* See comment in arm_smmu_write_ctx_desc() */
1094 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1095 	arm_smmu_sync_ste_for_sid(smmu, sid);
1096 
1097 	/* It's likely that we'll want to use the new STE soon */
1098 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1099 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1100 }
1101 
1102 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1103 {
1104 	unsigned int i;
1105 	struct arm_smmu_strtab_ent ste = {
1106 		.valid	= true,
1107 		.bypass	= true,
1108 	};
1109 
1110 	for (i = 0; i < nent; ++i) {
1111 		arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1112 		strtab += STRTAB_STE_DWORDS;
1113 	}
1114 }
1115 
1116 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1117 {
1118 	size_t size;
1119 	void *strtab;
1120 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1121 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1122 
1123 	if (desc->l2ptr)
1124 		return 0;
1125 
1126 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1127 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1128 
1129 	desc->span = STRTAB_SPLIT + 1;
1130 	desc->l2ptr = dma_zalloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1131 					  GFP_KERNEL);
1132 	if (!desc->l2ptr) {
1133 		dev_err(smmu->dev,
1134 			"failed to allocate l2 stream table for SID %u\n",
1135 			sid);
1136 		return -ENOMEM;
1137 	}
1138 
1139 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1140 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1141 	return 0;
1142 }
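/*
 * With STRTAB_SPLIT == 8 and STRTAB_STE_DWORDS == 8, each level-2 table
 * allocated above covers 256 STEs of 64 bytes each, i.e. 16KB per
 * lazily-populated PCI bus.
 */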
1143 
1144 /* IRQ and event handlers */
1145 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1146 {
1147 	int i;
1148 	struct arm_smmu_device *smmu = dev;
1149 	struct arm_smmu_queue *q = &smmu->evtq.q;
1150 	u64 evt[EVTQ_ENT_DWORDS];
1151 
1152 	while (!queue_remove_raw(q, evt)) {
1153 		u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
1154 
1155 		dev_info(smmu->dev, "event 0x%02x received:\n", id);
1156 		for (i = 0; i < ARRAY_SIZE(evt); ++i)
1157 			dev_info(smmu->dev, "\t0x%016llx\n",
1158 				 (unsigned long long)evt[i]);
1159 	}
1160 
1161 	/* Sync our overflow flag, as we believe we're up to speed */
1162 	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1163 	return IRQ_HANDLED;
1164 }
1165 
1166 static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
1167 {
1168 	irqreturn_t ret = IRQ_WAKE_THREAD;
1169 	struct arm_smmu_device *smmu = dev;
1170 	struct arm_smmu_queue *q = &smmu->evtq.q;
1171 
1172 	/*
1173 	 * Not much we can do on overflow, so scream and pretend we're
1174 	 * trying harder.
1175 	 */
1176 	if (queue_sync_prod(q) == -EOVERFLOW)
1177 		dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1178 	else if (queue_empty(q))
1179 		ret = IRQ_NONE;
1180 
1181 	return ret;
1182 }
1183 
1184 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1185 {
1186 	struct arm_smmu_device *smmu = dev;
1187 	struct arm_smmu_queue *q = &smmu->priq.q;
1188 	u64 evt[PRIQ_ENT_DWORDS];
1189 
1190 	while (!queue_remove_raw(q, evt)) {
1191 		u32 sid, ssid;
1192 		u16 grpid;
1193 		bool ssv, last;
1194 
1195 		sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
1196 		ssv = evt[0] & PRIQ_0_SSID_V;
1197 		ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
1198 		last = evt[0] & PRIQ_0_PRG_LAST;
1199 		grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
1200 
1201 		dev_info(smmu->dev, "unexpected PRI request received:\n");
1202 		dev_info(smmu->dev,
1203 			 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1204 			 sid, ssid, grpid, last ? "L" : "",
1205 			 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1206 			 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1207 			 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1208 			 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1209 			 evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
1210 
1211 		if (last) {
1212 			struct arm_smmu_cmdq_ent cmd = {
1213 				.opcode			= CMDQ_OP_PRI_RESP,
1214 				.substream_valid	= ssv,
1215 				.pri			= {
1216 					.sid	= sid,
1217 					.ssid	= ssid,
1218 					.grpid	= grpid,
1219 					.resp	= PRI_RESP_DENY,
1220 				},
1221 			};
1222 
1223 			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1224 		}
1225 	}
1226 
1227 	/* Sync our overflow flag, as we believe we're up to speed */
1228 	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1229 	writel(q->cons, q->cons_reg);
1230 	return IRQ_HANDLED;
1231 }
1232 
1233 static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
1234 {
1235 	irqreturn_t ret = IRQ_WAKE_THREAD;
1236 	struct arm_smmu_device *smmu = dev;
1237 	struct arm_smmu_queue *q = &smmu->priq.q;
1238 
1239 	/* PRIQ overflow indicates a programming error */
1240 	if (queue_sync_prod(q) == -EOVERFLOW)
1241 		dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1242 	else if (queue_empty(q))
1243 		ret = IRQ_NONE;
1244 
1245 	return ret;
1246 }
1247 
1248 static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
1249 {
1250 	/* We don't actually use CMD_SYNC interrupts for anything */
1251 	return IRQ_HANDLED;
1252 }
1253 
1254 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1255 
1256 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1257 {
1258 	u32 gerror, gerrorn;
1259 	struct arm_smmu_device *smmu = dev;
1260 
1261 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1262 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1263 
1264 	gerror ^= gerrorn;
1265 	if (!(gerror & GERROR_ERR_MASK))
1266 		return IRQ_NONE; /* No errors pending */
1267 
1268 	dev_warn(smmu->dev,
1269 		 "unexpected global error reported (0x%08x), this could be serious\n",
1270 		 gerror);
1271 
1272 	if (gerror & GERROR_SFM_ERR) {
1273 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1274 		arm_smmu_device_disable(smmu);
1275 	}
1276 
1277 	if (gerror & GERROR_MSI_GERROR_ABT_ERR)
1278 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1279 
1280 	if (gerror & GERROR_MSI_PRIQ_ABT_ERR) {
1281 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1282 		arm_smmu_priq_handler(irq, smmu->dev);
1283 	}
1284 
1285 	if (gerror & GERROR_MSI_EVTQ_ABT_ERR) {
1286 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1287 		arm_smmu_evtq_handler(irq, smmu->dev);
1288 	}
1289 
1290 	if (gerror & GERROR_MSI_CMDQ_ABT_ERR) {
1291 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1292 		arm_smmu_cmdq_sync_handler(irq, smmu->dev);
1293 	}
1294 
1295 	if (gerror & GERROR_PRIQ_ABT_ERR)
1296 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1297 
1298 	if (gerror & GERROR_EVTQ_ABT_ERR)
1299 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1300 
1301 	if (gerror & GERROR_CMDQ_ERR)
1302 		arm_smmu_cmdq_skip_err(smmu);
1303 
1304 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1305 	return IRQ_HANDLED;
1306 }
1307 
1308 /* IO_PGTABLE API */
1309 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1310 {
1311 	struct arm_smmu_cmdq_ent cmd;
1312 
1313 	cmd.opcode = CMDQ_OP_CMD_SYNC;
1314 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1315 }
1316 
1317 static void arm_smmu_tlb_sync(void *cookie)
1318 {
1319 	struct arm_smmu_domain *smmu_domain = cookie;
1320 	__arm_smmu_tlb_sync(smmu_domain->smmu);
1321 }
1322 
1323 static void arm_smmu_tlb_inv_context(void *cookie)
1324 {
1325 	struct arm_smmu_domain *smmu_domain = cookie;
1326 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1327 	struct arm_smmu_cmdq_ent cmd;
1328 
1329 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1330 		cmd.opcode	= CMDQ_OP_TLBI_NH_ASID;
1331 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1332 		cmd.tlbi.vmid	= 0;
1333 	} else {
1334 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1335 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1336 	}
1337 
1338 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1339 	__arm_smmu_tlb_sync(smmu);
1340 }
1341 
1342 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1343 					  bool leaf, void *cookie)
1344 {
1345 	struct arm_smmu_domain *smmu_domain = cookie;
1346 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1347 	struct arm_smmu_cmdq_ent cmd = {
1348 		.tlbi = {
1349 			.leaf	= leaf,
1350 			.addr	= iova,
1351 		},
1352 	};
1353 
1354 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1355 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1356 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1357 	} else {
1358 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1359 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1360 	}
1361 
1362 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1363 }
1364 
1365 static struct iommu_gather_ops arm_smmu_gather_ops = {
1366 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1367 	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
1368 	.tlb_sync	= arm_smmu_tlb_sync,
1369 };
1370 
1371 /* IOMMU API */
1372 static bool arm_smmu_capable(enum iommu_cap cap)
1373 {
1374 	switch (cap) {
1375 	case IOMMU_CAP_CACHE_COHERENCY:
1376 		return true;
1377 	case IOMMU_CAP_INTR_REMAP:
1378 		return true; /* MSIs are just memory writes */
1379 	case IOMMU_CAP_NOEXEC:
1380 		return true;
1381 	default:
1382 		return false;
1383 	}
1384 }
1385 
1386 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1387 {
1388 	struct arm_smmu_domain *smmu_domain;
1389 
1390 	if (type != IOMMU_DOMAIN_UNMANAGED)
1391 		return NULL;
1392 
1393 	/*
1394 	 * Allocate the domain and initialise some of its data structures.
1395 	 * We can't really do anything meaningful until we've added a
1396 	 * master.
1397 	 */
1398 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1399 	if (!smmu_domain)
1400 		return NULL;
1401 
1402 	mutex_init(&smmu_domain->init_mutex);
1403 	spin_lock_init(&smmu_domain->pgtbl_lock);
1404 	return &smmu_domain->domain;
1405 }
1406 
1407 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1408 {
1409 	int idx, size = 1 << span;
1410 
1411 	do {
1412 		idx = find_first_zero_bit(map, size);
1413 		if (idx == size)
1414 			return -ENOSPC;
1415 	} while (test_and_set_bit(idx, map));
1416 
1417 	return idx;
1418 }
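/*
 * test_and_set_bit() makes the search/claim sequence above safe against
 * concurrent callers: if another thread grabs the bit between
 * find_first_zero_bit() and the atomic set, we simply search again.
 */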
1419 
1420 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1421 {
1422 	clear_bit(idx, map);
1423 }
1424 
1425 static void arm_smmu_domain_free(struct iommu_domain *domain)
1426 {
1427 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1428 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1429 
1430 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1431 
1432 	/* Free the CD and ASID, if we allocated them */
1433 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1434 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1435 
1436 		if (cfg->cdptr) {
1437 			dma_free_coherent(smmu_domain->smmu->dev,
1438 					  CTXDESC_CD_DWORDS << 3,
1439 					  cfg->cdptr,
1440 					  cfg->cdptr_dma);
1441 
1442 			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1443 		}
1444 	} else {
1445 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1446 		if (cfg->vmid)
1447 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1448 	}
1449 
1450 	kfree(smmu_domain);
1451 }
1452 
1453 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1454 				       struct io_pgtable_cfg *pgtbl_cfg)
1455 {
1456 	int ret;
1457 	int asid;
1458 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1459 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1460 
1461 	asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1462 	if (IS_ERR_VALUE(asid))
1463 		return asid;
1464 
1465 	cfg->cdptr = dma_zalloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1466 					 &cfg->cdptr_dma, GFP_KERNEL);
1467 	if (!cfg->cdptr) {
1468 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1469 		ret = -ENOMEM;
1470 		goto out_free_asid;
1471 	}
1472 
1473 	cfg->cd.asid	= (u16)asid;
1474 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1475 	cfg->cd.tcr	= pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1476 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1477 	return 0;
1478 
1479 out_free_asid:
1480 	arm_smmu_bitmap_free(smmu->asid_map, asid);
1481 	return ret;
1482 }
1483 
1484 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1485 				       struct io_pgtable_cfg *pgtbl_cfg)
1486 {
1487 	int vmid;
1488 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1489 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1490 
1491 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1492 	if (IS_ERR_VALUE(vmid))
1493 		return vmid;
1494 
1495 	cfg->vmid	= (u16)vmid;
1496 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1497 	cfg->vtcr	= pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1498 	return 0;
1499 }
1500 
1501 static struct iommu_ops arm_smmu_ops;
1502 
1503 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1504 {
1505 	int ret;
1506 	unsigned long ias, oas;
1507 	enum io_pgtable_fmt fmt;
1508 	struct io_pgtable_cfg pgtbl_cfg;
1509 	struct io_pgtable_ops *pgtbl_ops;
1510 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1511 				 struct io_pgtable_cfg *);
1512 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1513 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1514 
1515 	/* Restrict the stage to what we can actually support */
1516 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1517 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1518 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1519 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1520 
1521 	switch (smmu_domain->stage) {
1522 	case ARM_SMMU_DOMAIN_S1:
1523 		ias = VA_BITS;
1524 		oas = smmu->ias;
1525 		fmt = ARM_64_LPAE_S1;
1526 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1527 		break;
1528 	case ARM_SMMU_DOMAIN_NESTED:
1529 	case ARM_SMMU_DOMAIN_S2:
1530 		ias = smmu->ias;
1531 		oas = smmu->oas;
1532 		fmt = ARM_64_LPAE_S2;
1533 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1534 		break;
1535 	default:
1536 		return -EINVAL;
1537 	}
1538 
1539 	pgtbl_cfg = (struct io_pgtable_cfg) {
1540 		.pgsize_bitmap	= arm_smmu_ops.pgsize_bitmap,
1541 		.ias		= ias,
1542 		.oas		= oas,
1543 		.tlb		= &arm_smmu_gather_ops,
1544 		.iommu_dev	= smmu->dev,
1545 	};
1546 
1547 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1548 	if (!pgtbl_ops)
1549 		return -ENOMEM;
1550 
1551 	arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1552 
1553 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1554 	if (IS_ERR_VALUE(ret)) {
1555 		free_io_pgtable_ops(pgtbl_ops);
1556 		return ret;
1557 	}
1558 
1559 	smmu_domain->pgtbl_ops = pgtbl_ops;
1560 	return 0;
1561 }
1562 
1563 static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
1564 {
1565 	struct iommu_group *group;
1566 	struct arm_smmu_group *smmu_group;
1567 
1568 	group = iommu_group_get(dev);
1569 	if (!group)
1570 		return NULL;
1571 
1572 	smmu_group = iommu_group_get_iommudata(group);
1573 	iommu_group_put(group);
1574 	return smmu_group;
1575 }
1576 
1577 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1578 {
1579 	__le64 *step;
1580 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1581 
1582 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1583 		struct arm_smmu_strtab_l1_desc *l1_desc;
1584 		int idx;
1585 
1586 		/* Two-level walk */
1587 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1588 		l1_desc = &cfg->l1_desc[idx];
1589 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1590 		step = &l1_desc->l2ptr[idx];
1591 	} else {
1592 		/* Simple linear lookup */
1593 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1594 	}
1595 
1596 	return step;
1597 }
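/*
 * Example with STRTAB_SPLIT == 8: SID 0x1234 selects level-1 descriptor
 * 0x12, then entry 0x34 within that descriptor's level-2 table.
 */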
1598 
1599 static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
1600 {
1601 	int i;
1602 	struct arm_smmu_domain *smmu_domain = smmu_group->domain;
1603 	struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
1604 	struct arm_smmu_device *smmu = smmu_group->smmu;
1605 
1606 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1607 		ste->s1_cfg = &smmu_domain->s1_cfg;
1608 		ste->s2_cfg = NULL;
1609 		arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1610 	} else {
1611 		ste->s1_cfg = NULL;
1612 		ste->s2_cfg = &smmu_domain->s2_cfg;
1613 	}
1614 
1615 	for (i = 0; i < smmu_group->num_sids; ++i) {
1616 		u32 sid = smmu_group->sids[i];
1617 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1618 
1619 		arm_smmu_write_strtab_ent(smmu, sid, step, ste);
1620 	}
1621 
1622 	return 0;
1623 }
1624 
1625 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1626 {
1627 	int ret = 0;
1628 	struct arm_smmu_device *smmu;
1629 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1630 	struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1631 
1632 	if (!smmu_group)
1633 		return -ENOENT;
1634 
1635 	/* Already attached to a different domain? */
1636 	if (smmu_group->domain && smmu_group->domain != smmu_domain)
1637 		return -EEXIST;
1638 
1639 	smmu = smmu_group->smmu;
1640 	mutex_lock(&smmu_domain->init_mutex);
1641 
1642 	if (!smmu_domain->smmu) {
1643 		smmu_domain->smmu = smmu;
1644 		ret = arm_smmu_domain_finalise(domain);
1645 		if (ret) {
1646 			smmu_domain->smmu = NULL;
1647 			goto out_unlock;
1648 		}
1649 	} else if (smmu_domain->smmu != smmu) {
1650 		dev_err(dev,
1651 			"cannot attach to SMMU %s (upstream of %s)\n",
1652 			dev_name(smmu_domain->smmu->dev),
1653 			dev_name(smmu->dev));
1654 		ret = -ENXIO;
1655 		goto out_unlock;
1656 	}
1657 
1658 	/* Group already attached to this domain? */
1659 	if (smmu_group->domain)
1660 		goto out_unlock;
1661 
1662 	smmu_group->domain	= smmu_domain;
1663 	smmu_group->ste.bypass	= false;
1664 
1665 	ret = arm_smmu_install_ste_for_group(smmu_group);
1666 	if (IS_ERR_VALUE(ret))
1667 		smmu_group->domain = NULL;
1668 
1669 out_unlock:
1670 	mutex_unlock(&smmu_domain->init_mutex);
1671 	return ret;
1672 }
1673 
1674 static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
1675 {
1676 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1677 	struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1678 
1679 	BUG_ON(!smmu_domain);
1680 	BUG_ON(!smmu_group);
1681 
1682 	mutex_lock(&smmu_domain->init_mutex);
1683 	BUG_ON(smmu_group->domain != smmu_domain);
1684 
1685 	smmu_group->ste.bypass = true;
1686 	if (IS_ERR_VALUE(arm_smmu_install_ste_for_group(smmu_group)))
1687 		dev_warn(dev, "failed to install bypass STE\n");
1688 
1689 	smmu_group->domain = NULL;
1690 	mutex_unlock(&smmu_domain->init_mutex);
1691 }
1692 
1693 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1694 			phys_addr_t paddr, size_t size, int prot)
1695 {
1696 	int ret;
1697 	unsigned long flags;
1698 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1699 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1700 
1701 	if (!ops)
1702 		return -ENODEV;
1703 
1704 	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1705 	ret = ops->map(ops, iova, paddr, size, prot);
1706 	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1707 	return ret;
1708 }
1709 
1710 static size_t
1711 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1712 {
1713 	size_t ret;
1714 	unsigned long flags;
1715 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1716 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1717 
1718 	if (!ops)
1719 		return 0;
1720 
1721 	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1722 	ret = ops->unmap(ops, iova, size);
1723 	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1724 	return ret;
1725 }
1726 
1727 static phys_addr_t
1728 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1729 {
1730 	phys_addr_t ret;
1731 	unsigned long flags;
1732 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1733 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1734 
1735 	if (!ops)
1736 		return 0;
1737 
1738 	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1739 	ret = ops->iova_to_phys(ops, iova);
1740 	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1741 
1742 	return ret;
1743 }
1744 
1745 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
1746 {
1747 	*(u32 *)sidp = alias;
1748 	return 0; /* Continue walking */
1749 }
1750 
1751 static void __arm_smmu_release_pci_iommudata(void *data)
1752 {
1753 	kfree(data);
1754 }
1755 
1756 static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
1757 {
1758 	struct device_node *of_node;
1759 	struct platform_device *smmu_pdev;
1760 	struct arm_smmu_device *smmu = NULL;
1761 	struct pci_bus *bus = pdev->bus;
1762 
1763 	/* Walk up to the root bus */
1764 	while (!pci_is_root_bus(bus))
1765 		bus = bus->parent;
1766 
1767 	/* Follow the "iommus" phandle from the host controller */
1768 	of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
1769 	if (!of_node)
1770 		return NULL;
1771 
1772 	/* See if we can find an SMMU corresponding to the phandle */
1773 	smmu_pdev = of_find_device_by_node(of_node);
1774 	if (smmu_pdev)
1775 		smmu = platform_get_drvdata(smmu_pdev);
1776 
1777 	of_node_put(of_node);
1778 	return smmu;
1779 }
1780 
1781 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1782 {
1783 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1784 
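	/* With a 2-level table, each L1 descriptor covers 2^STRTAB_SPLIT STEs */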
1785 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1786 		limit *= 1UL << STRTAB_SPLIT;
1787 
1788 	return sid < limit;
1789 }
1790 
1791 static int arm_smmu_add_device(struct device *dev)
1792 {
1793 	int i, ret;
1794 	u32 sid, *sids;
1795 	struct pci_dev *pdev;
1796 	struct iommu_group *group;
1797 	struct arm_smmu_group *smmu_group;
1798 	struct arm_smmu_device *smmu;
1799 
1800 	/* We only support PCI, for now */
1801 	if (!dev_is_pci(dev))
1802 		return -ENODEV;
1803 
1804 	pdev = to_pci_dev(dev);
1805 	group = iommu_group_get_for_dev(dev);
1806 	if (IS_ERR(group))
1807 		return PTR_ERR(group);
1808 
1809 	smmu_group = iommu_group_get_iommudata(group);
1810 	if (!smmu_group) {
1811 		smmu = arm_smmu_get_for_pci_dev(pdev);
1812 		if (!smmu) {
1813 			ret = -ENOENT;
1814 			goto out_put_group;
1815 		}
1816 
1817 		smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
1818 		if (!smmu_group) {
1819 			ret = -ENOMEM;
1820 			goto out_put_group;
1821 		}
1822 
1823 		smmu_group->ste.valid	= true;
1824 		smmu_group->smmu	= smmu;
1825 		iommu_group_set_iommudata(group, smmu_group,
1826 					  __arm_smmu_release_pci_iommudata);
1827 	} else {
1828 		smmu = smmu_group->smmu;
1829 	}
1830 
1831 	/* Assume SID == RID until firmware tells us otherwise */
1832 	pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
1833 	for (i = 0; i < smmu_group->num_sids; ++i) {
1834 		/* If we already know about this SID, then we're done */
1835 		if (smmu_group->sids[i] == sid)
1836 			return 0;
1837 	}
1838 
1839 	/* Check the SID is in range of the SMMU and our stream table */
1840 	if (!arm_smmu_sid_in_range(smmu, sid)) {
1841 		ret = -ERANGE;
1842 		goto out_put_group;
1843 	}
1844 
1845 	/* Ensure l2 strtab is initialised */
1846 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1847 		ret = arm_smmu_init_l2_strtab(smmu, sid);
1848 		if (ret)
1849 			goto out_put_group;
1850 	}
1851 
1852 	/* Resize the SID array for the group */
1853 	smmu_group->num_sids++;
1854 	sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
1855 			GFP_KERNEL);
1856 	if (!sids) {
1857 		smmu_group->num_sids--;
1858 		ret = -ENOMEM;
1859 		goto out_put_group;
1860 	}
1861 
1862 	/* Add the new SID */
1863 	sids[smmu_group->num_sids - 1] = sid;
1864 	smmu_group->sids = sids;
1865 	return 0;
1866 
1867 out_put_group:
1868 	iommu_group_put(group);
1869 	return ret;
1870 }
1871 
1872 static void arm_smmu_remove_device(struct device *dev)
1873 {
1874 	iommu_group_remove_device(dev);
1875 }
1876 
1877 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1878 				    enum iommu_attr attr, void *data)
1879 {
1880 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1881 
1882 	switch (attr) {
1883 	case DOMAIN_ATTR_NESTING:
1884 		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1885 		return 0;
1886 	default:
1887 		return -ENODEV;
1888 	}
1889 }
1890 
1891 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1892 				    enum iommu_attr attr, void *data)
1893 {
1894 	int ret = 0;
1895 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1896 
1897 	mutex_lock(&smmu_domain->init_mutex);
1898 
1899 	switch (attr) {
1900 	case DOMAIN_ATTR_NESTING:
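		/* The stage cannot be changed once the domain is attached to an SMMU */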
1901 		if (smmu_domain->smmu) {
1902 			ret = -EPERM;
1903 			goto out_unlock;
1904 		}
1905 
1906 		if (*(int *)data)
1907 			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1908 		else
1909 			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1910 
1911 		break;
1912 	default:
1913 		ret = -ENODEV;
1914 	}
1915 
1916 out_unlock:
1917 	mutex_unlock(&smmu_domain->init_mutex);
1918 	return ret;
1919 }
1920 
1921 static struct iommu_ops arm_smmu_ops = {
1922 	.capable		= arm_smmu_capable,
1923 	.domain_alloc		= arm_smmu_domain_alloc,
1924 	.domain_free		= arm_smmu_domain_free,
1925 	.attach_dev		= arm_smmu_attach_dev,
1926 	.detach_dev		= arm_smmu_detach_dev,
1927 	.map			= arm_smmu_map,
1928 	.unmap			= arm_smmu_unmap,
1929 	.map_sg			= default_iommu_map_sg,
1930 	.iova_to_phys		= arm_smmu_iova_to_phys,
1931 	.add_device		= arm_smmu_add_device,
1932 	.remove_device		= arm_smmu_remove_device,
1933 	.device_group		= pci_device_group,
1934 	.domain_get_attr	= arm_smmu_domain_get_attr,
1935 	.domain_set_attr	= arm_smmu_domain_set_attr,
1936 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1937 };
1938 
1939 /* Probing and initialisation functions */
1940 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1941 				   struct arm_smmu_queue *q,
1942 				   unsigned long prod_off,
1943 				   unsigned long cons_off,
1944 				   size_t dwords)
1945 {
1946 	size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
1947 
1948 	q->base = dma_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
1949 	if (!q->base) {
1950 		dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
1951 			qsz);
1952 		return -ENOMEM;
1953 	}
1954 
1955 	q->prod_reg	= smmu->base + prod_off;
1956 	q->cons_reg	= smmu->base + cons_off;
1957 	q->ent_dwords	= dwords;
1958 
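	/* Encode the read-allocate hint, base address and log2(size) for the queue BASE register */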
1959 	q->q_base  = Q_BASE_RWA;
1960 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
1961 	q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
1962 		     << Q_BASE_LOG2SIZE_SHIFT;
1963 
1964 	q->prod = q->cons = 0;
1965 	return 0;
1966 }
1967 
1968 static void arm_smmu_free_one_queue(struct arm_smmu_device *smmu,
1969 				    struct arm_smmu_queue *q)
1970 {
1971 	size_t qsz = ((1 << q->max_n_shift) * q->ent_dwords) << 3;
1972 
1973 	dma_free_coherent(smmu->dev, qsz, q->base, q->base_dma);
1974 }
1975 
1976 static void arm_smmu_free_queues(struct arm_smmu_device *smmu)
1977 {
1978 	arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
1979 	arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
1980 
1981 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1982 		arm_smmu_free_one_queue(smmu, &smmu->priq.q);
1983 }
1984 
1985 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
1986 {
1987 	int ret;
1988 
1989 	/* cmdq */
1990 	spin_lock_init(&smmu->cmdq.lock);
1991 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
1992 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
1993 	if (ret)
1994 		goto out;
1995 
1996 	/* evtq */
1997 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
1998 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
1999 	if (ret)
2000 		goto out_free_cmdq;
2001 
2002 	/* priq */
2003 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2004 		return 0;
2005 
2006 	ret = arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2007 				      ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2008 	if (ret)
2009 		goto out_free_evtq;
2010 
2011 	return 0;
2012 
2013 out_free_evtq:
2014 	arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
2015 out_free_cmdq:
2016 	arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
2017 out:
2018 	return ret;
2019 }
2020 
2021 static void arm_smmu_free_l2_strtab(struct arm_smmu_device *smmu)
2022 {
2023 	int i;
2024 	size_t size;
2025 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2026 
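	/* Each L2 table holds 2^STRTAB_SPLIT STEs of STRTAB_STE_DWORDS 64-bit words */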
2027 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
2028 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2029 		struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[i];
2030 
2031 		if (!desc->l2ptr)
2032 			continue;
2033 
2034 		dma_free_coherent(smmu->dev, size, desc->l2ptr,
2035 				  desc->l2ptr_dma);
2036 	}
2037 }
2038 
2039 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2040 {
2041 	unsigned int i;
2042 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2043 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2044 	void *strtab = smmu->strtab_cfg.strtab;
2045 
2046 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2047 	if (!cfg->l1_desc) {
2048 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2049 		return -ENOMEM;
2050 	}
2051 
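	/* Write out invalid L1 descriptors; L2 tables are allocated lazily per SID */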
2052 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2053 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2054 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2055 	}
2056 
2057 	return 0;
2058 }
2059 
2060 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2061 {
2062 	void *strtab;
2063 	u64 reg;
2064 	u32 size, l1size;
2065 	int ret;
2066 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2067 
2068 	/*
2069 	 * If we can resolve everything with a single L2 table, then we
2070 	 * just need a single L1 descriptor. Otherwise, calculate the L1
2071 	 * size, capped to the SIDSIZE.
2072 	 */
2073 	if (smmu->sid_bits < STRTAB_SPLIT) {
2074 		size = 0;
2075 	} else {
2076 		size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2077 		size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2078 	}
2079 	cfg->num_l1_ents = 1 << size;
2080 
2081 	size += STRTAB_SPLIT;
2082 	if (size < smmu->sid_bits)
2083 		dev_warn(smmu->dev,
2084 			 "2-level strtab only covers %u/%u bits of SID\n",
2085 			 size, smmu->sid_bits);
2086 
2087 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2088 	strtab = dma_zalloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2089 				     GFP_KERNEL);
2090 	if (!strtab) {
2091 		dev_err(smmu->dev,
2092 			"failed to allocate l1 stream table (%u bytes)\n",
2093 			l1size);
2094 		return -ENOMEM;
2095 	}
2096 	cfg->strtab = strtab;
2097 
2098 	/* Configure strtab_base_cfg for 2 levels */
2099 	reg  = STRTAB_BASE_CFG_FMT_2LVL;
2100 	reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2101 		<< STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2102 	reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
2103 		<< STRTAB_BASE_CFG_SPLIT_SHIFT;
2104 	cfg->strtab_base_cfg = reg;
2105 
2106 	ret = arm_smmu_init_l1_strtab(smmu);
2107 	if (ret)
2108 		dma_free_coherent(smmu->dev,
2109 				  l1size,
2110 				  strtab,
2111 				  cfg->strtab_dma);
2112 	return ret;
2113 }
2114 
2115 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2116 {
2117 	void *strtab;
2118 	u64 reg;
2119 	u32 size;
2120 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2121 
2122 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2123 	strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2124 				     GFP_KERNEL);
2125 	if (!strtab) {
2126 		dev_err(smmu->dev,
2127 			"failed to allocate linear stream table (%u bytes)\n",
2128 			size);
2129 		return -ENOMEM;
2130 	}
2131 	cfg->strtab = strtab;
2132 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2133 
2134 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2135 	reg  = STRTAB_BASE_CFG_FMT_LINEAR;
2136 	reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2137 		<< STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2138 	cfg->strtab_base_cfg = reg;
2139 
2140 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2141 	return 0;
2142 }
2143 
2144 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2145 {
2146 	u64 reg;
2147 	int ret;
2148 
2149 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2150 		ret = arm_smmu_init_strtab_2lvl(smmu);
2151 	else
2152 		ret = arm_smmu_init_strtab_linear(smmu);
2153 
2154 	if (ret)
2155 		return ret;
2156 
2157 	/* Set the strtab base address */
2158 	reg  = smmu->strtab_cfg.strtab_dma &
2159 	       STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
2160 	reg |= STRTAB_BASE_RA;
2161 	smmu->strtab_cfg.strtab_base = reg;
2162 
2163 	/* Allocate the first VMID for stage-2 bypass STEs */
2164 	set_bit(0, smmu->vmid_map);
2165 	return 0;
2166 }
2167 
2168 static void arm_smmu_free_strtab(struct arm_smmu_device *smmu)
2169 {
2170 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2171 	u32 size = cfg->num_l1_ents;
2172 
2173 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2174 		arm_smmu_free_l2_strtab(smmu);
2175 		size *= STRTAB_L1_DESC_DWORDS << 3;
2176 	} else {
2177 		size *= STRTAB_STE_DWORDS << 3;
2178 	}
2179 
2180 	dma_free_coherent(smmu->dev, size, cfg->strtab, cfg->strtab_dma);
2181 }
2182 
2183 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2184 {
2185 	int ret;
2186 
2187 	ret = arm_smmu_init_queues(smmu);
2188 	if (ret)
2189 		return ret;
2190 
2191 	ret = arm_smmu_init_strtab(smmu);
2192 	if (ret)
2193 		goto out_free_queues;
2194 
2195 	return 0;
2196 
2197 out_free_queues:
2198 	arm_smmu_free_queues(smmu);
2199 	return ret;
2200 }
2201 
2202 static void arm_smmu_free_structures(struct arm_smmu_device *smmu)
2203 {
2204 	arm_smmu_free_strtab(smmu);
2205 	arm_smmu_free_queues(smmu);
2206 }
2207 
2208 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2209 				   unsigned int reg_off, unsigned int ack_off)
2210 {
2211 	u32 reg;
2212 
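	/* Write the value, then poll the ACK register until the update is reflected */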
2213 	writel_relaxed(val, smmu->base + reg_off);
2214 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2215 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2216 }
2217 
2218 static void arm_smmu_free_msis(void *data)
2219 {
2220 	struct device *dev = data;
2221 	platform_msi_domain_free_irqs(dev);
2222 }
2223 
2224 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2225 {
2226 	phys_addr_t doorbell;
2227 	struct device *dev = msi_desc_to_dev(desc);
2228 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2229 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2230 
2231 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2232 	doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
2233 
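	/* Program the doorbell address, payload and memory attributes (IRQ_CFG0/1/2) */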
2234 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2235 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2236 	writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2237 }
2238 
2239 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2240 {
2241 	struct msi_desc *desc;
2242 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2243 	struct device *dev = smmu->dev;
2244 
2245 	/* Clear the MSI address regs */
2246 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2247 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2248 
2249 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2250 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2251 	else
2252 		nvec--;
2253 
2254 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2255 		return;
2256 
2257 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2258 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2259 	if (ret) {
2260 		dev_warn(dev, "failed to allocate MSIs\n");
2261 		return;
2262 	}
2263 
2264 	for_each_msi_entry(desc, dev) {
2265 		switch (desc->platform.msi_index) {
2266 		case EVTQ_MSI_INDEX:
2267 			smmu->evtq.q.irq = desc->irq;
2268 			break;
2269 		case GERROR_MSI_INDEX:
2270 			smmu->gerr_irq = desc->irq;
2271 			break;
2272 		case PRIQ_MSI_INDEX:
2273 			smmu->priq.q.irq = desc->irq;
2274 			break;
2275 		default:	/* Unknown */
2276 			continue;
2277 		}
2278 	}
2279 
2280 	/* Add callback to free MSIs on teardown */
2281 	devm_add_action(dev, arm_smmu_free_msis, dev);
2282 }
2283 
2284 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2285 {
2286 	int ret, irq;
2287 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2288 
2289 	/* Disable IRQs first */
2290 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2291 				      ARM_SMMU_IRQ_CTRLACK);
2292 	if (ret) {
2293 		dev_err(smmu->dev, "failed to disable irqs\n");
2294 		return ret;
2295 	}
2296 
2297 	arm_smmu_setup_msis(smmu);
2298 
2299 	/* Request interrupt lines */
2300 	irq = smmu->evtq.q.irq;
2301 	if (irq) {
2302 		ret = devm_request_threaded_irq(smmu->dev, irq,
2303 						arm_smmu_evtq_handler,
2304 						arm_smmu_evtq_thread,
2305 						0, "arm-smmu-v3-evtq", smmu);
2306 		if (IS_ERR_VALUE(ret))
2307 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2308 	}
2309 
2310 	irq = smmu->cmdq.q.irq;
2311 	if (irq) {
2312 		ret = devm_request_irq(smmu->dev, irq,
2313 				       arm_smmu_cmdq_sync_handler, 0,
2314 				       "arm-smmu-v3-cmdq-sync", smmu);
2315 		if (IS_ERR_VALUE(ret))
2316 			dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
2317 	}
2318 
2319 	irq = smmu->gerr_irq;
2320 	if (irq) {
2321 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2322 				       0, "arm-smmu-v3-gerror", smmu);
2323 		if (IS_ERR_VALUE(ret))
2324 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2325 	}
2326 
2327 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2328 		irq = smmu->priq.q.irq;
2329 		if (irq) {
2330 			ret = devm_request_threaded_irq(smmu->dev, irq,
2331 							arm_smmu_priq_handler,
2332 							arm_smmu_priq_thread,
2333 							0, "arm-smmu-v3-priq",
2334 							smmu);
2335 			if (IS_ERR_VALUE(ret))
2336 				dev_warn(smmu->dev,
2337 					 "failed to enable priq irq\n");
2338 			else
2339 				irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2340 		}
2341 	}
2342 
2343 	/* Enable interrupt generation on the SMMU */
2344 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2345 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2346 	if (ret)
2347 		dev_warn(smmu->dev, "failed to enable irqs\n");
2348 
2349 	return 0;
2350 }
2351 
2352 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2353 {
2354 	int ret;
2355 
2356 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2357 	if (ret)
2358 		dev_err(smmu->dev, "failed to clear cr0\n");
2359 
2360 	return ret;
2361 }
2362 
2363 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
2364 {
2365 	int ret;
2366 	u32 reg, enables;
2367 	struct arm_smmu_cmdq_ent cmd;
2368 
2369 	/* Clear CR0 and sync (disables SMMU and queue processing) */
2370 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2371 	if (reg & CR0_SMMUEN)
2372 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2373 
2374 	ret = arm_smmu_device_disable(smmu);
2375 	if (ret)
2376 		return ret;
2377 
2378 	/* CR1 (table and queue memory attributes) */
2379 	reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
2380 	      (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
2381 	      (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
2382 	      (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
2383 	      (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
2384 	      (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
2385 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2386 
2387 	/* CR2 (private TLB maintenance, record invalid SIDs, EL2 host mode) */
2388 	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2389 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2390 
2391 	/* Stream table */
2392 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
2393 		       smmu->base + ARM_SMMU_STRTAB_BASE);
2394 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2395 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2396 
2397 	/* Command queue */
2398 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2399 	writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2400 	writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2401 
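	/* Enable each queue, then the SMMU itself, one step at a time, syncing on CR0ACK */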
2402 	enables = CR0_CMDQEN;
2403 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2404 				      ARM_SMMU_CR0ACK);
2405 	if (ret) {
2406 		dev_err(smmu->dev, "failed to enable command queue\n");
2407 		return ret;
2408 	}
2409 
2410 	/* Invalidate any cached configuration */
2411 	cmd.opcode = CMDQ_OP_CFGI_ALL;
2412 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2413 	cmd.opcode = CMDQ_OP_CMD_SYNC;
2414 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2415 
2416 	/* Invalidate any stale TLB entries */
2417 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
2418 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2419 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2420 	}
2421 
2422 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2423 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2424 	cmd.opcode = CMDQ_OP_CMD_SYNC;
2425 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2426 
2427 	/* Event queue */
2428 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2429 	writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
2430 	writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
2431 
2432 	enables |= CR0_EVTQEN;
2433 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2434 				      ARM_SMMU_CR0ACK);
2435 	if (ret) {
2436 		dev_err(smmu->dev, "failed to enable event queue\n");
2437 		return ret;
2438 	}
2439 
2440 	/* PRI queue */
2441 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2442 		writeq_relaxed(smmu->priq.q.q_base,
2443 			       smmu->base + ARM_SMMU_PRIQ_BASE);
2444 		writel_relaxed(smmu->priq.q.prod,
2445 			       smmu->base + ARM_SMMU_PRIQ_PROD);
2446 		writel_relaxed(smmu->priq.q.cons,
2447 			       smmu->base + ARM_SMMU_PRIQ_CONS);
2448 
2449 		enables |= CR0_PRIQEN;
2450 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2451 					      ARM_SMMU_CR0ACK);
2452 		if (ret) {
2453 			dev_err(smmu->dev, "failed to enable PRI queue\n");
2454 			return ret;
2455 		}
2456 	}
2457 
2458 	ret = arm_smmu_setup_irqs(smmu);
2459 	if (ret) {
2460 		dev_err(smmu->dev, "failed to setup irqs\n");
2461 		return ret;
2462 	}
2463 
2464 	/* Enable the SMMU interface */
2465 	enables |= CR0_SMMUEN;
2466 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2467 				      ARM_SMMU_CR0ACK);
2468 	if (ret) {
2469 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
2470 		return ret;
2471 	}
2472 
2473 	return 0;
2474 }
2475 
2476 static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
2477 {
2478 	u32 reg;
2479 	bool coherent;
2480 	unsigned long pgsize_bitmap = 0;
2481 
2482 	/* IDR0 */
2483 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2484 
2485 	/* 2-level structures */
2486 	if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
2487 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2488 
2489 	if (reg & IDR0_CD2L)
2490 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2491 
2492 	/*
2493 	 * Translation table endianness.
2494 	 * We currently require the same endianness as the CPU, but this
2495 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2496 	 */
2497 	switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
2498 	case IDR0_TTENDIAN_MIXED:
2499 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2500 		break;
2501 #ifdef __BIG_ENDIAN
2502 	case IDR0_TTENDIAN_BE:
2503 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
2504 		break;
2505 #else
2506 	case IDR0_TTENDIAN_LE:
2507 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
2508 		break;
2509 #endif
2510 	default:
2511 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2512 		return -ENXIO;
2513 	}
2514 
2515 	/* Boolean feature flags */
2516 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2517 		smmu->features |= ARM_SMMU_FEAT_PRI;
2518 
2519 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2520 		smmu->features |= ARM_SMMU_FEAT_ATS;
2521 
2522 	if (reg & IDR0_SEV)
2523 		smmu->features |= ARM_SMMU_FEAT_SEV;
2524 
2525 	if (reg & IDR0_MSI)
2526 		smmu->features |= ARM_SMMU_FEAT_MSI;
2527 
2528 	if (reg & IDR0_HYP)
2529 		smmu->features |= ARM_SMMU_FEAT_HYP;
2530 
2531 	/*
2532 	 * The dma-coherent property is used in preference to the ID
2533 	 * register, but warn on mismatch.
2534 	 */
2535 	coherent = of_dma_is_coherent(smmu->dev->of_node);
2536 	if (coherent)
2537 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2538 
2539 	if (!!(reg & IDR0_COHACC) != coherent)
2540 		dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
2541 			 coherent ? "true" : "false");
2542 
2543 	if (reg & IDR0_STALL_MODEL)
2544 		smmu->features |= ARM_SMMU_FEAT_STALLS;
2545 
2546 	if (reg & IDR0_S1P)
2547 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2548 
2549 	if (reg & IDR0_S2P)
2550 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2551 
2552 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2553 		dev_err(smmu->dev, "no translation support!\n");
2554 		return -ENXIO;
2555 	}
2556 
2557 	/* We only support the AArch64 table format at present */
2558 	switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
2559 	case IDR0_TTF_AARCH32_64:
2560 		smmu->ias = 40;
2561 		/* Fallthrough */
2562 	case IDR0_TTF_AARCH64:
2563 		break;
2564 	default:
2565 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
2566 		return -ENXIO;
2567 	}
2568 
2569 	/* ASID/VMID sizes */
2570 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2571 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2572 
2573 	/* IDR1 */
2574 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2575 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2576 		dev_err(smmu->dev, "embedded implementation not supported\n");
2577 		return -ENXIO;
2578 	}
2579 
2580 	/* Queue sizes, capped at 4k */
2581 	smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
2582 				       reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
2583 	if (!smmu->cmdq.q.max_n_shift) {
2584 		/* Odd alignment restrictions on the base, so ignore for now */
2585 		dev_err(smmu->dev, "unit-length command queue not supported\n");
2586 		return -ENXIO;
2587 	}
2588 
2589 	smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
2590 				       reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
2591 	smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
2592 				       reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
2593 
2594 	/* SID/SSID sizes */
2595 	smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
2596 	smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
2597 
2598 	/* IDR5 */
2599 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2600 
2601 	/* Maximum number of outstanding stalls */
2602 	smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
2603 				& IDR5_STALL_MAX_MASK;
2604 
2605 	/* Page sizes */
2606 	if (reg & IDR5_GRAN64K)
2607 		pgsize_bitmap |= SZ_64K | SZ_512M;
2608 	if (reg & IDR5_GRAN16K)
2609 		pgsize_bitmap |= SZ_16K | SZ_32M;
2610 	if (reg & IDR5_GRAN4K)
2611 		pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2612 
2613 	arm_smmu_ops.pgsize_bitmap &= pgsize_bitmap;
2614 
2615 	/* Output address size */
2616 	switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
2617 	case IDR5_OAS_32_BIT:
2618 		smmu->oas = 32;
2619 		break;
2620 	case IDR5_OAS_36_BIT:
2621 		smmu->oas = 36;
2622 		break;
2623 	case IDR5_OAS_40_BIT:
2624 		smmu->oas = 40;
2625 		break;
2626 	case IDR5_OAS_42_BIT:
2627 		smmu->oas = 42;
2628 		break;
2629 	case IDR5_OAS_44_BIT:
2630 		smmu->oas = 44;
2631 		break;
2632 	default:
2633 		dev_info(smmu->dev,
2634 			"unknown output address size. Truncating to 48-bit\n");
2635 		/* Fallthrough */
2636 	case IDR5_OAS_48_BIT:
2637 		smmu->oas = 48;
2638 	}
2639 
2640 	/* Set the DMA mask for our table walker */
2641 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2642 		dev_warn(smmu->dev,
2643 			 "failed to set DMA mask for table walker\n");
2644 
2645 	smmu->ias = max(smmu->ias, smmu->oas);
2646 
2647 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2648 		 smmu->ias, smmu->oas, smmu->features);
2649 	return 0;
2650 }
2651 
2652 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
2653 {
2654 	int irq, ret;
2655 	struct resource *res;
2656 	struct arm_smmu_device *smmu;
2657 	struct device *dev = &pdev->dev;
2658 
2659 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2660 	if (!smmu) {
2661 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2662 		return -ENOMEM;
2663 	}
2664 	smmu->dev = dev;
2665 
2666 	/* Base address */
2667 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2668 	if (!res || resource_size(res) < SZ_128K) {
2669 		dev_err(dev, "MMIO region too small (%pr)\n", res);
2670 		return -EINVAL;
2671 	}
2672 
2673 	smmu->base = devm_ioremap_resource(dev, res);
2674 	if (IS_ERR(smmu->base))
2675 		return PTR_ERR(smmu->base);
2676 
2677 	/* Interrupt lines */
2678 	irq = platform_get_irq_byname(pdev, "eventq");
2679 	if (irq > 0)
2680 		smmu->evtq.q.irq = irq;
2681 
2682 	irq = platform_get_irq_byname(pdev, "priq");
2683 	if (irq > 0)
2684 		smmu->priq.q.irq = irq;
2685 
2686 	irq = platform_get_irq_byname(pdev, "cmdq-sync");
2687 	if (irq > 0)
2688 		smmu->cmdq.q.irq = irq;
2689 
2690 	irq = platform_get_irq_byname(pdev, "gerror");
2691 	if (irq > 0)
2692 		smmu->gerr_irq = irq;
2693 
2694 	parse_driver_options(smmu);
2695 
2696 	/* Probe the h/w */
2697 	ret = arm_smmu_device_probe(smmu);
2698 	if (ret)
2699 		return ret;
2700 
2701 	/* Initialise in-memory data structures */
2702 	ret = arm_smmu_init_structures(smmu);
2703 	if (ret)
2704 		return ret;
2705 
2706 	/* Record our private device structure */
2707 	platform_set_drvdata(pdev, smmu);
2708 
2709 	/* Reset the device */
2710 	ret = arm_smmu_device_reset(smmu);
2711 	if (ret)
2712 		goto out_free_structures;
2713 
2714 	return 0;
2715 
2716 out_free_structures:
2717 	arm_smmu_free_structures(smmu);
2718 	return ret;
2719 }
2720 
2721 static int arm_smmu_device_remove(struct platform_device *pdev)
2722 {
2723 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2724 
2725 	arm_smmu_device_disable(smmu);
2726 	arm_smmu_free_structures(smmu);
2727 	return 0;
2728 }
2729 
2730 static const struct of_device_id arm_smmu_of_match[] = {
2731 	{ .compatible = "arm,smmu-v3", },
2732 	{ },
2733 };
2734 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2735 
2736 static struct platform_driver arm_smmu_driver = {
2737 	.driver	= {
2738 		.name		= "arm-smmu-v3",
2739 		.of_match_table	= of_match_ptr(arm_smmu_of_match),
2740 	},
2741 	.probe	= arm_smmu_device_dt_probe,
2742 	.remove	= arm_smmu_device_remove,
2743 };
2744 
2745 static int __init arm_smmu_init(void)
2746 {
2747 	struct device_node *np;
2748 	int ret;
2749 
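	/* Only register the driver (and PCI bus ops) if an SMMUv3 node exists in the DT */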
2750 	np = of_find_matching_node(NULL, arm_smmu_of_match);
2751 	if (!np)
2752 		return 0;
2753 
2754 	of_node_put(np);
2755 
2756 	ret = platform_driver_register(&arm_smmu_driver);
2757 	if (ret)
2758 		return ret;
2759 
2760 	return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2761 }
2762 
2763 static void __exit arm_smmu_exit(void)
2764 {
2765 	return platform_driver_unregister(&arm_smmu_driver);
2766 }
2767 
2768 subsys_initcall(arm_smmu_init);
2769 module_exit(arm_smmu_exit);
2770 
2771 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2772 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2773 MODULE_LICENSE("GPL v2");
2774