// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic ARM page table allocator.
 *
 * ARMv7 Short-descriptor format, supporting
 * - Basic memory attributes
 * - Simplified access permissions (AP[2:1] model)
 * - Backwards-compatible TEX remap
 * - Large pages/supersections (if indicated by the caller)
 *
 * Not supporting:
 * - Legacy access permissions (AP[2:0] model)
 *
 * Almost certainly never supporting:
 * - PXN
 * - Domains
 *
 * Copyright (C) 2014-2015 ARM Limited
 * Copyright (c) 2014-2015 MediaTek Inc.
 */

#define pr_fmt(fmt)	"arm-v7s io-pgtable: " fmt

#include <linux/atomic.h>
#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>

#include <asm/barrier.h>

/* Struct accessors */
#define io_pgtable_to_data(x)						\
	container_of((x), struct arm_v7s_io_pgtable, iop)

#define io_pgtable_ops_to_data(x)					\
	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))

/*
 * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2,
 * and 12 bits in a page. With some carefully-chosen coefficients we can
 * hide the ugly inconsistencies behind these macros and at least let the
 * rest of the code pretend to be somewhat sane.
 */
#define ARM_V7S_ADDR_BITS		32
#define _ARM_V7S_LVL_BITS(lvl)		(16 - (lvl) * 4)
/* MediaTek: 34 bits in total, 14 bits at lvl1 and 8 bits at lvl2. */
#define _ARM_V7S_LVL_BITS_MTK(lvl)	(20 - (lvl) * 6)
#define ARM_V7S_LVL_SHIFT(lvl)		(ARM_V7S_ADDR_BITS - (4 + 8 * (lvl)))
#define ARM_V7S_TABLE_SHIFT		10

#define ARM_V7S_PTES_PER_LVL(lvl, cfg)	({				\
	!arm_v7s_is_mtk_enabled(cfg) ?					\
	 (1 << _ARM_V7S_LVL_BITS(lvl)) : (1 << _ARM_V7S_LVL_BITS_MTK(lvl));\
})

#define ARM_V7S_TABLE_SIZE(lvl, cfg)					\
	(ARM_V7S_PTES_PER_LVL(lvl, cfg) * sizeof(arm_v7s_iopte))

#define ARM_V7S_BLOCK_SIZE(lvl)		(1UL << ARM_V7S_LVL_SHIFT(lvl))
#define ARM_V7S_LVL_MASK(lvl)		((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl)))
#define ARM_V7S_TABLE_MASK		((u32)(~0U << ARM_V7S_TABLE_SHIFT))
#define _ARM_V7S_IDX_MASK(lvl, cfg)	(ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1)
#define ARM_V7S_LVL_IDX(addr, lvl, cfg)	({				\
	int _l = lvl;							\
	((addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg);	\
})
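
/*
 * Illustrative only: with the standard (non-MediaTek) coefficients above,
 * level 1 resolves IOVA bits [31:20] (4096 entries, 16KB table) and level 2
 * resolves bits [19:12] (256 entries, 1KB table). For example, for
 * iova 0x12345678, ARM_V7S_LVL_IDX(iova, 1, cfg) == 0x123 and
 * ARM_V7S_LVL_IDX(iova, 2, cfg) == 0x45.
 */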

/*
 * Large page/supersection entries are effectively a block of 16 page/section
 * entries, along the lines of the LPAE contiguous hint, but all with the
 * same output address. For want of a better common name we'll call them
 * "contiguous" versions of their respective page/section entries here, but
 * noting the distinction (w.r.t. TLB maintenance) that they represent *one*
 * entry repeated 16 times, not 16 separate entries (as in the LPAE case).
 */
#define ARM_V7S_CONT_PAGES		16
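
/*
 * For reference (derived from the block sizes above): 16 contiguous 4KB
 * small pages form a 64KB large page, and 16 contiguous 1MB sections form
 * a 16MB supersection.
 */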

/* PTE type bits: these are all mixed up with XN/PXN bits in most cases */
#define ARM_V7S_PTE_TYPE_TABLE		0x1
#define ARM_V7S_PTE_TYPE_PAGE		0x2
#define ARM_V7S_PTE_TYPE_CONT_PAGE	0x1

#define ARM_V7S_PTE_IS_VALID(pte)	(((pte) & 0x3) != 0)
#define ARM_V7S_PTE_IS_TABLE(pte, lvl)	\
	((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE))

/* Page table bits */
#define ARM_V7S_ATTR_XN(lvl)		BIT(4 * (2 - (lvl)))
#define ARM_V7S_ATTR_B			BIT(2)
#define ARM_V7S_ATTR_C			BIT(3)
#define ARM_V7S_ATTR_NS_TABLE		BIT(3)
#define ARM_V7S_ATTR_NS_SECTION		BIT(19)

#define ARM_V7S_CONT_SECTION		BIT(18)
#define ARM_V7S_CONT_PAGE_XN_SHIFT	15

/*
 * The attribute bits are consistently ordered*, but occupy bits [17:10] of
 * a level 1 PTE vs. bits [11:4] at level 2. Thus we define the individual
 * fields relative to that 8-bit block, plus a total shift relative to the PTE.
 */
#define ARM_V7S_ATTR_SHIFT(lvl)		(16 - (lvl) * 6)

#define ARM_V7S_ATTR_MASK		0xff
#define ARM_V7S_ATTR_AP0		BIT(0)
#define ARM_V7S_ATTR_AP1		BIT(1)
#define ARM_V7S_ATTR_AP2		BIT(5)
#define ARM_V7S_ATTR_S			BIT(6)
#define ARM_V7S_ATTR_NG			BIT(7)
#define ARM_V7S_TEX_SHIFT		2
#define ARM_V7S_TEX_MASK		0x7
#define ARM_V7S_ATTR_TEX(val)		(((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)

/* MediaTek extends the bits below for PA bits 32/33/34 */
#define ARM_V7S_ATTR_MTK_PA_BIT32	BIT(9)
#define ARM_V7S_ATTR_MTK_PA_BIT33	BIT(4)
#define ARM_V7S_ATTR_MTK_PA_BIT34	BIT(5)

/* *well, except for TEX on level 2 large pages, of course :( */
#define ARM_V7S_CONT_PAGE_TEX_SHIFT	6
#define ARM_V7S_CONT_PAGE_TEX_MASK	(ARM_V7S_TEX_MASK << ARM_V7S_CONT_PAGE_TEX_SHIFT)

/* Simplified access permissions */
#define ARM_V7S_PTE_AF			ARM_V7S_ATTR_AP0
#define ARM_V7S_PTE_AP_UNPRIV		ARM_V7S_ATTR_AP1
#define ARM_V7S_PTE_AP_RDONLY		ARM_V7S_ATTR_AP2
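
/*
 * Sketch of the simplified AP[2:1] + AF model used by the aliases above,
 * for orientation only: AP0 acts as the Access Flag, setting AP1 grants
 * unprivileged access, and setting AP2 makes the mapping read-only. So a
 * normal writable IOMMU mapping ends up with AF and AP_UNPRIV set and
 * AP_RDONLY clear (see arm_v7s_prot_to_pte() below).
 */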

/* Register bits */
#define ARM_V7S_RGN_NC			0
#define ARM_V7S_RGN_WBWA		1
#define ARM_V7S_RGN_WT			2
#define ARM_V7S_RGN_WB			3

#define ARM_V7S_PRRR_TYPE_DEVICE	1
#define ARM_V7S_PRRR_TYPE_NORMAL	2
#define ARM_V7S_PRRR_TR(n, type)	(((type) & 0x3) << ((n) * 2))
#define ARM_V7S_PRRR_DS0		BIT(16)
#define ARM_V7S_PRRR_DS1		BIT(17)
#define ARM_V7S_PRRR_NS0		BIT(18)
#define ARM_V7S_PRRR_NS1		BIT(19)
#define ARM_V7S_PRRR_NOS(n)		BIT((n) + 24)

#define ARM_V7S_NMRR_IR(n, attr)	(((attr) & 0x3) << ((n) * 2))
#define ARM_V7S_NMRR_OR(n, attr)	(((attr) & 0x3) << ((n) * 2 + 16))

#define ARM_V7S_TTBR_S			BIT(1)
#define ARM_V7S_TTBR_NOS		BIT(5)
#define ARM_V7S_TTBR_ORGN_ATTR(attr)	(((attr) & 0x3) << 3)
#define ARM_V7S_TTBR_IRGN_ATTR(attr)					\
	((((attr) & 0x1) << 6) | (((attr) & 0x2) >> 1))

#define ARM_V7S_TCR_PD1			BIT(5)

#ifdef CONFIG_ZONE_DMA32
#define ARM_V7S_TABLE_GFP_DMA		GFP_DMA32
#define ARM_V7S_TABLE_SLAB_FLAGS	SLAB_CACHE_DMA32
#else
#define ARM_V7S_TABLE_GFP_DMA		GFP_DMA
#define ARM_V7S_TABLE_SLAB_FLAGS	SLAB_CACHE_DMA
#endif

typedef u32 arm_v7s_iopte;

static bool selftest_running;

struct arm_v7s_io_pgtable {
	struct io_pgtable	iop;

	arm_v7s_iopte		*pgd;
	struct kmem_cache	*l2_tables;
	spinlock_t		split_lock;
};

static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl);

static dma_addr_t __arm_v7s_dma_addr(void *pages)
{
	return (dma_addr_t)virt_to_phys(pages);
}

static bool arm_v7s_is_mtk_enabled(struct io_pgtable_cfg *cfg)
{
	return IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
		(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT);
}

static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
				    struct io_pgtable_cfg *cfg)
{
	arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);

	if (!arm_v7s_is_mtk_enabled(cfg))
		return pte;

	if (paddr & BIT_ULL(32))
		pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
	if (paddr & BIT_ULL(33))
		pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
	if (paddr & BIT_ULL(34))
		pte |= ARM_V7S_ATTR_MTK_PA_BIT34;
	return pte;
}
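
/*
 * Worked example (MediaTek extension only, purely for illustration): a
 * 34-bit physical address such as 0x2_4000_0000 has bit 33 set, so its low
 * 32 bits (0x4000_0000, masked to the block size) land in the normal
 * address field and ARM_V7S_ATTR_MTK_PA_BIT33 is ORed into the PTE;
 * iopte_to_paddr() below reverses this packing.
 */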

static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
				  struct io_pgtable_cfg *cfg)
{
	arm_v7s_iopte mask;
	phys_addr_t paddr;

	if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
		mask = ARM_V7S_TABLE_MASK;
	else if (arm_v7s_pte_is_cont(pte, lvl))
		mask = ARM_V7S_LVL_MASK(lvl) * ARM_V7S_CONT_PAGES;
	else
		mask = ARM_V7S_LVL_MASK(lvl);

	paddr = pte & mask;
	if (!arm_v7s_is_mtk_enabled(cfg))
		return paddr;

	if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
		paddr |= BIT_ULL(32);
	if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
		paddr |= BIT_ULL(33);
	if (pte & ARM_V7S_ATTR_MTK_PA_BIT34)
		paddr |= BIT_ULL(34);
	return paddr;
}

static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
				  struct arm_v7s_io_pgtable *data)
{
	return phys_to_virt(iopte_to_paddr(pte, lvl, &data->iop.cfg));
}
static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
				   struct arm_v7s_io_pgtable *data)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	struct device *dev = cfg->iommu_dev;
	phys_addr_t phys;
	dma_addr_t dma;
	size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg);
	void *table = NULL;

	if (lvl == 1)
		table = (void *)__get_free_pages(
				__GFP_ZERO | ARM_V7S_TABLE_GFP_DMA,
				get_order(size));
	else if (lvl == 2)
		table = kmem_cache_zalloc(data->l2_tables, gfp);

	if (!table)
		return NULL;

	phys = virt_to_phys(table);
	if (phys != (arm_v7s_iopte)phys) {
		/* Doesn't fit in PTE */
		dev_err(dev, "Page table does not fit in PTE: %pa", &phys);
		goto out_free;
	}
	if (!cfg->coherent_walk) {
		dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, dma))
			goto out_free;
		/*
		 * We depend on the IOMMU being able to work with any physical
		 * address directly, so if the DMA layer suggests otherwise by
		 * translating or truncating them, that bodes very badly...
		 */
		if (dma != phys)
			goto out_unmap;
	}
	if (lvl == 2)
		kmemleak_ignore(table);
	return table;

out_unmap:
	dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
	dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
out_free:
	if (lvl == 1)
		free_pages((unsigned long)table, get_order(size));
	else
		kmem_cache_free(data->l2_tables, table);
	return NULL;
}
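
/*
 * Size note (non-MediaTek case, for orientation): a level 1 table is
 * 4096 entries * 4 bytes = 16KB, hence the __get_free_pages() allocation
 * above, while a level 2 table is only 256 * 4 = 1KB and comes from the
 * dedicated, size-aligned l2_tables slab cache.
 */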

static void __arm_v7s_free_table(void *table, int lvl,
				 struct arm_v7s_io_pgtable *data)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	struct device *dev = cfg->iommu_dev;
	size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg);

	if (!cfg->coherent_walk)
		dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
				 DMA_TO_DEVICE);
	if (lvl == 1)
		free_pages((unsigned long)table, get_order(size));
	else
		kmem_cache_free(data->l2_tables, table);
}

static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
			       struct io_pgtable_cfg *cfg)
{
	if (cfg->coherent_walk)
		return;

	dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
				   num_entries * sizeof(*ptep), DMA_TO_DEVICE);
}

static void __arm_v7s_set_pte(arm_v7s_iopte *ptep, arm_v7s_iopte pte,
			      int num_entries, struct io_pgtable_cfg *cfg)
{
	int i;

	for (i = 0; i < num_entries; i++)
		ptep[i] = pte;

	__arm_v7s_pte_sync(ptep, num_entries, cfg);
}

static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
					 struct io_pgtable_cfg *cfg)
{
	bool ap = !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS);
	arm_v7s_iopte pte = ARM_V7S_ATTR_NG | ARM_V7S_ATTR_S;

	if (!(prot & IOMMU_MMIO))
		pte |= ARM_V7S_ATTR_TEX(1);
	if (ap) {
		pte |= ARM_V7S_PTE_AF;
		if (!(prot & IOMMU_PRIV))
			pte |= ARM_V7S_PTE_AP_UNPRIV;
		if (!(prot & IOMMU_WRITE))
			pte |= ARM_V7S_PTE_AP_RDONLY;
	}
	pte <<= ARM_V7S_ATTR_SHIFT(lvl);

	if ((prot & IOMMU_NOEXEC) && ap)
		pte |= ARM_V7S_ATTR_XN(lvl);
	if (prot & IOMMU_MMIO)
		pte |= ARM_V7S_ATTR_B;
	else if (prot & IOMMU_CACHE)
		pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C;

	pte |= ARM_V7S_PTE_TYPE_PAGE;
	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
		pte |= ARM_V7S_ATTR_NS_SECTION;

	return pte;
}
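
/*
 * Example (illustrative, no quirks assumed): IOMMU_READ | IOMMU_WRITE |
 * IOMMU_CACHE at level 2 yields a small-page PTE with nG, S, TEX=1, AF and
 * AP_UNPRIV in the shifted attribute block, plus B and C for write-back
 * cacheable memory; adding IOMMU_NOEXEC would also set the level 2 XN bit.
 */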

static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl)
{
	int prot = IOMMU_READ;
	arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl);

	if (!(attr & ARM_V7S_PTE_AP_RDONLY))
		prot |= IOMMU_WRITE;
	if (!(attr & ARM_V7S_PTE_AP_UNPRIV))
		prot |= IOMMU_PRIV;
	if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0)
		prot |= IOMMU_MMIO;
	else if (pte & ARM_V7S_ATTR_C)
		prot |= IOMMU_CACHE;
	if (pte & ARM_V7S_ATTR_XN(lvl))
		prot |= IOMMU_NOEXEC;

	return prot;
}

static arm_v7s_iopte arm_v7s_pte_to_cont(arm_v7s_iopte pte, int lvl)
{
	if (lvl == 1) {
		pte |= ARM_V7S_CONT_SECTION;
	} else if (lvl == 2) {
		arm_v7s_iopte xn = pte & ARM_V7S_ATTR_XN(lvl);
		arm_v7s_iopte tex = pte & ARM_V7S_CONT_PAGE_TEX_MASK;

		pte ^= xn | tex | ARM_V7S_PTE_TYPE_PAGE;
		pte |= (xn << ARM_V7S_CONT_PAGE_XN_SHIFT) |
		       (tex << ARM_V7S_CONT_PAGE_TEX_SHIFT) |
		       ARM_V7S_PTE_TYPE_CONT_PAGE;
	}
	return pte;
}

static arm_v7s_iopte arm_v7s_cont_to_pte(arm_v7s_iopte pte, int lvl)
{
	if (lvl == 1) {
		pte &= ~ARM_V7S_CONT_SECTION;
	} else if (lvl == 2) {
		arm_v7s_iopte xn = pte & BIT(ARM_V7S_CONT_PAGE_XN_SHIFT);
		arm_v7s_iopte tex = pte & (ARM_V7S_CONT_PAGE_TEX_MASK <<
					   ARM_V7S_CONT_PAGE_TEX_SHIFT);

		pte ^= xn | tex | ARM_V7S_PTE_TYPE_CONT_PAGE;
		pte |= (xn >> ARM_V7S_CONT_PAGE_XN_SHIFT) |
		       (tex >> ARM_V7S_CONT_PAGE_TEX_SHIFT) |
		       ARM_V7S_PTE_TYPE_PAGE;
	}
	return pte;
}

static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
{
	if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte, lvl))
		return pte & ARM_V7S_CONT_SECTION;
	else if (lvl == 2)
		return !(pte & ARM_V7S_PTE_TYPE_PAGE);
	return false;
}

static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *,
			      struct iommu_iotlb_gather *, unsigned long,
			      size_t, int, arm_v7s_iopte *);

static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
			    unsigned long iova, phys_addr_t paddr, int prot,
			    int lvl, int num_entries, arm_v7s_iopte *ptep)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_v7s_iopte pte;
	int i;

	for (i = 0; i < num_entries; i++)
		if (ARM_V7S_PTE_IS_TABLE(ptep[i], lvl)) {
			/*
			 * We need to unmap and free the old table before
			 * overwriting it with a block entry.
			 */
			arm_v7s_iopte *tblp;
			size_t sz = ARM_V7S_BLOCK_SIZE(lvl);

			tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl, cfg);
			if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz,
						    sz, lvl, tblp) != sz))
				return -EINVAL;
		} else if (ptep[i]) {
			/* We require an unmap first */
			WARN_ON(!selftest_running);
			return -EEXIST;
		}

	pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
	if (num_entries > 1)
		pte = arm_v7s_pte_to_cont(pte, lvl);

	pte |= paddr_to_iopte(paddr, lvl, cfg);

	__arm_v7s_set_pte(ptep, pte, num_entries, cfg);
	return 0;
}

static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table,
					   arm_v7s_iopte *ptep,
					   arm_v7s_iopte curr,
					   struct io_pgtable_cfg *cfg)
{
	arm_v7s_iopte old, new;

	new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE;
	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
		new |= ARM_V7S_ATTR_NS_TABLE;

	/*
	 * Ensure the table itself is visible before its PTE can be.
	 * Whilst we could get away with cmpxchg64_release below, this
	 * doesn't have any ordering semantics when !CONFIG_SMP.
	 */
	dma_wmb();

	old = cmpxchg_relaxed(ptep, curr, new);
	__arm_v7s_pte_sync(ptep, 1, cfg);

	return old;
}

static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
			 phys_addr_t paddr, size_t size, int prot,
			 int lvl, arm_v7s_iopte *ptep)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_v7s_iopte pte, *cptep;
	int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);

	/* Find our entry at the current level */
	ptep += ARM_V7S_LVL_IDX(iova, lvl, cfg);

	/* If we can install a leaf entry at this level, then do so */
	if (num_entries)
		return arm_v7s_init_pte(data, iova, paddr, prot,
					lvl, num_entries, ptep);

	/* We can't allocate tables at the final level */
	if (WARN_ON(lvl == 2))
		return -EINVAL;

	/* Grab a pointer to the next level */
	pte = READ_ONCE(*ptep);
	if (!pte) {
		cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data);
		if (!cptep)
			return -ENOMEM;

		pte = arm_v7s_install_table(cptep, ptep, 0, cfg);
		if (pte)
			__arm_v7s_free_table(cptep, lvl + 1, data);
	} else {
		/* We've no easy way of knowing if it's synced yet, so... */
		__arm_v7s_pte_sync(ptep, 1, cfg);
	}

	if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
		cptep = iopte_deref(pte, lvl, data);
	} else if (pte) {
		/* We require an unmap first */
		WARN_ON(!selftest_running);
		return -EEXIST;
	}

	/* Rinse, repeat */
	return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep);
}
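
/*
 * For orientation: num_entries above is non-zero only when the requested
 * size is a multiple of this level's block size, so 1MB/16MB mappings are
 * installed at level 1 (16 entries for a supersection) and 4KB/64KB
 * mappings recurse into a level 2 table (16 entries for a large page).
 */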

static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
		       phys_addr_t paddr, size_t size, int prot)
{
	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
	struct io_pgtable *iop = &data->iop;
	int ret;

	/* If no access, then nothing to do */
	if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
		return 0;

	if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
		    paddr >= (1ULL << data->iop.cfg.oas)))
		return -ERANGE;

	ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd);
	/*
	 * Synchronise all PTE updates for the new mapping before there's
	 * a chance for anything to kick off a table walk for the new iova.
	 */
	if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
		io_pgtable_tlb_flush_walk(iop, iova, size,
					  ARM_V7S_BLOCK_SIZE(2));
	} else {
		wmb();
	}

	return ret;
}

static void arm_v7s_free_pgtable(struct io_pgtable *iop)
{
	struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
	int i;

	for (i = 0; i < ARM_V7S_PTES_PER_LVL(1, &data->iop.cfg); i++) {
		arm_v7s_iopte pte = data->pgd[i];

		if (ARM_V7S_PTE_IS_TABLE(pte, 1))
			__arm_v7s_free_table(iopte_deref(pte, 1, data),
					     2, data);
	}
	__arm_v7s_free_table(data->pgd, 1, data);
	kmem_cache_destroy(data->l2_tables);
	kfree(data);
}

static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
					unsigned long iova, int idx, int lvl,
					arm_v7s_iopte *ptep)
{
	struct io_pgtable *iop = &data->iop;
	arm_v7s_iopte pte;
	size_t size = ARM_V7S_BLOCK_SIZE(lvl);
	int i;

	/* Check that we didn't lose a race to get the lock */
	pte = *ptep;
	if (!arm_v7s_pte_is_cont(pte, lvl))
		return pte;

	ptep -= idx & (ARM_V7S_CONT_PAGES - 1);
	pte = arm_v7s_cont_to_pte(pte, lvl);
	for (i = 0; i < ARM_V7S_CONT_PAGES; i++)
		ptep[i] = pte + i * size;

	__arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);

	size *= ARM_V7S_CONT_PAGES;
	io_pgtable_tlb_flush_leaf(iop, iova, size, size);
	return pte;
}

static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
				      struct iommu_iotlb_gather *gather,
				      unsigned long iova, size_t size,
				      arm_v7s_iopte blk_pte,
				      arm_v7s_iopte *ptep)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_v7s_iopte pte, *tablep;
	int i, unmap_idx, num_entries, num_ptes;

	tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data);
	if (!tablep)
		return 0; /* Bytes unmapped */

	num_ptes = ARM_V7S_PTES_PER_LVL(2, cfg);
	num_entries = size >> ARM_V7S_LVL_SHIFT(2);
	unmap_idx = ARM_V7S_LVL_IDX(iova, 2, cfg);

	pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
	if (num_entries > 1)
		pte = arm_v7s_pte_to_cont(pte, 2);

	for (i = 0; i < num_ptes; i += num_entries, pte += size) {
		/* Unmap! */
		if (i == unmap_idx)
			continue;

		__arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg);
	}

	pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg);
	if (pte != blk_pte) {
		__arm_v7s_free_table(tablep, 2, data);

		if (!ARM_V7S_PTE_IS_TABLE(pte, 1))
			return 0;

		tablep = iopte_deref(pte, 1, data);
		return __arm_v7s_unmap(data, gather, iova, size, 2, tablep);
	}

	io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
	return size;
}

static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
			      struct iommu_iotlb_gather *gather,
			      unsigned long iova, size_t size, int lvl,
			      arm_v7s_iopte *ptep)
{
	arm_v7s_iopte pte[ARM_V7S_CONT_PAGES];
	struct io_pgtable *iop = &data->iop;
	int idx, i = 0, num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);

	/* Something went horribly wrong and we ran out of page table */
	if (WARN_ON(lvl > 2))
		return 0;

	idx = ARM_V7S_LVL_IDX(iova, lvl, &iop->cfg);
	ptep += idx;
	do {
		pte[i] = READ_ONCE(ptep[i]);
		if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i])))
			return 0;
	} while (++i < num_entries);

	/*
	 * If we've hit a contiguous 'large page' entry at this level, it
	 * needs splitting first, unless we're unmapping the whole lot.
	 *
	 * For splitting, we can't rewrite 16 PTEs atomically, and since we
	 * can't necessarily assume TEX remap we don't have a software bit to
	 * mark live entries being split. In practice (i.e. DMA API code), we
	 * will never be splitting large pages anyway, so just wrap this edge
	 * case in a lock for the sake of correctness and be done with it.
	 */
	if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) {
		unsigned long flags;

		spin_lock_irqsave(&data->split_lock, flags);
		pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep);
		spin_unlock_irqrestore(&data->split_lock, flags);
	}

	/* If the size matches this level, we're in the right place */
	if (num_entries) {
		size_t blk_size = ARM_V7S_BLOCK_SIZE(lvl);

		__arm_v7s_set_pte(ptep, 0, num_entries, &iop->cfg);

		for (i = 0; i < num_entries; i++) {
			if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) {
				/* Also flush any partial walks */
				io_pgtable_tlb_flush_walk(iop, iova, blk_size,
						ARM_V7S_BLOCK_SIZE(lvl + 1));
				ptep = iopte_deref(pte[i], lvl, data);
				__arm_v7s_free_table(ptep, lvl + 1, data);
			} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
				/*
				 * Order the PTE update against queueing the
				 * IOVA, to guarantee that a flush callback
				 * from a different CPU has observed it
				 * before the TLBIALL can be issued.
				 */
				smp_wmb();
			} else {
				io_pgtable_tlb_add_page(iop, gather, iova,
							blk_size);
			}
			iova += blk_size;
		}
		return size;
	} else if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte[0], lvl)) {
		/*
		 * Insert a table at the next level to map the old region,
		 * minus the part we want to unmap
		 */
		return arm_v7s_split_blk_unmap(data, gather, iova, size,
					       pte[0], ptep);
	}

	/* Keep on walkin' */
	ptep = iopte_deref(pte[0], lvl, data);
	return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
}

static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
			    size_t size, struct iommu_iotlb_gather *gather)
{
	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);

	if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
		return 0;

	return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
}

static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
					unsigned long iova)
{
	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
	arm_v7s_iopte *ptep = data->pgd, pte;
	int lvl = 0;
	u32 mask;

	do {
		ptep += ARM_V7S_LVL_IDX(iova, ++lvl, &data->iop.cfg);
		pte = READ_ONCE(*ptep);
		ptep = iopte_deref(pte, lvl, data);
	} while (ARM_V7S_PTE_IS_TABLE(pte, lvl));

	if (!ARM_V7S_PTE_IS_VALID(pte))
		return 0;

	mask = ARM_V7S_LVL_MASK(lvl);
	if (arm_v7s_pte_is_cont(pte, lvl))
		mask *= ARM_V7S_CONT_PAGES;
	return iopte_to_paddr(pte, lvl, &data->iop.cfg) | (iova & ~mask);
}
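
/*
 * Walk example (illustrative): for an IOVA covered by a plain 1MB section,
 * the loop above stops at lvl 1 with mask 0xFFF00000, so the result is the
 * section base from the PTE plus the low 20 bits of the IOVA; for a 4KB
 * small page it stops at lvl 2 with mask 0xFFFFF000 and keeps the low 12
 * bits instead.
 */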

static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
						void *cookie)
{
	struct arm_v7s_io_pgtable *data;

	if (cfg->ias > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
		return NULL;

	if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS))
		return NULL;

	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
			    IO_PGTABLE_QUIRK_NO_PERMS |
			    IO_PGTABLE_QUIRK_TLBI_ON_MAP |
			    IO_PGTABLE_QUIRK_ARM_MTK_EXT |
			    IO_PGTABLE_QUIRK_NON_STRICT))
		return NULL;

	/* If ARM_MTK_EXT is enabled, the NO_PERMS quirk is also expected. */
	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT &&
	    !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS))
		return NULL;

	data = kmalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return NULL;

	spin_lock_init(&data->split_lock);
	data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
					    ARM_V7S_TABLE_SIZE(2, cfg),
					    ARM_V7S_TABLE_SIZE(2, cfg),
					    ARM_V7S_TABLE_SLAB_FLAGS, NULL);
	if (!data->l2_tables)
		goto out_free_data;

	data->iop.ops = (struct io_pgtable_ops) {
		.map		= arm_v7s_map,
		.unmap		= arm_v7s_unmap,
		.iova_to_phys	= arm_v7s_iova_to_phys,
	};

	/* We have to do this early for __arm_v7s_alloc_table to work... */
	data->iop.cfg = *cfg;

	/*
	 * Unless the IOMMU driver indicates supersection support by
	 * having SZ_16M set in the initial bitmap, they won't be used.
	 */
	cfg->pgsize_bitmap &= SZ_4K | SZ_64K | SZ_1M | SZ_16M;

	/* TCR: T0SZ=0, disable TTBR1 */
	cfg->arm_v7s_cfg.tcr = ARM_V7S_TCR_PD1;

	/*
	 * TEX remap: the indices used map to the closest equivalent types
	 * under the non-TEX-remap interpretation of those attribute bits,
	 * excepting various implementation-defined aspects of shareability.
	 */
	cfg->arm_v7s_cfg.prrr = ARM_V7S_PRRR_TR(1, ARM_V7S_PRRR_TYPE_DEVICE) |
				ARM_V7S_PRRR_TR(4, ARM_V7S_PRRR_TYPE_NORMAL) |
				ARM_V7S_PRRR_TR(7, ARM_V7S_PRRR_TYPE_NORMAL) |
				ARM_V7S_PRRR_DS0 | ARM_V7S_PRRR_DS1 |
				ARM_V7S_PRRR_NS1 | ARM_V7S_PRRR_NOS(7);
	cfg->arm_v7s_cfg.nmrr = ARM_V7S_NMRR_IR(7, ARM_V7S_RGN_WBWA) |
				ARM_V7S_NMRR_OR(7, ARM_V7S_RGN_WBWA);

	/* Looking good; allocate a pgd */
	data->pgd = __arm_v7s_alloc_table(1, GFP_KERNEL, data);
	if (!data->pgd)
		goto out_free_data;

	/* Ensure the empty pgd is visible before any actual TTBR write */
	wmb();

	/* TTBRs */
	cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) |
				   ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS |
				   (cfg->coherent_walk ?
				   (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
				    ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
				   (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
				    ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
	cfg->arm_v7s_cfg.ttbr[1] = 0;
	return &data->iop;

out_free_data:
	kmem_cache_destroy(data->l2_tables);
	kfree(data);
	return NULL;
}

struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
	.alloc	= arm_v7s_alloc_pgtable,
	.free	= arm_v7s_free_pgtable,
};

#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST

static struct io_pgtable_cfg *cfg_cookie;

static void dummy_tlb_flush_all(void *cookie)
{
	WARN_ON(cookie != cfg_cookie);
}

static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
			    void *cookie)
{
	WARN_ON(cookie != cfg_cookie);
	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}

static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
			       unsigned long iova, size_t granule,
			       void *cookie)
{
	dummy_tlb_flush(iova, granule, granule, cookie);
}

static const struct iommu_flush_ops dummy_tlb_ops = {
	.tlb_flush_all	= dummy_tlb_flush_all,
	.tlb_flush_walk	= dummy_tlb_flush,
	.tlb_flush_leaf	= dummy_tlb_flush,
	.tlb_add_page	= dummy_tlb_add_page,
};

#define __FAIL(ops)	({				\
		WARN(1, "selftest: test failed\n");	\
		selftest_running = false;		\
		-EFAULT;				\
})

static int __init arm_v7s_do_selftests(void)
{
	struct io_pgtable_ops *ops;
	struct io_pgtable_cfg cfg = {
		.tlb = &dummy_tlb_ops,
		.oas = 32,
		.ias = 32,
		.coherent_walk = true,
		.quirks = IO_PGTABLE_QUIRK_ARM_NS,
		.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
	};
	unsigned int iova, size, iova_start;
	unsigned int i, loopnr = 0;

	selftest_running = true;

	cfg_cookie = &cfg;

	ops = alloc_io_pgtable_ops(ARM_V7S, &cfg, &cfg);
	if (!ops) {
		pr_err("selftest: failed to allocate io pgtable ops\n");
		return -EINVAL;
	}

	/*
	 * Initial sanity checks.
	 * Empty page tables shouldn't provide any translations.
	 */
	if (ops->iova_to_phys(ops, 42))
		return __FAIL(ops);

	if (ops->iova_to_phys(ops, SZ_1G + 42))
		return __FAIL(ops);

	if (ops->iova_to_phys(ops, SZ_2G + 42))
		return __FAIL(ops);

	/*
	 * Distinct mappings of different granule sizes.
	 */
	iova = 0;
	for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
		size = 1UL << i;
		if (ops->map(ops, iova, iova, size, IOMMU_READ |
						    IOMMU_WRITE |
						    IOMMU_NOEXEC |
						    IOMMU_CACHE))
			return __FAIL(ops);

		/* Overlapping mappings */
		if (!ops->map(ops, iova, iova + size, size,
			      IOMMU_READ | IOMMU_NOEXEC))
			return __FAIL(ops);

		if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
			return __FAIL(ops);

		iova += SZ_16M;
		loopnr++;
	}

	/* Partial unmap */
	i = 1;
	size = 1UL << __ffs(cfg.pgsize_bitmap);
	while (i < loopnr) {
		iova_start = i * SZ_16M;
		if (ops->unmap(ops, iova_start + size, size, NULL) != size)
			return __FAIL(ops);

		/* Remap of partial unmap */
		if (ops->map(ops, iova_start + size, size, size, IOMMU_READ))
			return __FAIL(ops);

		if (ops->iova_to_phys(ops, iova_start + size + 42)
		    != (size + 42))
			return __FAIL(ops);
		i++;
	}

	/* Full unmap */
	iova = 0;
	for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
		size = 1UL << i;

		if (ops->unmap(ops, iova, size, NULL) != size)
			return __FAIL(ops);

		if (ops->iova_to_phys(ops, iova + 42))
			return __FAIL(ops);

		/* Remap full block */
		if (ops->map(ops, iova, iova, size, IOMMU_WRITE))
			return __FAIL(ops);

		if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
			return __FAIL(ops);

		iova += SZ_16M;
	}

	free_io_pgtable_ops(ops);

	selftest_running = false;

	pr_info("self test ok\n");
	return 0;
}
subsys_initcall(arm_v7s_do_selftests);
#endif