1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * CPU-agnostic ARM page table allocator.
4 *
5 * ARMv7 Short-descriptor format, supporting
6 * - Basic memory attributes
7 * - Simplified access permissions (AP[2:1] model)
8 * - Backwards-compatible TEX remap
9 * - Large pages/supersections (if indicated by the caller)
10 *
11 * Not supporting:
12 * - Legacy access permissions (AP[2:0] model)
13 *
14 * Almost certainly never supporting:
15 * - PXN
16 * - Domains
17 *
18 * Copyright (C) 2014-2015 ARM Limited
19 * Copyright (c) 2014-2015 MediaTek Inc.
20 */
21
22 #define pr_fmt(fmt) "arm-v7s io-pgtable: " fmt
23
24 #include <linux/atomic.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/gfp.h>
27 #include <linux/io-pgtable.h>
28 #include <linux/iommu.h>
29 #include <linux/kernel.h>
30 #include <linux/kmemleak.h>
31 #include <linux/sizes.h>
32 #include <linux/slab.h>
33 #include <linux/spinlock.h>
34 #include <linux/types.h>
35
36 #include <asm/barrier.h>
37
/*
 * Struct accessors: recover the enclosing struct arm_v7s_io_pgtable from a
 * pointer to its embedded "iop" member, or from an io_pgtable_ops pointer
 * by first going through io_pgtable_ops_to_pgtable().
 */
#define io_pgtable_to_data(x)						\
	container_of((x), struct arm_v7s_io_pgtable, iop)

#define io_pgtable_ops_to_data(x)					\
	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
44
/*
 * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2,
 * and 12 bits in a page. With some carefully-chosen coefficients we can
 * hide the ugly inconsistencies behind these macros and at least let the
 * rest of the code pretend to be somewhat sane.
 */
#define ARM_V7S_ADDR_BITS		32
/* Number of index bits at a level: 12 for level 1, 8 for level 2 */
#define _ARM_V7S_LVL_BITS(lvl)		(16 - (lvl) * 4)
/* Lowest address bit indexed at a level: 20 for level 1, 12 for level 2 */
#define ARM_V7S_LVL_SHIFT(lvl)		(ARM_V7S_ADDR_BITS - (4 + 8 * (lvl)))
/* Bits below this shift in a table PTE are not part of the table address */
#define ARM_V7S_TABLE_SHIFT		10

/* Entries in one table: 4096 at level 1, 256 at level 2 */
#define ARM_V7S_PTES_PER_LVL(lvl)	(1 << _ARM_V7S_LVL_BITS(lvl))
/* Size in bytes of one table at the given level */
#define ARM_V7S_TABLE_SIZE(lvl)						\
	(ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte))

/* Bytes mapped by a single entry: 1MB at level 1, 4KB at level 2 */
#define ARM_V7S_BLOCK_SIZE(lvl)		(1UL << ARM_V7S_LVL_SHIFT(lvl))
/* Mask selecting the output address bits of a (non-contiguous) leaf entry */
#define ARM_V7S_LVL_MASK(lvl)		((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl)))
/* Mask selecting the next-level table address in a table entry */
#define ARM_V7S_TABLE_MASK		((u32)(~0U << ARM_V7S_TABLE_SHIFT))
#define _ARM_V7S_IDX_MASK(lvl)		(ARM_V7S_PTES_PER_LVL(lvl) - 1)
/*
 * Index of the entry covering @addr within a table at level @lvl.
 * @lvl is copied into a local first, so it is evaluated only once even
 * when the caller passes an expression with side effects.
 */
#define ARM_V7S_LVL_IDX(addr, lvl)	({				\
	int _l = lvl;							\
	((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \
})
68
/*
 * Large page/supersection entries are effectively a block of 16 page/section
 * entries, along the lines of the LPAE contiguous hint, but all with the
 * same output address. For want of a better common name we'll call them
 * "contiguous" versions of their respective page/section entries here, but
 * noting the distinction (with respect to TLB maintenance) that they
 * represent *one* entry repeated 16 times, not 16 separate entries (as in
 * the LPAE case).
 */
#define ARM_V7S_CONT_PAGES		16

/* PTE type bits: these are all mixed up with XN/PXN bits in most cases */
#define ARM_V7S_PTE_TYPE_TABLE		0x1
#define ARM_V7S_PTE_TYPE_PAGE		0x2
#define ARM_V7S_PTE_TYPE_CONT_PAGE	0x1

/* An entry is valid when either of its two low type bits is set */
#define ARM_V7S_PTE_IS_VALID(pte)	(((pte) & 0x3) != 0)
/* Only level 1 entries can point to a next-level table */
#define ARM_V7S_PTE_IS_TABLE(pte, lvl) \
	((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE))

/* Page table bits */
/* XN sits at bit 4 of a level 1 entry and bit 0 of a level 2 entry */
#define ARM_V7S_ATTR_XN(lvl)		BIT(4 * (2 - (lvl)))
#define ARM_V7S_ATTR_B			BIT(2)
#define ARM_V7S_ATTR_C			BIT(3)
#define ARM_V7S_ATTR_NS_TABLE		BIT(3)
#define ARM_V7S_ATTR_NS_SECTION		BIT(19)

#define ARM_V7S_CONT_SECTION		BIT(18)
#define ARM_V7S_CONT_PAGE_XN_SHIFT	15

/*
 * The attribute bits are consistently ordered*, but occupy bits [17:10] of
 * a level 1 PTE vs. bits [11:4] at level 2. Thus we define the individual
 * fields relative to that 8-bit block, plus a total shift relative to the PTE.
 */
#define ARM_V7S_ATTR_SHIFT(lvl)		(16 - (lvl) * 6)

#define ARM_V7S_ATTR_MASK		0xff
#define ARM_V7S_ATTR_AP0		BIT(0)
#define ARM_V7S_ATTR_AP1		BIT(1)
#define ARM_V7S_ATTR_AP2		BIT(5)
#define ARM_V7S_ATTR_S			BIT(6)
#define ARM_V7S_ATTR_NG			BIT(7)
#define ARM_V7S_TEX_SHIFT		2
#define ARM_V7S_TEX_MASK		0x7
#define ARM_V7S_ATTR_TEX(val)		(((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)

/* MediaTek extension: two PTE bits carry physical address bits 32 and 33 */
#define ARM_V7S_ATTR_MTK_PA_BIT32	BIT(9)
#define ARM_V7S_ATTR_MTK_PA_BIT33	BIT(4)

/* *well, except for TEX on level 2 large pages, of course :( */
#define ARM_V7S_CONT_PAGE_TEX_SHIFT	6
#define ARM_V7S_CONT_PAGE_TEX_MASK	(ARM_V7S_TEX_MASK << ARM_V7S_CONT_PAGE_TEX_SHIFT)
122
/* Simplified access permissions: names for the AP bits as this file uses them */
#define ARM_V7S_PTE_AF			ARM_V7S_ATTR_AP0
#define ARM_V7S_PTE_AP_UNPRIV		ARM_V7S_ATTR_AP1
#define ARM_V7S_PTE_AP_RDONLY		ARM_V7S_ATTR_AP2

/* Register bits */
/* Region cacheability encodings, used for the NMRR and TTBR fields below */
#define ARM_V7S_RGN_NC			0
#define ARM_V7S_RGN_WBWA		1
#define ARM_V7S_RGN_WT			2
#define ARM_V7S_RGN_WB			3

/* PRRR: a 2-bit memory type per remap index n, plus shareability controls */
#define ARM_V7S_PRRR_TYPE_DEVICE	1
#define ARM_V7S_PRRR_TYPE_NORMAL	2
#define ARM_V7S_PRRR_TR(n, type)	(((type) & 0x3) << ((n) * 2))
#define ARM_V7S_PRRR_DS0		BIT(16)
#define ARM_V7S_PRRR_DS1		BIT(17)
#define ARM_V7S_PRRR_NS0		BIT(18)
#define ARM_V7S_PRRR_NS1		BIT(19)
#define ARM_V7S_PRRR_NOS(n)		BIT((n) + 24)

/* NMRR: 2-bit inner (IR) and outer (OR) region attributes per remap index n */
#define ARM_V7S_NMRR_IR(n, attr)	(((attr) & 0x3) << ((n) * 2))
#define ARM_V7S_NMRR_OR(n, attr)	(((attr) & 0x3) << ((n) * 2 + 16))

/* TTBR attribute bits; note the inner region attribute is split over bits 6 and 0 */
#define ARM_V7S_TTBR_S			BIT(1)
#define ARM_V7S_TTBR_NOS		BIT(5)
#define ARM_V7S_TTBR_ORGN_ATTR(attr)	(((attr) & 0x3) << 3)
#define ARM_V7S_TTBR_IRGN_ATTR(attr)					\
	((((attr) & 0x1) << 6) | (((attr) & 0x2) >> 1))

#define ARM_V7S_TCR_PD1			BIT(5)

/*
 * Zone/slab flags for page table memory: the DMA32 zone when the kernel has
 * one, otherwise the DMA zone.
 */
#ifdef CONFIG_ZONE_DMA32
#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32
#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32
#else
#define ARM_V7S_TABLE_GFP_DMA GFP_DMA
#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA
#endif
161
/* A short-descriptor page table entry is 32 bits wide at both levels */
typedef u32 arm_v7s_iopte;

/*
 * True while the selftest runs; the map path uses it to avoid warning about
 * the "already mapped" errors which the selftest provokes on purpose.
 */
static bool selftest_running;

struct arm_v7s_io_pgtable {
	/* Generic part; io_pgtable_to_data() maps this back to the container */
	struct io_pgtable	iop;

	/* Level 1 table */
	arm_v7s_iopte		*pgd;
	/* Slab cache from which level 2 tables are allocated */
	struct kmem_cache	*l2_tables;
	/* Serialises splitting of contiguous entries in __arm_v7s_unmap() */
	spinlock_t		split_lock;
};

/* Defined further down; needed early by iopte_to_paddr() */
static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl);
175
__arm_v7s_dma_addr(void * pages)176 static dma_addr_t __arm_v7s_dma_addr(void *pages)
177 {
178 return (dma_addr_t)virt_to_phys(pages);
179 }
180
arm_v7s_is_mtk_enabled(struct io_pgtable_cfg * cfg)181 static bool arm_v7s_is_mtk_enabled(struct io_pgtable_cfg *cfg)
182 {
183 return IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
184 (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT);
185 }
186
paddr_to_iopte(phys_addr_t paddr,int lvl,struct io_pgtable_cfg * cfg)187 static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
188 struct io_pgtable_cfg *cfg)
189 {
190 arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
191
192 if (!arm_v7s_is_mtk_enabled(cfg))
193 return pte;
194
195 if (paddr & BIT_ULL(32))
196 pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
197 if (paddr & BIT_ULL(33))
198 pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
199 return pte;
200 }
201
iopte_to_paddr(arm_v7s_iopte pte,int lvl,struct io_pgtable_cfg * cfg)202 static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
203 struct io_pgtable_cfg *cfg)
204 {
205 arm_v7s_iopte mask;
206 phys_addr_t paddr;
207
208 if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
209 mask = ARM_V7S_TABLE_MASK;
210 else if (arm_v7s_pte_is_cont(pte, lvl))
211 mask = ARM_V7S_LVL_MASK(lvl) * ARM_V7S_CONT_PAGES;
212 else
213 mask = ARM_V7S_LVL_MASK(lvl);
214
215 paddr = pte & mask;
216 if (!arm_v7s_is_mtk_enabled(cfg))
217 return paddr;
218
219 if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
220 paddr |= BIT_ULL(32);
221 if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
222 paddr |= BIT_ULL(33);
223 return paddr;
224 }
225
/*
 * Turn the address stored in @pte into a kernel virtual pointer, so that
 * the table it refers to can be walked by the CPU.
 */
static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
				  struct arm_v7s_io_pgtable *data)
{
	phys_addr_t target = iopte_to_paddr(pte, lvl, &data->iop.cfg);

	return phys_to_virt(target);
}
231
/*
 * Allocate and zero a page table for level @lvl.
 *
 * Level 1 tables come from the page allocator, level 2 tables from the
 * data->l2_tables slab cache; both honour the caller's @gfp flags. For
 * non-coherent table walks the table is also mapped for DMA towards the
 * device.
 *
 * Returns the table's kernel virtual address, or NULL if allocation failed,
 * the physical address does not fit in a 32-bit PTE, or DMA mapping failed
 * or produced an address different from the physical one.
 */
static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
				   struct arm_v7s_io_pgtable *data)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	struct device *dev = cfg->iommu_dev;
	phys_addr_t phys;
	dma_addr_t dma;
	size_t size = ARM_V7S_TABLE_SIZE(lvl);
	void *table = NULL;

	if (lvl == 1)
		table = (void *)__get_free_pages(
			gfp | __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA,
			get_order(size));
	else if (lvl == 2)
		table = kmem_cache_zalloc(data->l2_tables, gfp);

	/*
	 * Bail out before touching the result: translating, reporting or
	 * freeing a NULL table is meaningless and must not reach the
	 * error path below.
	 */
	if (!table)
		return NULL;

	phys = virt_to_phys(table);
	if (phys != (arm_v7s_iopte)phys) {
		/* Doesn't fit in PTE */
		dev_err(dev, "Page table does not fit in PTE: %pa\n", &phys);
		goto out_free;
	}
	if (!cfg->coherent_walk) {
		dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, dma))
			goto out_free;
		/*
		 * We depend on the IOMMU being able to work with any physical
		 * address directly, so if the DMA layer suggests otherwise by
		 * translating or truncating them, that bodes very badly...
		 */
		if (dma != phys)
			goto out_unmap;
	}
	/* Level 2 tables are referenced only via PTEs; hide them from kmemleak */
	if (lvl == 2)
		kmemleak_ignore(table);
	return table;

out_unmap:
	dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
	dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
out_free:
	if (lvl == 1)
		free_pages((unsigned long)table, get_order(size));
	else
		kmem_cache_free(data->l2_tables, table);
	return NULL;
}
279
__arm_v7s_free_table(void * table,int lvl,struct arm_v7s_io_pgtable * data)280 static void __arm_v7s_free_table(void *table, int lvl,
281 struct arm_v7s_io_pgtable *data)
282 {
283 struct io_pgtable_cfg *cfg = &data->iop.cfg;
284 struct device *dev = cfg->iommu_dev;
285 size_t size = ARM_V7S_TABLE_SIZE(lvl);
286
287 if (!cfg->coherent_walk)
288 dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
289 DMA_TO_DEVICE);
290 if (lvl == 1)
291 free_pages((unsigned long)table, get_order(size));
292 else
293 kmem_cache_free(data->l2_tables, table);
294 }
295
/*
 * Push @num_entries PTEs starting at @ptep out to the device.
 * Nothing needs doing when the table walker is cache-coherent.
 */
static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
			       struct io_pgtable_cfg *cfg)
{
	if (!cfg->coherent_walk)
		dma_sync_single_for_device(cfg->iommu_dev,
					   __arm_v7s_dma_addr(ptep),
					   num_entries * sizeof(*ptep),
					   DMA_TO_DEVICE);
}
/*
 * Write the same value @pte into @num_entries consecutive slots starting
 * at @ptep, then sync the written range for the device.
 */
static void __arm_v7s_set_pte(arm_v7s_iopte *ptep, arm_v7s_iopte pte,
			      int num_entries, struct io_pgtable_cfg *cfg)
{
	int n = 0;

	while (n < num_entries)
		ptep[n++] = pte;

	__arm_v7s_pte_sync(ptep, num_entries, cfg);
}
315
arm_v7s_prot_to_pte(int prot,int lvl,struct io_pgtable_cfg * cfg)316 static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
317 struct io_pgtable_cfg *cfg)
318 {
319 bool ap = !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS);
320 arm_v7s_iopte pte = ARM_V7S_ATTR_NG | ARM_V7S_ATTR_S;
321
322 if (!(prot & IOMMU_MMIO))
323 pte |= ARM_V7S_ATTR_TEX(1);
324 if (ap) {
325 pte |= ARM_V7S_PTE_AF;
326 if (!(prot & IOMMU_PRIV))
327 pte |= ARM_V7S_PTE_AP_UNPRIV;
328 if (!(prot & IOMMU_WRITE))
329 pte |= ARM_V7S_PTE_AP_RDONLY;
330 }
331 pte <<= ARM_V7S_ATTR_SHIFT(lvl);
332
333 if ((prot & IOMMU_NOEXEC) && ap)
334 pte |= ARM_V7S_ATTR_XN(lvl);
335 if (prot & IOMMU_MMIO)
336 pte |= ARM_V7S_ATTR_B;
337 else if (prot & IOMMU_CACHE)
338 pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C;
339
340 pte |= ARM_V7S_PTE_TYPE_PAGE;
341 if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
342 pte |= ARM_V7S_ATTR_NS_SECTION;
343
344 return pte;
345 }
346
/*
 * Recover IOMMU_* protection flags from a leaf PTE at @lvl.
 *
 * IOMMU_READ is always reported. A zero TEX field is reported as
 * IOMMU_MMIO; otherwise the C bit selects IOMMU_CACHE.
 */
static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl)
{
	const arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl);
	const arm_v7s_iopte tex = attr &
				  (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT);
	int prot = IOMMU_READ;

	prot |= (attr & ARM_V7S_PTE_AP_RDONLY) ? 0 : IOMMU_WRITE;
	prot |= (attr & ARM_V7S_PTE_AP_UNPRIV) ? 0 : IOMMU_PRIV;

	if (tex == 0)
		prot |= IOMMU_MMIO;
	else if (pte & ARM_V7S_ATTR_C)
		prot |= IOMMU_CACHE;

	if (pte & ARM_V7S_ATTR_XN(lvl))
		prot |= IOMMU_NOEXEC;

	return prot;
}
365
/*
 * Convert a regular leaf PTE into its "contiguous" form.
 *
 * At level 1 this only sets ARM_V7S_CONT_SECTION. At level 2 the XN and
 * TEX fields live in different bit positions, so they are removed from
 * their current positions and re-inserted shifted, and the type bits are
 * switched to ARM_V7S_PTE_TYPE_CONT_PAGE. Other levels pass through.
 */
static arm_v7s_iopte arm_v7s_pte_to_cont(arm_v7s_iopte pte, int lvl)
{
	arm_v7s_iopte xn, tex;

	switch (lvl) {
	case 1:
		return pte | ARM_V7S_CONT_SECTION;
	case 2:
		xn = pte & ARM_V7S_ATTR_XN(lvl);
		tex = pte & ARM_V7S_CONT_PAGE_TEX_MASK;

		/* XOR drops the set XN/TEX bits and toggles the page type bit */
		pte ^= xn | tex | ARM_V7S_PTE_TYPE_PAGE;
		pte |= (xn << ARM_V7S_CONT_PAGE_XN_SHIFT) |
		       (tex << ARM_V7S_CONT_PAGE_TEX_SHIFT) |
		       ARM_V7S_PTE_TYPE_CONT_PAGE;
		return pte;
	default:
		return pte;
	}
}
381
/*
 * Inverse of arm_v7s_pte_to_cont(): convert a "contiguous" leaf PTE back
 * into the regular form for its level.
 *
 * At level 1 this clears ARM_V7S_CONT_SECTION. At level 2 the XN and TEX
 * fields are moved back down to their regular positions and the type bits
 * are switched to ARM_V7S_PTE_TYPE_PAGE. Other levels pass through.
 */
static arm_v7s_iopte arm_v7s_cont_to_pte(arm_v7s_iopte pte, int lvl)
{
	arm_v7s_iopte xn, tex;

	switch (lvl) {
	case 1:
		return pte & ~ARM_V7S_CONT_SECTION;
	case 2:
		xn = pte & BIT(ARM_V7S_CONT_PAGE_XN_SHIFT);
		tex = pte & (ARM_V7S_CONT_PAGE_TEX_MASK <<
			     ARM_V7S_CONT_PAGE_TEX_SHIFT);

		/* XOR drops the set XN/TEX bits and toggles the cont type bit */
		pte ^= xn | tex | ARM_V7S_PTE_TYPE_CONT_PAGE;
		pte |= (xn >> ARM_V7S_CONT_PAGE_XN_SHIFT) |
		       (tex >> ARM_V7S_CONT_PAGE_TEX_SHIFT) |
		       ARM_V7S_PTE_TYPE_PAGE;
		return pte;
	default:
		return pte;
	}
}
398
arm_v7s_pte_is_cont(arm_v7s_iopte pte,int lvl)399 static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
400 {
401 if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte, lvl))
402 return pte & ARM_V7S_CONT_SECTION;
403 else if (lvl == 2)
404 return !(pte & ARM_V7S_PTE_TYPE_PAGE);
405 return false;
406 }
407
/*
 * Forward declaration: arm_v7s_init_pte() calls __arm_v7s_unmap() to tear
 * down an existing next-level table before overwriting its entry.
 */
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *,
			      struct iommu_iotlb_gather *, unsigned long,
			      size_t, int, arm_v7s_iopte *);
411
/*
 * Install @num_entries leaf entries at @ptep mapping @iova to @paddr.
 *
 * Every target slot is checked first: a slot holding a next-level table is
 * unmapped (which also frees that table), while any other non-zero slot
 * means the range is already mapped. More than one entry means the mapping
 * is written in its contiguous form.
 *
 * Returns 0 on success, -EINVAL if an old table could not be fully
 * unmapped, or -EEXIST if a slot already holds a mapping.
 */
static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
			    unsigned long iova, phys_addr_t paddr, int prot,
			    int lvl, int num_entries, arm_v7s_iopte *ptep)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	const size_t blk_sz = ARM_V7S_BLOCK_SIZE(lvl);
	arm_v7s_iopte new_pte;
	int n;

	for (n = 0; n < num_entries; n++) {
		arm_v7s_iopte *tbl_base;
		size_t unmapped;

		if (!ARM_V7S_PTE_IS_TABLE(ptep[n], lvl)) {
			if (!ptep[n])
				continue;

			/* We require an unmap first */
			WARN_ON(!selftest_running);
			return -EEXIST;
		}

		/*
		 * We need to unmap and free the old table before
		 * overwriting it with a block entry. The unmap walks from
		 * the start of this level's table, so rewind to it.
		 */
		tbl_base = ptep - ARM_V7S_LVL_IDX(iova, lvl);
		unmapped = __arm_v7s_unmap(data, NULL, iova + n * blk_sz,
					   blk_sz, lvl, tbl_base);
		if (WARN_ON(unmapped != blk_sz))
			return -EINVAL;
	}

	new_pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
	if (num_entries > 1)
		new_pte = arm_v7s_pte_to_cont(new_pte, lvl);

	new_pte |= paddr_to_iopte(paddr, lvl, cfg);

	__arm_v7s_set_pte(ptep, new_pte, num_entries, cfg);
	return 0;
}
448
/*
 * Try to make the entry at @ptep point to the next-level table @table.
 *
 * The entry is only replaced if it still holds @curr. The value found in
 * the slot before the attempt is returned, so the caller can tell success
 * (return value equals @curr) from a lost race.
 */
static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table,
					   arm_v7s_iopte *ptep,
					   arm_v7s_iopte curr,
					   struct io_pgtable_cfg *cfg)
{
	arm_v7s_iopte entry = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE;
	arm_v7s_iopte prev;

	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
		entry |= ARM_V7S_ATTR_NS_TABLE;

	/*
	 * Ensure the table itself is visible before its PTE can be.
	 * Whilst we could get away with cmpxchg_release below, this
	 * doesn't have any ordering semantics when !CONFIG_SMP.
	 */
	dma_wmb();

	prev = cmpxchg_relaxed(ptep, curr, entry);
	__arm_v7s_pte_sync(ptep, 1, cfg);

	return prev;
}
472
/*
 * Recursive worker for arm_v7s_map().
 *
 * If @size covers at least one whole entry at @lvl, the leaf entries are
 * written here. Otherwise the walk descends one level, allocating and
 * installing a next-level table when the slot is empty.
 *
 * Returns 0 on success, -EINVAL if a table would be needed below level 2,
 * -ENOMEM if a table cannot be allocated, -EEXIST if the slot already
 * holds a leaf mapping, or whatever arm_v7s_init_pte() returns.
 */
static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
			 phys_addr_t paddr, size_t size, int prot,
			 int lvl, arm_v7s_iopte *ptep)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_v7s_iopte pte, *next;
	int leaf_entries = size >> ARM_V7S_LVL_SHIFT(lvl);

	/* Step to the slot for this iova at the current level */
	ptep += ARM_V7S_LVL_IDX(iova, lvl);

	/* A leaf fits at this level: write it and stop */
	if (leaf_entries)
		return arm_v7s_init_pte(data, iova, paddr, prot,
					lvl, leaf_entries, ptep);

	/* There is no level below 2 to put a table at */
	if (WARN_ON(lvl == 2))
		return -EINVAL;

	pte = READ_ONCE(*ptep);
	if (pte) {
		/* We've no easy way of knowing if it's synced yet, so... */
		__arm_v7s_pte_sync(ptep, 1, cfg);
	} else {
		next = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data);
		if (!next)
			return -ENOMEM;

		/*
		 * A non-zero result means somebody else filled the slot
		 * first; drop our table and use whatever is there now.
		 */
		pte = arm_v7s_install_table(next, ptep, 0, cfg);
		if (pte)
			__arm_v7s_free_table(next, lvl + 1, data);
	}

	if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
		next = iopte_deref(pte, lvl, data);
	} else if (pte) {
		/* We require an unmap first */
		WARN_ON(!selftest_running);
		return -EEXIST;
	}

	/* Continue one level down */
	return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, next);
}
519
/*
 * io_pgtable_ops.map implementation.
 *
 * Mappings requesting neither read nor write access are silently accepted
 * without touching the tables. Returns -ERANGE if @iova or @paddr exceeds
 * the configured input/output address size, otherwise the result of the
 * table walk.
 */
static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot)
{
	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
	struct io_pgtable *iop = &data->iop;
	struct io_pgtable_cfg *cfg = &iop->cfg;
	bool out_of_range;
	int ret;

	/* If no access, then nothing to do */
	if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
		return 0;

	out_of_range = iova >= (1ULL << cfg->ias) ||
		       paddr >= (1ULL << cfg->oas);
	if (WARN_ON(out_of_range))
		return -ERANGE;

	ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd);

	/*
	 * Synchronise all PTE updates for the new mapping before there's
	 * a chance for anything to kick off a table walk for the new iova.
	 */
	if (cfg->quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP)
		io_pgtable_tlb_flush_walk(iop, iova, size,
					  ARM_V7S_BLOCK_SIZE(2));
	else
		wmb();

	return ret;
}
549
arm_v7s_free_pgtable(struct io_pgtable * iop)550 static void arm_v7s_free_pgtable(struct io_pgtable *iop)
551 {
552 struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
553 int i;
554
555 for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) {
556 arm_v7s_iopte pte = data->pgd[i];
557
558 if (ARM_V7S_PTE_IS_TABLE(pte, 1))
559 __arm_v7s_free_table(iopte_deref(pte, 1, data),
560 2, data);
561 }
562 __arm_v7s_free_table(data->pgd, 1, data);
563 kmem_cache_destroy(data->l2_tables);
564 kfree(data);
565 }
566
/*
 * Break the contiguous entry covering @ptep into ARM_V7S_CONT_PAGES
 * individual entries. The caller holds data->split_lock.
 *
 * If the entry is no longer contiguous (another CPU split it first), its
 * current value is returned untouched. Otherwise the converted
 * non-contiguous PTE value of the first entry in the group is returned.
 */
static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
					unsigned long iova, int idx, int lvl,
					arm_v7s_iopte *ptep)
{
	struct io_pgtable *iop = &data->iop;
	const size_t blk = ARM_V7S_BLOCK_SIZE(lvl);
	const size_t span = blk * ARM_V7S_CONT_PAGES;
	arm_v7s_iopte cur = *ptep;
	arm_v7s_iopte base;
	arm_v7s_iopte *first;
	int n;

	/* Check that we didn't lose a race to get the lock */
	if (!arm_v7s_pte_is_cont(cur, lvl))
		return cur;

	/* Rewind to the first slot of the 16-entry group */
	first = ptep - (idx & (ARM_V7S_CONT_PAGES - 1));
	base = arm_v7s_cont_to_pte(cur, lvl);
	for (n = 0; n < ARM_V7S_CONT_PAGES; n++)
		first[n] = base + n * blk;

	__arm_v7s_pte_sync(first, ARM_V7S_CONT_PAGES, &iop->cfg);

	io_pgtable_tlb_flush_leaf(iop, iova, span, span);
	return base;
}
592
/*
 * Unmap @size bytes at @iova from inside the level 1 section @blk_pte by
 * replacing the section with a level 2 table that maps everything except
 * the requested range.
 *
 * The replacement entries inherit both the protection and the output
 * address of the section, so the surviving part of the mapping keeps
 * translating to the same physical memory.
 *
 * Returns the number of bytes unmapped: @size on success, 0 if the table
 * could not be allocated or the slot changed to something other than a
 * table under us, or the result of unmapping from the table that another
 * CPU installed in the meantime.
 */
static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
				      struct iommu_iotlb_gather *gather,
				      unsigned long iova, size_t size,
				      arm_v7s_iopte blk_pte,
				      arm_v7s_iopte *ptep)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_v7s_iopte pte, *tablep;
	int i, unmap_idx, num_entries, num_ptes;

	tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data);
	if (!tablep)
		return 0; /* Bytes unmapped */

	num_ptes = ARM_V7S_PTES_PER_LVL(2);
	num_entries = size >> ARM_V7S_LVL_SHIFT(2);
	unmap_idx = ARM_V7S_LVL_IDX(iova, 2);

	pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
	if (num_entries > 1)
		pte = arm_v7s_pte_to_cont(pte, 2);

	/*
	 * Start from the section's own output address; without it the new
	 * level 2 entries would carry attributes only and point at the
	 * wrong physical memory.
	 */
	pte |= paddr_to_iopte(iopte_to_paddr(blk_pte, 1, cfg), 2, cfg);

	/* Each step covers @size bytes, so the output address advances too */
	for (i = 0; i < num_ptes; i += num_entries, pte += size) {
		/* Unmap! */
		if (i == unmap_idx)
			continue;

		__arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg);
	}

	pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg);
	if (pte != blk_pte) {
		/* The slot changed under us; our table is not needed */
		__arm_v7s_free_table(tablep, 2, data);

		if (!ARM_V7S_PTE_IS_TABLE(pte, 1))
			return 0;

		/* Somebody else split the section: unmap from their table */
		tablep = iopte_deref(pte, 1, data);
		return __arm_v7s_unmap(data, gather, iova, size, 2, tablep);
	}

	io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
	return size;
}
637
/*
 * Recursive worker for arm_v7s_unmap().
 *
 * @ptep points to the start of the table for level @lvl. If @size covers
 * whole entries at this level they are cleared here (freeing any
 * next-level tables they pointed to); a level 1 section that is only
 * partially unmapped is split; otherwise the walk descends one level.
 *
 * Returns the number of bytes unmapped, or 0 on failure.
 */
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
			      struct iommu_iotlb_gather *gather,
			      unsigned long iova, size_t size, int lvl,
			      arm_v7s_iopte *ptep)
{
	arm_v7s_iopte pte[ARM_V7S_CONT_PAGES];
	struct io_pgtable *iop = &data->iop;
	int idx, i = 0, num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);

	/* Something went horribly wrong and we ran out of page table */
	if (WARN_ON(lvl > 2))
		return 0;

	/*
	 * The snapshot array below holds at most one contiguous group.
	 * Refuse sizes spanning more entries than that rather than
	 * overrunning the stack.
	 */
	if (WARN_ON(num_entries > ARM_V7S_CONT_PAGES))
		return 0;

	idx = ARM_V7S_LVL_IDX(iova, lvl);
	ptep += idx;
	/* Snapshot the entries; at least one is read even if num_entries is 0 */
	do {
		pte[i] = READ_ONCE(ptep[i]);
		if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i])))
			return 0;
	} while (++i < num_entries);

	/*
	 * If we've hit a contiguous 'large page' entry at this level, it
	 * needs splitting first, unless we're unmapping the whole lot.
	 *
	 * For splitting, we can't rewrite 16 PTEs atomically, and since we
	 * can't necessarily assume TEX remap we don't have a software bit to
	 * mark live entries being split. In practice (i.e. DMA API code), we
	 * will never be splitting large pages anyway, so just wrap this edge
	 * case in a lock for the sake of correctness and be done with it.
	 */
	if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) {
		unsigned long flags;

		spin_lock_irqsave(&data->split_lock, flags);
		pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep);
		spin_unlock_irqrestore(&data->split_lock, flags);
	}

	/* If the size matches this level, we're in the right place */
	if (num_entries) {
		size_t blk_size = ARM_V7S_BLOCK_SIZE(lvl);

		__arm_v7s_set_pte(ptep, 0, num_entries, &iop->cfg);

		for (i = 0; i < num_entries; i++) {
			if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) {
				/* Also flush any partial walks */
				io_pgtable_tlb_flush_walk(iop, iova, blk_size,
						ARM_V7S_BLOCK_SIZE(lvl + 1));
				ptep = iopte_deref(pte[i], lvl, data);
				__arm_v7s_free_table(ptep, lvl + 1, data);
			} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
				/*
				 * Order the PTE update against queueing the IOVA, to
				 * guarantee that a flush callback from a different CPU
				 * has observed it before the TLBIALL can be issued.
				 */
				smp_wmb();
			} else {
				io_pgtable_tlb_add_page(iop, gather, iova, blk_size);
			}
			iova += blk_size;
		}
		return size;
	} else if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte[0], lvl)) {
		/*
		 * Insert a table at the next level to map the old region,
		 * minus the part we want to unmap
		 */
		return arm_v7s_split_blk_unmap(data, gather, iova, size, pte[0],
					       ptep);
	}

	/* Keep on walkin' */
	ptep = iopte_deref(pte[0], lvl, data);
	return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
}
716
/*
 * io_pgtable_ops.unmap implementation.
 *
 * Rejects (with a warning) any @iova that does not fit in 32 bits, then
 * starts the unmap walk at the level 1 table. Returns bytes unmapped.
 */
static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
			    size_t size, struct iommu_iotlb_gather *gather)
{
	struct arm_v7s_io_pgtable *data;

	if (WARN_ON(upper_32_bits(iova)))
		return 0;

	data = io_pgtable_ops_to_data(ops);

	return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
}
727
arm_v7s_iova_to_phys(struct io_pgtable_ops * ops,unsigned long iova)728 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
729 unsigned long iova)
730 {
731 struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
732 arm_v7s_iopte *ptep = data->pgd, pte;
733 int lvl = 0;
734 u32 mask;
735
736 do {
737 ptep += ARM_V7S_LVL_IDX(iova, ++lvl);
738 pte = READ_ONCE(*ptep);
739 ptep = iopte_deref(pte, lvl, data);
740 } while (ARM_V7S_PTE_IS_TABLE(pte, lvl));
741
742 if (!ARM_V7S_PTE_IS_VALID(pte))
743 return 0;
744
745 mask = ARM_V7S_LVL_MASK(lvl);
746 if (arm_v7s_pte_is_cont(pte, lvl))
747 mask *= ARM_V7S_CONT_PAGES;
748 return iopte_to_paddr(pte, lvl, &data->iop.cfg) | (iova & ~mask);
749 }
750
arm_v7s_alloc_pgtable(struct io_pgtable_cfg * cfg,void * cookie)751 static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
752 void *cookie)
753 {
754 struct arm_v7s_io_pgtable *data;
755
756 if (cfg->ias > ARM_V7S_ADDR_BITS)
757 return NULL;
758
759 if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
760 return NULL;
761
762 if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
763 IO_PGTABLE_QUIRK_NO_PERMS |
764 IO_PGTABLE_QUIRK_TLBI_ON_MAP |
765 IO_PGTABLE_QUIRK_ARM_MTK_EXT |
766 IO_PGTABLE_QUIRK_NON_STRICT))
767 return NULL;
768
769 /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
770 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT &&
771 !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS))
772 return NULL;
773
774 data = kmalloc(sizeof(*data), GFP_KERNEL);
775 if (!data)
776 return NULL;
777
778 spin_lock_init(&data->split_lock);
779 data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
780 ARM_V7S_TABLE_SIZE(2),
781 ARM_V7S_TABLE_SIZE(2),
782 ARM_V7S_TABLE_SLAB_FLAGS, NULL);
783 if (!data->l2_tables)
784 goto out_free_data;
785
786 data->iop.ops = (struct io_pgtable_ops) {
787 .map = arm_v7s_map,
788 .unmap = arm_v7s_unmap,
789 .iova_to_phys = arm_v7s_iova_to_phys,
790 };
791
792 /* We have to do this early for __arm_v7s_alloc_table to work... */
793 data->iop.cfg = *cfg;
794
795 /*
796 * Unless the IOMMU driver indicates supersection support by
797 * having SZ_16M set in the initial bitmap, they won't be used.
798 */
799 cfg->pgsize_bitmap &= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
800
801 /* TCR: T0SZ=0, disable TTBR1 */
802 cfg->arm_v7s_cfg.tcr = ARM_V7S_TCR_PD1;
803
804 /*
805 * TEX remap: the indices used map to the closest equivalent types
806 * under the non-TEX-remap interpretation of those attribute bits,
807 * excepting various implementation-defined aspects of shareability.
808 */
809 cfg->arm_v7s_cfg.prrr = ARM_V7S_PRRR_TR(1, ARM_V7S_PRRR_TYPE_DEVICE) |
810 ARM_V7S_PRRR_TR(4, ARM_V7S_PRRR_TYPE_NORMAL) |
811 ARM_V7S_PRRR_TR(7, ARM_V7S_PRRR_TYPE_NORMAL) |
812 ARM_V7S_PRRR_DS0 | ARM_V7S_PRRR_DS1 |
813 ARM_V7S_PRRR_NS1 | ARM_V7S_PRRR_NOS(7);
814 cfg->arm_v7s_cfg.nmrr = ARM_V7S_NMRR_IR(7, ARM_V7S_RGN_WBWA) |
815 ARM_V7S_NMRR_OR(7, ARM_V7S_RGN_WBWA);
816
817 /* Looking good; allocate a pgd */
818 data->pgd = __arm_v7s_alloc_table(1, GFP_KERNEL, data);
819 if (!data->pgd)
820 goto out_free_data;
821
822 /* Ensure the empty pgd is visible before any actual TTBR write */
823 wmb();
824
825 /* TTBRs */
826 cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) |
827 ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS |
828 (cfg->coherent_walk ?
829 (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
830 ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
831 (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
832 ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
833 cfg->arm_v7s_cfg.ttbr[1] = 0;
834 return &data->iop;
835
836 out_free_data:
837 kmem_cache_destroy(data->l2_tables);
838 kfree(data);
839 return NULL;
840 }
841
/*
 * Allocation and free entry points for this page table format. Not static;
 * presumably picked up by the generic io-pgtable code (the selftest below
 * reaches them via alloc_io_pgtable_ops(ARM_V7S, ...)).
 */
struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
	.alloc	= arm_v7s_alloc_pgtable,
	.free	= arm_v7s_free_pgtable,
};
846
847 #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
848
/* Cookie the selftest hands to the allocator; the dummy TLB ops check it */
static struct io_pgtable_cfg *cfg_cookie;
850
dummy_tlb_flush_all(void * cookie)851 static void dummy_tlb_flush_all(void *cookie)
852 {
853 WARN_ON(cookie != cfg_cookie);
854 }
855
/*
 * Selftest TLB callback: verifies the cookie and that @size shares at
 * least one bit with the configured page size bitmap. @iova and @granule
 * are not inspected.
 */
static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
			    void *cookie)
{
	WARN_ON(cfg_cookie != cookie);
	WARN_ON((size & cfg_cookie->pgsize_bitmap) == 0);
}
862
/*
 * Selftest TLB callback for a single page: applies the same checks as
 * dummy_tlb_flush(), using @granule as the size. @gather is ignored.
 */
static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
			       unsigned long iova, size_t granule, void *cookie)
{
	size_t size = granule;

	dummy_tlb_flush(iova, size, granule, cookie);
}
868
/* TLB maintenance callbacks used by the selftest; they only sanity-check arguments */
static const struct iommu_flush_ops dummy_tlb_ops = {
	.tlb_flush_all	= dummy_tlb_flush_all,
	.tlb_flush_walk	= dummy_tlb_flush,
	.tlb_flush_leaf	= dummy_tlb_flush,
	.tlb_add_page	= dummy_tlb_add_page,
};
875
/*
 * Report a selftest failure: emit a warning, clear selftest_running and
 * evaluate to -EFAULT. The @ops argument is currently unused.
 */
#define __FAIL(ops)	({				\
		WARN(1, "selftest: test failed\n");	\
		selftest_running = false;		\
		-EFAULT;				\
})
881
arm_v7s_do_selftests(void)882 static int __init arm_v7s_do_selftests(void)
883 {
884 struct io_pgtable_ops *ops;
885 struct io_pgtable_cfg cfg = {
886 .tlb = &dummy_tlb_ops,
887 .oas = 32,
888 .ias = 32,
889 .coherent_walk = true,
890 .quirks = IO_PGTABLE_QUIRK_ARM_NS,
891 .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
892 };
893 unsigned int iova, size, iova_start;
894 unsigned int i, loopnr = 0;
895
896 selftest_running = true;
897
898 cfg_cookie = &cfg;
899
900 ops = alloc_io_pgtable_ops(ARM_V7S, &cfg, &cfg);
901 if (!ops) {
902 pr_err("selftest: failed to allocate io pgtable ops\n");
903 return -EINVAL;
904 }
905
906 /*
907 * Initial sanity checks.
908 * Empty page tables shouldn't provide any translations.
909 */
910 if (ops->iova_to_phys(ops, 42))
911 return __FAIL(ops);
912
913 if (ops->iova_to_phys(ops, SZ_1G + 42))
914 return __FAIL(ops);
915
916 if (ops->iova_to_phys(ops, SZ_2G + 42))
917 return __FAIL(ops);
918
919 /*
920 * Distinct mappings of different granule sizes.
921 */
922 iova = 0;
923 for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
924 size = 1UL << i;
925 if (ops->map(ops, iova, iova, size, IOMMU_READ |
926 IOMMU_WRITE |
927 IOMMU_NOEXEC |
928 IOMMU_CACHE))
929 return __FAIL(ops);
930
931 /* Overlapping mappings */
932 if (!ops->map(ops, iova, iova + size, size,
933 IOMMU_READ | IOMMU_NOEXEC))
934 return __FAIL(ops);
935
936 if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
937 return __FAIL(ops);
938
939 iova += SZ_16M;
940 loopnr++;
941 }
942
943 /* Partial unmap */
944 i = 1;
945 size = 1UL << __ffs(cfg.pgsize_bitmap);
946 while (i < loopnr) {
947 iova_start = i * SZ_16M;
948 if (ops->unmap(ops, iova_start + size, size, NULL) != size)
949 return __FAIL(ops);
950
951 /* Remap of partial unmap */
952 if (ops->map(ops, iova_start + size, size, size, IOMMU_READ))
953 return __FAIL(ops);
954
955 if (ops->iova_to_phys(ops, iova_start + size + 42)
956 != (size + 42))
957 return __FAIL(ops);
958 i++;
959 }
960
961 /* Full unmap */
962 iova = 0;
963 for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
964 size = 1UL << i;
965
966 if (ops->unmap(ops, iova, size, NULL) != size)
967 return __FAIL(ops);
968
969 if (ops->iova_to_phys(ops, iova + 42))
970 return __FAIL(ops);
971
972 /* Remap full block */
973 if (ops->map(ops, iova, iova, size, IOMMU_WRITE))
974 return __FAIL(ops);
975
976 if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
977 return __FAIL(ops);
978
979 iova += SZ_16M;
980 }
981
982 free_io_pgtable_ops(ops);
983
984 selftest_running = false;
985
986 pr_info("self test ok\n");
987 return 0;
988 }
989 subsys_initcall(arm_v7s_do_selftests);
990 #endif
991