// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic ARM page table allocator.
 * A copy of this library is embedded in the KVM nVHE image.
 *
 * Copyright (C) 2022 Arm Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */

#include <linux/io-pgtable-arm.h>

#include <linux/sizes.h>
#include <linux/types.h>

#include "arm/arm-smmu-v3/pkvm/arm-smmu-v3-module.h"

#define iopte_deref(pte, d) __arm_lpae_phys_to_virt(iopte_to_paddr(pte, d))

#define ARM_LPAE_MAX_ADDR_BITS          52
#define ARM_LPAE_S2_MAX_CONCAT_PAGES    16

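/*
 * Note on 52-bit output addresses (64K granule only): PA bits 51:48 are
 * folded down into PTE bits 15:12 by paddr_to_iopte() and rotated back up
 * by iopte_to_paddr(). For example, a 64K-aligned PA with bits 51:48 = 0xf
 * is stored with PTE bits 15:12 = 0xf and recovered unchanged.
 */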
static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
                                     struct arm_lpae_io_pgtable *data)
{
        arm_lpae_iopte pte = paddr;

        /* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
        return (pte | (pte >> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK;
}

static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
                                  struct arm_lpae_io_pgtable *data)
{
        u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;

        if (ARM_LPAE_GRANULE(data) < SZ_64K)
                return paddr;

        /* Rotate the packed high-order bits back to the top */
        return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
}

/*
 * Convert an index returned by ARM_LPAE_PGD_IDX(), which can point into
 * a concatenated PGD, into the maximum number of entries that can be
 * mapped in the same table page.
 */
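/*
 * For example, with 512 PTEs per table, an index of 700 (entry 188 of the
 * second concatenated PGD page) leaves room for 512 - 188 = 324 entries.
 */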
static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data)
{
        int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);

        return ptes_per_table - (i & (ptes_per_table - 1));
}

static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries)
{
        for (int i = 0; i < num_entries; i++)
                if (cfg->quirks & IO_PGTABLE_QUIRK_UNMAP_INVAL)
                        ptep[i] &= ~ARM_LPAE_PTE_VALID;
                else
                        ptep[i] = 0;

        if (!cfg->coherent_walk && num_entries)
                __arm_lpae_sync_pte(ptep, num_entries, cfg);
}

/*
 * Check if concatenated PGDs are mandatory according to Arm DDI0487 (K.a)
 * 1) R_DXBSH: For 16KB, and 48-bit input size, use level 1 instead of 0.
 * 2) R_SRKBC: From the table of PA sizes and valid initial lookup levels:
 *   a) 40 bits PA size with 4K: use level 1 instead of level 0 (2 tables for ias = oas)
 *   b) 40 bits PA size with 16K: use level 2 instead of level 1 (16 tables for ias = oas)
 *   c) 42 bits PA size with 4K: use level 1 instead of level 0 (8 tables for ias = oas)
 *   d) 48 bits PA size with 16K: use level 1 instead of level 0 (2 tables for ias = oas)
 */
static inline bool arm_lpae_concat_mandatory(struct io_pgtable_cfg *cfg,
                                             struct arm_lpae_io_pgtable *data)
{
        unsigned int ias = cfg->ias;
        unsigned int oas = cfg->oas;

        /* Covers 1 and 2.d */
        if ((ARM_LPAE_GRANULE(data) == SZ_16K) && (data->start_level == 0))
                return (oas == 48) || (ias == 48);

        /* Covers 2.a and 2.c */
        if ((ARM_LPAE_GRANULE(data) == SZ_4K) && (data->start_level == 0))
                return (oas == 40) || (oas == 42);

        /* Case 2.b */
        return (ARM_LPAE_GRANULE(data) == SZ_16K) &&
               (data->start_level == 1) && (oas == 40);
}

static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                               struct iommu_iotlb_gather *gather,
                               unsigned long iova, size_t size, size_t pgcount,
                               int lvl, arm_lpae_iopte *ptep);

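/*
 * Write num_entries leaf PTEs for a physically contiguous range starting at
 * ptep. Last-level entries use the PAGE descriptor type; higher levels, and
 * all Mali LPAE entries, use the BLOCK type.
 */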
static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                                phys_addr_t paddr, arm_lpae_iopte prot,
                                int lvl, int num_entries, arm_lpae_iopte *ptep)
{
        arm_lpae_iopte pte = prot;
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
        int i;

        if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
                pte |= ARM_LPAE_PTE_TYPE_PAGE;
        else
                pte |= ARM_LPAE_PTE_TYPE_BLOCK;

        for (i = 0; i < num_entries; i++)
                ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);

        if (!cfg->coherent_walk)
                __arm_lpae_sync_pte(ptep, num_entries, cfg);
}

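/*
 * Install leaf entries, refusing to overwrite live mappings: an existing
 * leaf is reported via arm_lpae_map_exists(), while a stale table entry is
 * unmapped and freed before being replaced by a block entry.
 */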
static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                             unsigned long iova, phys_addr_t paddr,
                             arm_lpae_iopte prot, int lvl, int num_entries,
                             arm_lpae_iopte *ptep)
{
        int i;

        for (i = 0; i < num_entries; i++)
                if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
                        /* We require an unmap first */
                        return arm_lpae_map_exists();
                } else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
                        /*
                         * We need to unmap and free the old table before
                         * overwriting it with a block entry.
                         */
                        arm_lpae_iopte *tblp;
                        size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);

                        tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
                        if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
                                             lvl, tblp) != sz) {
                                WARN_ON(1);
                                return -EINVAL;
                        }
                }

        __arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
        return 0;
}

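/*
 * Publish a freshly allocated table with cmpxchg so that concurrent mappers
 * can race safely: the loser frees its table and continues with whatever the
 * winner installed. For non-coherent walks, ARM_LPAE_PTE_SW_SYNC records
 * that the PTE has already been cleaned to the point of coherency.
 */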
static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
                                             arm_lpae_iopte *ptep,
                                             arm_lpae_iopte curr,
                                             struct arm_lpae_io_pgtable *data)
{
        arm_lpae_iopte old, new;
        struct io_pgtable_cfg *cfg = &data->iop.cfg;

        new = paddr_to_iopte(__arm_lpae_virt_to_phys(table), data) |
              ARM_LPAE_PTE_TYPE_TABLE;
        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
                new |= ARM_LPAE_PTE_NSTABLE;

        /*
         * Ensure the table itself is visible before its PTE can be.
         * Whilst we could get away with cmpxchg64_release below, this
         * doesn't have any ordering semantics when !CONFIG_SMP.
         */
        dma_wmb();

        old = cmpxchg64_relaxed(ptep, curr, new);

        if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC))
                return old;

        /* Even if it's not ours, there's no point waiting; just kick it */
        __arm_lpae_sync_pte(ptep, 1, cfg);
        if (old == curr)
                WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);

        return old;
}

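/*
 * Recursive map helper: install leaf entries once the block size at the
 * current level matches the requested page size, otherwise descend,
 * allocating intermediate tables on demand.
 */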
static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
                          phys_addr_t paddr, size_t size, size_t pgcount,
                          arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
                          gfp_t gfp, size_t *mapped)
{
        arm_lpae_iopte *cptep, pte;
        size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
        size_t tblsz = ARM_LPAE_GRANULE(data);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        int ret = 0, num_entries, max_entries, map_idx_start;

        /* Find our entry at the current level */
        map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
        ptep += map_idx_start;

        /* If we can install a leaf entry at this level, then do so */
        if (size == block_size) {
                max_entries = arm_lpae_max_entries(map_idx_start, data);
                num_entries = min_t(int, pgcount, max_entries);
                ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
                if (!ret)
                        *mapped += num_entries * size;

                return ret;
        }

        /* We can't allocate tables at the final level */
        if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
                return -EINVAL;

        /* Grab a pointer to the next level */
        pte = READ_ONCE(*ptep);
        if (!iopte_valid(pte)) {
                cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg, data->iop.cookie);
                if (!cptep)
                        return -ENOMEM;

                pte = arm_lpae_install_table(cptep, ptep, 0, data);
                if (iopte_valid(pte))
                        __arm_lpae_free_pages(cptep, tblsz, cfg, data->iop.cookie);
        } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
                __arm_lpae_sync_pte(ptep, 1, cfg);
        }

        if (iopte_valid(pte) && !iopte_leaf(pte, lvl, data->iop.fmt)) {
                cptep = iopte_deref(pte, data);
        } else if (iopte_valid(pte)) {
                /* We require an unmap first */
                return arm_lpae_map_exists();
        }

        /* Rinse, repeat */
        return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
                              cptep, gfp, mapped);
}

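/*
 * Translate IOMMU_{READ,WRITE,CACHE,MMIO,...} flags into PTE bits: stage 1
 * uses AP bits plus a MAIR attribute index, stage 2 uses HAP permissions
 * plus a direct MemAttr encoding.
 */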
static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
                                           int prot)
{
        arm_lpae_iopte pte;

        if (data->iop.fmt == ARM_64_LPAE_S1 ||
            data->iop.fmt == ARM_32_LPAE_S1) {
                pte = ARM_LPAE_PTE_nG;
                if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
                        pte |= ARM_LPAE_PTE_AP_RDONLY;
                else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
                        pte |= ARM_LPAE_PTE_DBM;
                if (!(prot & IOMMU_PRIV))
                        pte |= ARM_LPAE_PTE_AP_UNPRIV;
        } else {
                pte = ARM_LPAE_PTE_HAP_FAULT;
                if (prot & IOMMU_READ)
                        pte |= ARM_LPAE_PTE_HAP_READ;
                if (prot & IOMMU_WRITE)
                        pte |= ARM_LPAE_PTE_HAP_WRITE;
        }

        /*
         * Note that this logic is structured to accommodate Mali LPAE
         * having stage-1-like attributes but stage-2-like permissions.
         */
        if (data->iop.fmt == ARM_64_LPAE_S2 ||
            data->iop.fmt == ARM_32_LPAE_S2) {
                if (prot & IOMMU_MMIO)
                        pte |= ARM_LPAE_PTE_MEMATTR_DEV;
                else if (prot & IOMMU_CACHE)
                        pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
                else
                        pte |= ARM_LPAE_PTE_MEMATTR_NC;
        } else {
                if (prot & IOMMU_MMIO)
                        pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
                                << ARM_LPAE_PTE_ATTRINDX_SHIFT);
                else if (prot & IOMMU_CACHE)
                        pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
                                << ARM_LPAE_PTE_ATTRINDX_SHIFT);
        }

        /*
         * Also Mali has its own notions of shareability wherein its Inner
         * domain covers the cores within the GPU, and its Outer domain is
         * "outside the GPU" (i.e. either the Inner or System domain in CPU
         * terms, depending on coherency).
         */
        if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE)
                pte |= ARM_LPAE_PTE_SH_IS;
        else
                pte |= ARM_LPAE_PTE_SH_OS;

        if (prot & IOMMU_NOEXEC)
                pte |= ARM_LPAE_PTE_XN;

        if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
                pte |= ARM_LPAE_PTE_NS;

        if (data->iop.fmt != ARM_MALI_LPAE)
                pte |= ARM_LPAE_PTE_AF;

        return pte;
}

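/*
 * The iaext check below verifies that the IOVA is canonical for the
 * configured input size: bits above cfg->ias must all be zero (TTBR0) or,
 * with the TTBR1 quirk, all ones.
 */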
static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
                              phys_addr_t paddr, size_t pgsize, size_t pgcount,
                              int iommu_prot, gfp_t gfp, size_t *mapped)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte *ptep = data->pgd;
        int ret, lvl = data->start_level;
        arm_lpae_iopte prot;
        long iaext = (s64)iova >> cfg->ias;

        if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
                return -EINVAL;

        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
                iaext = ~iaext;
        if (WARN_ON(iaext || paddr >> cfg->oas))
                return -ERANGE;

        if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
                return -EINVAL;

        prot = arm_lpae_prot_to_pte(data, iommu_prot);
        ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
                             ptep, gfp, mapped);
        /*
         * Synchronise all PTE updates for the new mapping before there's
         * a chance for anything to kick off a table walk for the new iova.
         */
        wmb();

        return ret;
}

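/*
 * Recursively free a table and everything it maps: leaf entries are handed
 * to io_pgtable_free_leaf(), valid table entries are descended into, and
 * the table page itself is freed last.
 */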
void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
                             arm_lpae_iopte *ptep)
{
        arm_lpae_iopte *start, *end;
        unsigned long table_size;

        if (lvl == data->start_level)
                table_size = ARM_LPAE_PGD_SIZE(data);
        else
                table_size = ARM_LPAE_GRANULE(data);

        start = ptep;
        end = (void *)ptep + table_size;

        while (ptep != end) {
                arm_lpae_iopte pte = *ptep++;

                if (iopte_leaf(pte, lvl, data->iop.fmt)) {
                        io_pgtable_free_leaf(&data->iop, iopte_to_paddr(pte, data),
                                             ARM_LPAE_BLOCK_SIZE(lvl, data));
                        continue;
                }

                /* Only leaf entries at the last level */
                if ((lvl != ARM_LPAE_MAX_LEVELS - 1) && iopte_valid(pte))
                        __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
        }

        __arm_lpae_free_pages(start, table_size, &data->iop.cfg, data->iop.cookie);
}

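/*
 * Break up a block mapping so that part of it can be unmapped: allocate a
 * next-level table, fully populate it with the old block's attributes, swap
 * it in with arm_lpae_install_table() and retry the unmap on the new table.
 */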
static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
                                       struct iommu_iotlb_gather *gather,
                                       unsigned long iova, size_t size,
                                       arm_lpae_iopte blk_pte, int lvl,
                                       arm_lpae_iopte *ptep, size_t pgcount)
{
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte pte, *tablep;
        phys_addr_t blk_paddr;
        size_t tablesz = ARM_LPAE_GRANULE(data);
        int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);

        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
                return 0;

        tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg, data->iop.cookie);
        if (!tablep)
                return 0; /* Bytes unmapped */

        blk_paddr = iopte_to_paddr(blk_pte, data);
        pte = iopte_prot(blk_pte);

        /* Fully populate the table. */
        __arm_lpae_init_pte(data, blk_paddr, pte, lvl, ptes_per_table, tablep);

        pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
        if (pte != blk_pte) {
                __arm_lpae_free_pages(tablep, tablesz, cfg, data->iop.cookie);
                /*
                 * We may race against someone unmapping another part of this
                 * block, but anything else is invalid. We can't misinterpret
                 * a page entry here since we're never at the last level.
                 */
                if (iopte_type(pte) != ARM_LPAE_PTE_TYPE_TABLE)
                        return 0;

                tablep = iopte_deref(pte, data);
        }

        return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
}

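/*
 * Recursive unmap helper. When the requested size matches the block size at
 * this level, up to pgcount entries are cleared in one go, flushing and
 * freeing any next-level tables in the affected range. A partial hit on a
 * block entry falls back to arm_lpae_split_blk_unmap().
 */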
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                               struct iommu_iotlb_gather *gather,
                               unsigned long iova, size_t size, size_t pgcount,
                               int lvl, arm_lpae_iopte *ptep)
{
        arm_lpae_iopte pte;
        struct io_pgtable *iop = &data->iop;
        int i = 0, num_entries, max_entries, unmap_idx_start;

        /* Something went horribly wrong and we ran out of page table */
        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
                return 0;

        unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
        ptep += unmap_idx_start;
        pte = READ_ONCE(*ptep);
        if (!iopte_valid(pte)) {
                arm_lpae_unmap_empty();
                return 0;
        }

        /* If the size matches this level, we're in the right place */
        if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
                max_entries = arm_lpae_max_entries(unmap_idx_start, data);
                num_entries = min_t(int, pgcount, max_entries);

                /* Find and handle non-leaf entries */
                for (i = 0; i < num_entries; i++) {
                        pte = READ_ONCE(ptep[i]);
                        if (!iopte_valid(pte)) {
                                arm_lpae_unmap_empty();
                                break;
                        }

                        if (!iopte_leaf(pte, lvl, iop->fmt)) {
                                __arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1);

                                /* Also flush any partial walks */
                                io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
                                                          ARM_LPAE_GRANULE(data));

                                /* Now clear the pte of the table as it's about to be freed. */
                                if (iop->cfg.quirks & IO_PGTABLE_QUIRK_UNMAP_INVAL)
                                        ptep[i] = 0;

                                __arm_lpae_free_pgtable(data, lvl + 1,
                                                        iopte_deref(pte, data));
                        }
                }

                /* Clear the remaining entries */
                __arm_lpae_clear_pte(ptep, &iop->cfg, i);

                if (gather && !iommu_iotlb_gather_queued(gather))
                        for (int j = 0; j < i; j++)
                                io_pgtable_tlb_add_page(iop, gather, iova + j * size, size);

                return i * size;
        } else if (iopte_leaf(pte, lvl, iop->fmt)) {
                /*
                 * Insert a table at the next level to map the old region,
                 * minus the part we want to unmap
                 */
                return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
                                                lvl + 1, ptep, pgcount);
        }

        /* Keep on walkin' */
        ptep = iopte_deref(pte, data);
        return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep);
}

static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
                                   size_t pgsize, size_t pgcount,
                                   struct iommu_iotlb_gather *gather)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte *ptep = data->pgd;
        long iaext = (s64)iova >> cfg->ias;

        if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
                return 0;

        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
                iaext = ~iaext;
        if (WARN_ON(iaext))
                return 0;

        return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount,
                                data->start_level, ptep);
}

struct iova_to_phys_data {
        arm_lpae_iopte pte;
        int lvl;
};

static int visit_iova_to_phys(struct io_pgtable_walk_data *walk_data, int lvl,
                              arm_lpae_iopte *ptep, size_t size)
{
        struct io_pgtable_walk_common *walker = walk_data->data;
        struct iova_to_phys_data *data = walker->data;

        data->pte = *ptep;
        data->lvl = lvl;
        return 0;
}

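/*
 * iova_to_phys is implemented as a single-entry walk: the visitor records
 * the last PTE and level reached, and the block offset of the IOVA is then
 * ORed back into the returned physical address.
 */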
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
                                         unsigned long iova)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct iova_to_phys_data d;
        struct io_pgtable_walk_common walker = {
                .data = &d,
        };
        struct io_pgtable_walk_data walk_data = {
                .data = &walker,
                .visit = visit_iova_to_phys,
                .addr = iova,
                .end = iova + 1,
        };
        int ret;

        ret = __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level);
        if (ret || !iopte_valid(d.pte))
                return 0;

        iova &= (ARM_LPAE_BLOCK_SIZE(d.lvl, data) - 1);
        return iopte_to_paddr(d.pte, data) | iova;
}

static int visit_pgtable_walk(struct io_pgtable_walk_data *walk_data, int lvl,
                              arm_lpae_iopte *ptep, size_t size)
{
        struct io_pgtable_walk_common *walker = walk_data->data;
        struct arm_lpae_io_pgtable_walk_data *data = walker->data;

        data->ptes[lvl] = *ptep;
        data->level = lvl + 1;
        return 0;
}

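/*
 * Generic walk entry point: the caller-supplied walker collects the PTEs
 * visited at each level over the range [iova, iova + size).
 */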
static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova,
                                 size_t size, struct io_pgtable_walk_common *walker)
{
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable_walk_data walk_data = {
                .data = walker,
                .visit = visit_pgtable_walk,
                .addr = iova,
                .end = iova + size,
        };

        return __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level);
}

static int io_pgtable_visit(struct arm_lpae_io_pgtable *data,
                            struct io_pgtable_walk_data *walk_data,
                            arm_lpae_iopte *ptep, int lvl)
{
        struct io_pgtable *iop = &data->iop;
        struct io_pgtable_cfg *cfg = &iop->cfg;
        arm_lpae_iopte pte = READ_ONCE(*ptep);
        struct io_pgtable_walk_common *walker = walk_data->data;
        bool is_leaf, is_table;

        size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data);
        int ret = walk_data->visit(walk_data, lvl, ptep, size);

        if (ret)
                return ret;

        if (cfg->quirks & IO_PGTABLE_QUIRK_UNMAP_INVAL) {
                /* Also visit invalidated tables, as they may still have entries. */
                is_table = pte && iopte_table(pte | ARM_LPAE_PTE_VALID, lvl);
                is_leaf = pte && iopte_leaf(pte | ARM_LPAE_PTE_VALID, lvl, iop->fmt);
        } else {
                is_table = iopte_table(pte, lvl);
                is_leaf = iopte_leaf(pte, lvl, iop->fmt);
        }

        if (is_leaf) {
                if (walker->visit_leaf)
                        walker->visit_leaf(iopte_to_paddr(pte, data), size, walker, ptep);
                walk_data->addr += size;
                return 0;
        }

        /* Don't fail the walk if one entry is invalid, just skip over it */
        if (!is_table) {
                walk_data->addr += size;
                return 0;
        }

        ptep = iopte_deref(pte, data);

        return __arm_lpae_iopte_walk(data, walk_data, ptep, lvl + 1);
}

int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data,
                          struct io_pgtable_walk_data *walk_data,
                          arm_lpae_iopte *ptep,
                          int lvl)
{
        u32 idx;
        int max_entries, ret;

        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
                return -EINVAL;

        if (lvl == data->start_level)
                max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
        else
                max_entries = ARM_LPAE_PTES_PER_TABLE(data);

        for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data);
             (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) {
                ret = io_pgtable_visit(data, walk_data, ptep + idx, lvl);
                if (ret)
                        return ret;
        }

        return 0;
}

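/*
 * For example, a pgsize_bitmap of (SZ_4K | SZ_64K | SZ_2M) on a host with
 * PAGE_SIZE == SZ_4K selects the 4K granule, and the bitmap is restricted
 * to (SZ_4K | SZ_2M) below.
 */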
static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
{
        unsigned long granule, page_sizes;
        unsigned int max_addr_bits = 48;

        /*
         * We need to restrict the supported page sizes to match the
         * translation regime for a particular granule. Aim to match
         * the CPU page size if possible, otherwise prefer smaller sizes.
         * While we're at it, restrict the block sizes to match the
         * chosen granule.
         */
        if (cfg->pgsize_bitmap & PAGE_SIZE)
                granule = PAGE_SIZE;
        else if (cfg->pgsize_bitmap & ~PAGE_MASK)
                granule = 1UL << __fls(cfg->pgsize_bitmap & ~PAGE_MASK);
        else if (cfg->pgsize_bitmap & PAGE_MASK)
                granule = 1UL << __ffs(cfg->pgsize_bitmap & PAGE_MASK);
        else
                granule = 0;

        switch (granule) {
        case SZ_4K:
                page_sizes = (SZ_4K | SZ_2M | SZ_1G);
                break;
        case SZ_16K:
                page_sizes = (SZ_16K | SZ_32M);
                break;
        case SZ_64K:
                max_addr_bits = 52;
                page_sizes = (SZ_64K | SZ_512M);
                if (cfg->oas > 48)
                        page_sizes |= 1ULL << 42; /* 4TB */
                break;
        default:
                page_sizes = 0;
        }

        cfg->pgsize_bitmap &= page_sizes;
        cfg->ias = min(cfg->ias, max_addr_bits);
        cfg->oas = min(cfg->oas, max_addr_bits);
}

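/*
 * Worked example of the level arithmetic below: with a 4K granule
 * (pg_shift = 12, 9 bits per level) and ias = 48, va_bits = 36, so
 * levels = 4, the walk starts at level 0 and the PGD holds 2^9 entries.
 */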
int arm_lpae_init_pgtable(struct io_pgtable_cfg *cfg,
                          struct arm_lpae_io_pgtable *data)
{
        int levels, va_bits, pg_shift;

        arm_lpae_restrict_pgsizes(cfg);

        if (!(cfg->pgsize_bitmap & (SZ_4K | SZ_16K | SZ_64K)))
                return -EINVAL;

        if (cfg->ias > ARM_LPAE_MAX_ADDR_BITS)
                return -E2BIG;

        if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS)
                return -E2BIG;

        pg_shift = __ffs(cfg->pgsize_bitmap);
        data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte));

        va_bits = cfg->ias - pg_shift;
        levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
        data->start_level = ARM_LPAE_MAX_LEVELS - levels;

        /* Calculate the actual size of our pgd (without concatenation) */
        data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1));

        data->iop.ops = (struct io_pgtable_ops) {
                .map_pages = arm_lpae_map_pages,
                .unmap_pages = arm_lpae_unmap_pages,
                .iova_to_phys = arm_lpae_iova_to_phys,
                .pgtable_walk = arm_lpae_pgtable_walk,
        };

        return 0;
}

int arm_lpae_init_pgtable_s1(struct io_pgtable_cfg *cfg,
                             struct arm_lpae_io_pgtable *data)
{
        u64 reg;
        int ret;
        typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;
        bool tg1;

        if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
                            IO_PGTABLE_QUIRK_ARM_TTBR1 |
                            IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
                            IO_PGTABLE_QUIRK_ARM_HD |
                            IO_PGTABLE_QUIRK_UNMAP_INVAL))
                return -EINVAL;

        ret = arm_lpae_init_pgtable(cfg, data);
        if (ret)
                return ret;

        /* TCR */
        if (cfg->coherent_walk) {
                tcr->sh = ARM_LPAE_TCR_SH_IS;
                tcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
                tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
                if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)
                        return -EINVAL;
        } else {
                tcr->sh = ARM_LPAE_TCR_SH_OS;
                tcr->irgn = ARM_LPAE_TCR_RGN_NC;
                if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
                        tcr->orgn = ARM_LPAE_TCR_RGN_NC;
                else
                        tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
        }

        tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1;
        switch (ARM_LPAE_GRANULE(data)) {
        case SZ_4K:
                tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K;
                break;
        case SZ_16K:
                tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K;
                break;
        case SZ_64K:
                tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K;
                break;
        }

        switch (cfg->oas) {
        case 32:
                tcr->ips = ARM_LPAE_TCR_PS_32_BIT;
                break;
        case 36:
                tcr->ips = ARM_LPAE_TCR_PS_36_BIT;
                break;
        case 40:
                tcr->ips = ARM_LPAE_TCR_PS_40_BIT;
                break;
        case 42:
                tcr->ips = ARM_LPAE_TCR_PS_42_BIT;
                break;
        case 44:
                tcr->ips = ARM_LPAE_TCR_PS_44_BIT;
                break;
        case 48:
                tcr->ips = ARM_LPAE_TCR_PS_48_BIT;
                break;
        case 52:
                tcr->ips = ARM_LPAE_TCR_PS_52_BIT;
                break;
        default:
                return -EINVAL;
        }

        tcr->tsz = 64ULL - cfg->ias;

        /* MAIRs */
        reg = (ARM_LPAE_MAIR_ATTR_NC
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
              (ARM_LPAE_MAIR_ATTR_WBRWA
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
              (ARM_LPAE_MAIR_ATTR_DEVICE
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
              (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
               << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));

        cfg->arm_lpae_s1_cfg.mair = reg;
        return 0;
}

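/*
 * Stage-2 setup. When arm_lpae_concat_mandatory() says so, the start level
 * is bumped by one and the PGD grows by bits_per_level, i.e. the walk
 * begins with up to ARM_LPAE_S2_MAX_CONCAT_PAGES concatenated tables.
 */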
int arm_lpae_init_pgtable_s2(struct io_pgtable_cfg *cfg,
                             struct arm_lpae_io_pgtable *data)
{
        u64 sl;
        int ret;
        typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;

        /* The NS quirk doesn't apply at stage 2 */
        if (cfg->quirks & ~IO_PGTABLE_QUIRK_UNMAP_INVAL)
                return -EINVAL;

        ret = arm_lpae_init_pgtable(cfg, data);
        if (ret)
                return ret;

        if (arm_lpae_concat_mandatory(cfg, data)) {
                if (WARN_ON((ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte)) >
                            ARM_LPAE_S2_MAX_CONCAT_PAGES))
                        return -EINVAL;
                data->pgd_bits += data->bits_per_level;
                data->start_level++;
        }

        /* VTCR */
        if (cfg->coherent_walk) {
                vtcr->sh = ARM_LPAE_TCR_SH_IS;
                vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
                vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
        } else {
                vtcr->sh = ARM_LPAE_TCR_SH_OS;
                vtcr->irgn = ARM_LPAE_TCR_RGN_NC;
                vtcr->orgn = ARM_LPAE_TCR_RGN_NC;
        }

        sl = data->start_level;

        switch (ARM_LPAE_GRANULE(data)) {
        case SZ_4K:
                vtcr->tg = ARM_LPAE_TCR_TG0_4K;
                sl++; /* SL0 format is different for 4K granule size */
                break;
        case SZ_16K:
                vtcr->tg = ARM_LPAE_TCR_TG0_16K;
                break;
        case SZ_64K:
                vtcr->tg = ARM_LPAE_TCR_TG0_64K;
                break;
        }

        switch (cfg->oas) {
        case 32:
                vtcr->ps = ARM_LPAE_TCR_PS_32_BIT;
                break;
        case 36:
                vtcr->ps = ARM_LPAE_TCR_PS_36_BIT;
                break;
        case 40:
                vtcr->ps = ARM_LPAE_TCR_PS_40_BIT;
                break;
        case 42:
                vtcr->ps = ARM_LPAE_TCR_PS_42_BIT;
                break;
        case 44:
                vtcr->ps = ARM_LPAE_TCR_PS_44_BIT;
                break;
        case 48:
                vtcr->ps = ARM_LPAE_TCR_PS_48_BIT;
                break;
        case 52:
                vtcr->ps = ARM_LPAE_TCR_PS_52_BIT;
                break;
        default:
                return -EINVAL;
        }

        vtcr->tsz = 64ULL - cfg->ias;
        vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK;
        return 0;
}