// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
 * No bombay mix was harmed in the writing of this file.
 *
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#include <linux/bitfield.h>
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>

#define KVM_PTE_TYPE			BIT(1)
#define KVM_PTE_TYPE_BLOCK		0
#define KVM_PTE_TYPE_PAGE		1
#define KVM_PTE_TYPE_TABLE		1

#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	1
#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)

#define KVM_PTE_LEAF_ATTR_HI_SW		GENMASK(58, 55)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_S2_PERMS	(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
					 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
					 KVM_PTE_LEAF_ATTR_HI_S2_XN)

struct kvm_pgtable_walk_data {
	struct kvm_pgtable		*pgt;
	struct kvm_pgtable_walker	*walker;

	u64				addr;
	u64				end;
};

#define KVM_PHYS_INVALID		(-1ULL)

static bool kvm_phys_is_valid(u64 phys)
{
	return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
}

static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
	u64 granule = kvm_granule_size(level);

	if (!kvm_level_supports_block_mapping(level))
		return false;

	if (granule > (end - addr))
		return false;

	if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
		return false;

	return IS_ALIGNED(addr, granule);
}

static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
{
	u64 shift = kvm_granule_shift(level);
	u64 mask = BIT(PAGE_SHIFT - 3) - 1;

	return (data->addr >> shift) & mask;
}

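/*
 * Index of the (possibly concatenated) PGD page that maps @addr: the shift
 * corresponds to the level above the start level, i.e. one full page worth
 * of start-level entries per index.
 */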
static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
{
	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
	u64 mask = BIT(pgt->ia_bits) - 1;

	return (addr & mask) >> shift;
}

static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
{
	return __kvm_pgd_page_idx(data->pgt, data->addr);
}

static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
{
	struct kvm_pgtable pgt = {
		.ia_bits	= ia_bits,
		.start_level	= start_level,
	};

	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}

static bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
		return false;

	if (!kvm_pte_valid(pte))
		return false;

	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}

static kvm_pte_t kvm_phys_to_pte(u64 pa)
{
	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);

	return pte;
}

static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops)
{
	return mm_ops->phys_to_virt(kvm_pte_to_phys(pte));
}

static void kvm_clear_pte(kvm_pte_t *ptep)
{
	WRITE_ONCE(*ptep, 0);
}

static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp,
			      struct kvm_pgtable_mm_ops *mm_ops)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp));

	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
	pte |= KVM_PTE_VALID;

	WARN_ON(kvm_pte_valid(old));
	smp_store_release(ptep, pte);
}

static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
{
	kvm_pte_t pte = kvm_phys_to_pte(pa);
	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
							   KVM_PTE_TYPE_BLOCK;

	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
	pte |= KVM_PTE_VALID;

	return pte;
}

static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
				  u32 level, kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flag)
{
	struct kvm_pgtable_walker *walker = data->walker;

	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level);

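/*
 * Visit a single entry: run the TABLE_PRE/LEAF callbacks as requested by the
 * walker, then recurse into the child table (if any) and finally run
 * TABLE_POST on the way back up.
 */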
static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
				      kvm_pte_t *ptep, u32 level)
{
	int ret = 0;
	u64 addr = data->addr;
	kvm_pte_t *childp, pte = *ptep;
	bool table = kvm_pte_table(pte, level);
	enum kvm_pgtable_walk_flags flags = data->walker->flags;

	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_PRE);
	}

	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_LEAF);
		pte = *ptep;
		table = kvm_pte_table(pte, level);
	}

	if (ret)
		goto out;

	if (!table) {
		data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
		data->addr += kvm_granule_size(level);
		goto out;
	}

	childp = kvm_pte_follow(pte, data->pgt->mm_ops);
	ret = __kvm_pgtable_walk(data, childp, level + 1);
	if (ret)
		goto out;

	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_POST);
	}

out:
	return ret;
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level)
{
	u32 idx;
	int ret = 0;

	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EINVAL;

	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
		kvm_pte_t *ptep = &pgtable[idx];

		if (data->addr >= data->end)
			break;

		ret = __kvm_pgtable_visit(data, ptep, level);
		if (ret)
			break;
	}

	return ret;
}

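/*
 * Walk [addr, end), iterating over the (possibly concatenated) PGD pages and
 * descending from the configured start level.
 */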
static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
{
	u32 idx;
	int ret = 0;
	struct kvm_pgtable *pgt = data->pgt;
	u64 limit = BIT(pgt->ia_bits);

	if (data->addr > limit || data->end > limit)
		return -ERANGE;

	if (!pgt->pgd)
		return -EINVAL;

	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];

		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
		if (ret)
			break;
	}

	return ret;
}

int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker)
{
	struct kvm_pgtable_walk_data walk_data = {
		.pgt	= pgt,
		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
		.end	= PAGE_ALIGN(walk_data.addr + size),
		.walker	= walker,
	};

	return _kvm_pgtable_walk(&walk_data);
}

struct leaf_walk_data {
	kvm_pte_t	pte;
	u32		level;
};

static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
		       enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct leaf_walk_data *data = arg;

	data->pte = *ptep;
	data->level = level;

	return 0;
}

int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
			 kvm_pte_t *ptep, u32 *level)
{
	struct leaf_walk_data data;
	struct kvm_pgtable_walker walker = {
		.cb	= leaf_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &data,
	};
	int ret;

	ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
			       PAGE_SIZE, &walker);
	if (!ret) {
		if (ptep)
			*ptep = data.pte;
		if (level)
			*level = data.level;
	}

	return ret;
}

struct hyp_map_data {
	u64				phys;
	kvm_pte_t			attr;
	struct kvm_pgtable_mm_ops	*mm_ops;
};

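/*
 * Convert KVM_PGTABLE_PROT_* flags into a hyp stage-1 attribute word,
 * rejecting unreadable mappings, writable+executable mappings and
 * executable device memory.
 */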
static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;

	if (!(prot & KVM_PGTABLE_PROT_R))
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_X) {
		if (prot & KVM_PGTABLE_PROT_W)
			return -EINVAL;

		if (device)
			return -EINVAL;
	} else {
		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
	}

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
	*ptep = attr;

	return 0;
}

enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
{
	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
	u32 ap;

	if (!kvm_pte_valid(pte))
		return prot;

	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
		prot |= KVM_PGTABLE_PROT_X;

	ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
	if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)
		prot |= KVM_PGTABLE_PROT_R;
	else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW)
		prot |= KVM_PGTABLE_PROT_RW;

	return prot;
}

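/*
 * Install a valid leaf at @ptep if a block/page mapping at this level covers
 * the requested range; returns false when it doesn't, so the caller can
 * allocate a child table instead.
 */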
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				    kvm_pte_t *ptep, struct hyp_map_data *data)
{
	kvm_pte_t new, old = *ptep;
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	data->phys += granule;
	new = kvm_init_valid_leaf_pte(phys, data->attr, level);
	if (old == new)
		return true;
	if (!kvm_pte_valid(old))
		data->mm_ops->get_page(ptep);
	else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
		return false;

	smp_store_release(ptep, new);
	return true;
}

static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			  enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t *childp;
	struct hyp_map_data *data = arg;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
		return 0;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
	if (!childp)
		return -ENOMEM;

	kvm_set_table_pte(ptep, childp, mm_ops);
	mm_ops->get_page(ptep);
	return 0;
}

int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot)
{
	int ret;
	struct hyp_map_data map_data = {
		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
		.mm_ops	= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_map_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &map_data,
	};

	ret = hyp_set_prot_attr(prot, &map_data.attr);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	isb();
	return ret;
}

struct hyp_unmap_data {
	u64				unmapped;
	struct kvm_pgtable_mm_ops	*mm_ops;
};

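/*
 * Tear down a hyp mapping: leaf entries are cleared and invalidated one by
 * one, while a table page is only freed once its refcount shows that nothing
 * is mapped beneath it any more.
 */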
static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			    enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t pte = *ptep, *childp = NULL;
	u64 granule = kvm_granule_size(level);
	struct hyp_unmap_data *data = arg;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (!kvm_pte_valid(pte))
		return -EINVAL;

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte, mm_ops);

		if (mm_ops->page_count(childp) != 1)
			return 0;

		kvm_clear_pte(ptep);
		dsb(ishst);
		__tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
	} else {
		if (end - addr < granule)
			return -EINVAL;

		kvm_clear_pte(ptep);
		dsb(ishst);
		__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
		data->unmapped += granule;
	}

	dsb(ish);
	isb();
	mm_ops->put_page(ptep);

	if (childp)
		mm_ops->put_page(childp);

	return 0;
}

u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct hyp_unmap_data unmap_data = {
		.mm_ops	= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_unmap_walker,
		.arg	= &unmap_data,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	if (!pgt->mm_ops->page_count)
		return 0;

	kvm_pgtable_walk(pgt, addr, size, &walker);
	return unmap_data.unmapped;
}

int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
			 struct kvm_pgtable_mm_ops *mm_ops)
{
	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);

	pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits = va_bits;
	pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
	pgt->mm_ops = mm_ops;
	pgt->mmu = NULL;
	pgt->force_pte_cb = NULL;

	return 0;
}

static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			   enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct kvm_pgtable_mm_ops *mm_ops = arg;
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte))
		return 0;

	mm_ops->put_page(ptep);

	if (kvm_pte_table(pte, level))
		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));

	return 0;
}

void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
		.arg	= pgt->mm_ops,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgt->mm_ops->put_page(pgt->pgd);
	pgt->pgd = NULL;
}

struct stage2_map_data {
	u64				phys;
	kvm_pte_t			attr;
	u64				annotation;

	kvm_pte_t			*anchor;
	kvm_pte_t			*childp;

	struct kvm_s2_mmu		*mmu;
	void				*memcache;

	struct kvm_pgtable_mm_ops	*mm_ops;

	/* Force mappings to page granularity */
	bool				force_pte;
};

u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
{
	u64 vtcr = VTCR_EL2_FLAGS;
	u8 lvls;

	vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
	vtcr |= VTCR_EL2_T0SZ(phys_shift);
	/*
	 * Use a minimum 2 level page table to prevent splitting
	 * host PMD huge pages at stage2.
	 */
	lvls = stage2_pgtable_levels(phys_shift);
	if (lvls < 2)
		lvls = 2;
	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);

	/*
	 * Enable the Hardware Access Flag management, unconditionally
	 * on all CPUs. The feature is RES0 on CPUs without the support
	 * and must be ignored by them.
	 */
	vtcr |= VTCR_EL2_HA;

	/* Set the vmid bits */
	vtcr |= (get_vmid_bits(mmfr1) == 16) ?
		VTCR_EL2_VS_16BIT :
		VTCR_EL2_VS_8BIT;

	return vtcr;
}

static bool stage2_has_fwb(struct kvm_pgtable *pgt)
{
	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return false;

	return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
}

#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))

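/*
 * Convert KVM_PGTABLE_PROT_* flags into a stage-2 attribute word. Unlike the
 * hyp stage-1 variant, read and write are independent S2AP bits and only
 * executable device memory is rejected.
 */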
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
				kvm_pte_t *ptep)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) :
				  KVM_S2_MEMATTR(pgt, NORMAL);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;

	if (!(prot & KVM_PGTABLE_PROT_X))
		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
	else if (device)
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_R)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
	*ptep = attr;

	return 0;
}

enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
{
	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;

	if (!kvm_pte_valid(pte))
		return prot;

	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
		prot |= KVM_PGTABLE_PROT_R;
	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
		prot |= KVM_PGTABLE_PROT_W;
	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
		prot |= KVM_PGTABLE_PROT_X;

	return prot;
}

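/*
 * A valid PTE only needs a full update if something other than the stage-2
 * permission bits would change; permission-only changes are left to the
 * relax-permissions path.
 */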
static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
{
	if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
		return true;

	return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS));
}

static bool stage2_pte_is_counted(kvm_pte_t pte)
{
	/*
	 * The refcount tracks valid entries as well as invalid entries if they
	 * encode ownership of a page to another entity than the page-table
	 * owner, whose id is 0.
	 */
	return !!pte;
}

static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
			   u32 level, struct kvm_pgtable_mm_ops *mm_ops)
{
	/*
	 * Clear the existing PTE, and perform break-before-make with
	 * TLB maintenance if it was valid.
	 */
	if (kvm_pte_valid(*ptep)) {
		kvm_clear_pte(ptep);
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
	}

	mm_ops->put_page(ptep);
}

static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
{
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;

	return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL);
}

static bool stage2_pte_executable(kvm_pte_t pte)
{
	return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}

static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
					struct stage2_map_data *data)
{
	if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1)))
		return false;

	return kvm_block_mapping_supported(addr, end, data->phys, level);
}

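/*
 * Try to install a leaf (or an invalid annotation PTE) at @ptep, performing
 * break-before-make and any required CMOs first. Returns -E2BIG if a leaf
 * cannot be used at this level so that the caller descends instead, and
 * -EAGAIN if an existing mapping only needs a permission change.
 */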
static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	kvm_pte_t new, old = *ptep;
	u64 granule = kvm_granule_size(level), phys = data->phys;
	struct kvm_pgtable *pgt = data->mmu->pgt;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
		return -E2BIG;

	if (kvm_phys_is_valid(phys))
		new = kvm_init_valid_leaf_pte(phys, data->attr, level);
	else
		new = data->annotation;

	if (stage2_pte_is_counted(old)) {
		/*
		 * Skip updating the PTE if we are trying to recreate the exact
		 * same mapping or only change the access permissions. Instead,
		 * the vCPU will exit one more time from guest if still needed
		 * and then go through the path of relaxing permissions.
		 */
		if (!stage2_pte_needs_update(old, new))
			return -EAGAIN;

		/*
		 * If we're only changing software bits, then we don't need to
		 * do anything else.
		 */
		if (!((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
			goto out_set_pte;

		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
	}

	/* Perform CMOs before installation of the guest stage-2 PTE */
	if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
					       granule);

	if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);

	if (stage2_pte_is_counted(new))
		mm_ops->get_page(ptep);

out_set_pte:
	smp_store_release(ptep, new);
	if (kvm_phys_is_valid(phys))
		data->phys += granule;
	return 0;
}

static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
				     kvm_pte_t *ptep,
				     struct stage2_map_data *data)
{
	if (data->anchor)
		return 0;

	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
		return 0;

	data->childp = kvm_pte_follow(*ptep, data->mm_ops);
	kvm_clear_pte(ptep);

	/*
	 * Invalidate the whole stage-2, as we may have numerous leaf
	 * entries below us which would otherwise need invalidating
	 * individually.
	 */
	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
	data->anchor = ptep;
	return 0;
}

static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				struct stage2_map_data *data)
{
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
	kvm_pte_t *childp, pte = *ptep;
	int ret;

	if (data->anchor) {
		if (stage2_pte_is_counted(pte))
			mm_ops->put_page(ptep);

		return 0;
	}

	ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data);
	if (ret != -E2BIG)
		return ret;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	if (!data->memcache)
		return -ENOMEM;

	childp = mm_ops->zalloc_page(data->memcache);
	if (!childp)
		return -ENOMEM;

	/*
	 * If we've run into an existing block mapping then replace it with
	 * a table. Accesses beyond 'end' that fall within the new table
	 * will be mapped lazily.
	 */
	if (stage2_pte_is_counted(pte))
		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);

	kvm_set_table_pte(ptep, childp, mm_ops);
	mm_ops->get_page(ptep);

	return 0;
}

static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
	kvm_pte_t *childp;
	int ret = 0;

	if (!data->anchor)
		return 0;

	if (data->anchor == ptep) {
		childp = data->childp;
		data->anchor = NULL;
		data->childp = NULL;
		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
	} else {
		childp = kvm_pte_follow(*ptep, mm_ops);
	}

	mm_ops->put_page(childp);
	mm_ops->put_page(ptep);

	return ret;
}

/*
 * This is a little fiddly, as we use all three of the walk flags. The idea
 * is that the TABLE_PRE callback runs for table entries on the way down,
 * looking for table entries which we could conceivably replace with a
 * block entry for this mapping. If it finds one, then it sets the 'anchor'
 * field in 'struct stage2_map_data' to point at the table entry, before
 * clearing the entry to zero and descending into the now detached table.
 *
 * The behaviour of the LEAF callback then depends on whether or not the
 * anchor has been set. If not, then we're not using a block mapping higher
 * up the table and we perform the mapping at the existing leaves instead.
 * If, on the other hand, the anchor _is_ set, then we drop references to
 * all valid leaves so that the pages beneath the anchor can be freed.
 *
 * Finally, the TABLE_POST callback does nothing if the anchor has not
 * been set, but otherwise frees the page-table pages while walking back up
 * the page-table, installing the block entry when it revisits the anchor
 * pointer and clearing the anchor to NULL.
 */
static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			     enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct stage2_map_data *data = arg;

	switch (flag) {
	case KVM_PGTABLE_WALK_TABLE_PRE:
		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_LEAF:
		return stage2_map_walk_leaf(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_TABLE_POST:
		return stage2_map_walk_table_post(addr, end, level, ptep, data);
	}

	return -EINVAL;
}

int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   void *mc)
{
	int ret;
	struct stage2_map_data map_data = {
		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
		.mmu		= pgt->mmu,
		.memcache	= mc,
		.mm_ops		= pgt->mm_ops,
		.force_pte	= pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot),
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys)))
		return -EINVAL;

	ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	return ret;
}

int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size,
				void *mc, kvm_pte_t annotation)
{
	int ret;
	struct stage2_map_data map_data = {
		.phys		= KVM_PHYS_INVALID,
		.mmu		= pgt->mmu,
		.memcache	= mc,
		.mm_ops		= pgt->mm_ops,
		.force_pte	= true,
		.annotation	= annotation,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	if (annotation & PTE_VALID)
		return -EINVAL;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	return ret;
}

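/*
 * Unmap a stage-2 range: counted-but-invalid annotations are simply dropped,
 * table pages are only freed once their refcount shows they are empty, and
 * cacheable leaves are cleaned to the PoC when FWB is not available.
 */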
static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_pgtable *pgt = arg;
	struct kvm_s2_mmu *mmu = pgt->mmu;
	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
	kvm_pte_t pte = *ptep, *childp = NULL;
	bool need_flush = false;

	if (!kvm_pte_valid(pte)) {
		if (stage2_pte_is_counted(pte)) {
			kvm_clear_pte(ptep);
			mm_ops->put_page(ptep);
		}
		return 0;
	}

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte, mm_ops);

		if (mm_ops->page_count(childp) != 1)
			return 0;
	} else if (stage2_pte_cacheable(pgt, pte)) {
		need_flush = !stage2_has_fwb(pgt);
	}

	/*
	 * This is similar to the map() path in that we unmap the entire
	 * block entry and rely on the remaining portions being faulted
	 * back lazily.
	 */
	stage2_put_pte(ptep, mmu, addr, level, mm_ops);

	if (need_flush && mm_ops->dcache_clean_inval_poc)
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
					       kvm_granule_size(level));

	if (childp)
		mm_ops->put_page(childp);

	return 0;
}

int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_unmap_walker,
		.arg	= pgt,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

struct stage2_attr_data {
	kvm_pte_t			attr_set;
	kvm_pte_t			attr_clr;
	kvm_pte_t			pte;
	u32				level;
	struct kvm_pgtable_mm_ops	*mm_ops;
};

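/*
 * Rewrite the attribute bits of a valid leaf in place (no break-before-make),
 * recording the original PTE and its level for the caller.
 */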
static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;
	struct stage2_attr_data *data = arg;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (!kvm_pte_valid(pte))
		return 0;

	data->level = level;
	data->pte = pte;
	pte &= ~data->attr_clr;
	pte |= data->attr_set;

	/*
	 * We may race with the CPU trying to set the access flag here,
	 * but worst-case the access flag update gets lost and will be
	 * set on the next access instead.
	 */
	if (data->pte != pte) {
		/*
		 * Invalidate instruction cache before updating the guest
		 * stage-2 PTE if we are going to add executable permission.
		 */
		if (mm_ops->icache_inval_pou &&
		    stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
						 kvm_granule_size(level));
		WRITE_ONCE(*ptep, pte);
	}

	return 0;
}

static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
				    u64 size, kvm_pte_t attr_set,
				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
				    u32 *level)
{
	int ret;
	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
	struct stage2_attr_data data = {
		.attr_set	= attr_set & attr_mask,
		.attr_clr	= attr_clr & attr_mask,
		.mm_ops		= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_attr_walker,
		.arg		= &data,
		.flags		= KVM_PGTABLE_WALK_LEAF,
	};

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	if (ret)
		return ret;

	if (orig_pte)
		*orig_pte = data.pte;

	if (level)
		*level = data.level;
	return 0;
}

int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	return stage2_update_leaf_attrs(pgt, addr, size, 0,
					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
					NULL, NULL);
}

kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;

	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
				 &pte, NULL);
	dsb(ishst);
	return pte;
}

kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;

	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
				 &pte, NULL);
	/*
	 * "But where's the TLBI?!", you scream.
	 * "Over in the core code", I sigh.
	 *
	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
	 */
	return pte;
}

bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;

	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
}

int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot)
{
	int ret;
	u32 level;
	kvm_pte_t set = 0, clr = 0;

	if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_R)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	if (prot & KVM_PGTABLE_PROT_X)
		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
	if (!ret)
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
	return ret;
}

static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_pgtable *pgt = arg;
	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
	kvm_pte_t pte = *ptep;

	if (!stage2_pte_cacheable(pgt, pte))
		return 0;

	if (mm_ops->dcache_clean_inval_poc)
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
					       kvm_granule_size(level));
	return 0;
}

int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_flush_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= pgt,
	};

	if (stage2_has_fwb(pgt))
		return 0;

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

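/*
 * Initialise a guest stage-2 page-table from the MMU's VTCR value: the IPA
 * size and start level are derived from T0SZ/SL0, and the (possibly
 * concatenated) PGD is allocated as a physically contiguous block.
 */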
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			      struct kvm_pgtable_mm_ops *mm_ops,
			      enum kvm_pgtable_stage2_flags flags,
			      kvm_pgtable_force_pte_cb_t force_pte_cb)
{
	size_t pgd_sz;
	u64 vtcr = mmu->arch->vtcr;
	u32 ia_bits = VTCR_EL2_IPA(vtcr);
	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
	pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits = ia_bits;
	pgt->start_level = start_level;
	pgt->mm_ops = mm_ops;
	pgt->mmu = mmu;
	pgt->flags = flags;
	pgt->force_pte_cb = force_pte_cb;

	/* Ensure zeroed PGD pages are visible to the hardware walker */
	dsb(ishst);
	return 0;
}

size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
{
	u32 ia_bits = VTCR_EL2_IPA(vtcr);
	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

	return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
}

static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	struct kvm_pgtable_mm_ops *mm_ops = arg;
	kvm_pte_t pte = *ptep;

	if (!stage2_pte_is_counted(pte))
		return 0;

	mm_ops->put_page(ptep);

	if (kvm_pte_table(pte, level))
		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));

	return 0;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	size_t pgd_sz;
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF |
			  KVM_PGTABLE_WALK_TABLE_POST,
		.arg	= pgt->mm_ops,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
	pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
	pgt->pgd = NULL;
}