// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
 * No bombay mix was harmed in the writing of this file.
 *
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#include <linux/bitfield.h>
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>

#define KVM_PTE_LEAF_ATTR_S2_PERMS	(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
					 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
					 KVM_PTE_LEAF_ATTR_HI_S2_XN)

struct kvm_pgtable_walk_data {
	struct kvm_pgtable		*pgt;
	struct kvm_pgtable_walker	*walker;

	u64				addr;
	u64				end;
};

static bool kvm_phys_is_valid(u64 phys)
{
	return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
}

static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
	u64 granule = kvm_granule_size(level);

	if (!kvm_level_supports_block_mapping(level))
		return false;

	if (granule > (end - addr))
		return false;

	if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
		return false;

	return IS_ALIGNED(addr, granule);
}
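
/*
 * Illustrative example (not part of the original source, assuming a 4KiB
 * granule): kvm_granule_size(2) is 2MiB, so a level-2 block mapping of
 * [addr, end) is only considered when at least 2MiB remains, addr is
 * 2MiB-aligned and, for a valid PA, phys is 2MiB-aligned too:
 *
 *	kvm_block_mapping_supported(0x40200000, 0x40600000, 0x80200000, 2)
 *
 * would return true under those assumptions, whereas an addr of
 * 0x40201000 would fail the final IS_ALIGNED() check.
 */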

static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
{
	u64 shift = kvm_granule_shift(level);
	u64 mask = BIT(PAGE_SHIFT - 3) - 1;

	return (data->addr >> shift) & mask;
}

static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
{
	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
	u64 mask = BIT(pgt->ia_bits) - 1;

	return (addr & mask) >> shift;
}

static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
{
	return __kvm_pgd_page_idx(data->pgt, data->addr);
}

static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
{
	struct kvm_pgtable pgt = {
		.ia_bits	= ia_bits,
		.start_level	= start_level,
	};

	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}
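
/*
 * Worked example (added for illustration, assuming a 4KiB granule): with
 * ia_bits = 40 and start_level = 1, kvm_granule_shift(start_level - 1) is
 * 39, so __kvm_pgd_page_idx() selects the initial-level page using address
 * bit 39. It returns 1 for the highest address and kvm_pgd_pages() thus
 * reports 2 concatenated initial-level pages for the PGD.
 */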

static void kvm_clear_pte(kvm_pte_t *ptep)
{
	WRITE_ONCE(*ptep, 0);
}

static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp,
			      struct kvm_pgtable_mm_ops *mm_ops)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp));

	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
	pte |= KVM_PTE_VALID;

	WARN_ON(kvm_pte_valid(old));
	smp_store_release(ptep, pte);
}

static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
{
	kvm_pte_t pte = kvm_phys_to_pte(pa);
	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
							   KVM_PTE_TYPE_BLOCK;

	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
	pte |= KVM_PTE_VALID;

	return pte;
}

static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
				  u32 level, kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flag)
{
	struct kvm_pgtable_walker *walker = data->walker;
	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level);

static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
				      kvm_pte_t *ptep, u32 level)
{
	int ret = 0;
	u64 addr = data->addr;
	kvm_pte_t *childp, pte = *ptep;
	bool table = kvm_pte_table(pte, level);
	enum kvm_pgtable_walk_flags flags = data->walker->flags;

	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_PRE);
	}

	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_LEAF);
		pte = *ptep;
		table = kvm_pte_table(pte, level);
	}

	if (ret)
		goto out;

	if (!table) {
		data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
		data->addr += kvm_granule_size(level);
		goto out;
	}

	childp = kvm_pte_follow(pte, data->pgt->mm_ops);
	ret = __kvm_pgtable_walk(data, childp, level + 1);
	if (ret)
		goto out;

	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_POST);
	}

out:
	return ret;
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level)
{
	u32 idx;
	int ret = 0;

	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EINVAL;

	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
		kvm_pte_t *ptep = &pgtable[idx];

		if (data->addr >= data->end)
			break;

		ret = __kvm_pgtable_visit(data, ptep, level);
		if (ret)
			break;
	}

	return ret;
}

static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
{
	u32 idx;
	int ret = 0;
	struct kvm_pgtable *pgt = data->pgt;
	u64 limit = BIT(pgt->ia_bits);

	if (data->addr > limit || data->end > limit)
		return -ERANGE;

	if (!pgt->pgd)
		return -EINVAL;

	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];

		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
		if (ret)
			break;
	}

	return ret;
}

int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker)
{
	struct kvm_pgtable_walk_data walk_data = {
		.pgt	= pgt,
		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
		.end	= PAGE_ALIGN(walk_data.addr + size),
		.walker	= walker,
	};

	return _kvm_pgtable_walk(&walk_data);
}
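
/*
 * Illustrative usage sketch (not part of the original file): callers drive
 * the walk by packaging a callback and flags in a kvm_pgtable_walker, much
 * like leaf_walker()/kvm_pgtable_get_leaf() below. A hypothetical visitor
 * that only cares about leaf entries could be wired up as:
 *
 *	struct kvm_pgtable_walker walker = {
 *		.cb	= my_leaf_visitor,	// hypothetical callback
 *		.flags	= KVM_PGTABLE_WALK_LEAF,
 *		.arg	= &my_data,		// hypothetical context
 *	};
 *
 *	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
 */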

struct leaf_walk_data {
	kvm_pte_t pte;
	u32 level;
};

static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
		       enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct leaf_walk_data *data = arg;

	data->pte = *ptep;
	data->level = level;

	return 0;
}

int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
			 kvm_pte_t *ptep, u32 *level)
{
	struct leaf_walk_data data;
	struct kvm_pgtable_walker walker = {
		.cb	= leaf_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &data,
	};
	int ret;

	ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
			       PAGE_SIZE, &walker);
	if (!ret) {
		if (ptep)
			*ptep = data.pte;
		if (level)
			*level = data.level;
	}

	return ret;
}

struct hyp_map_data {
	u64 phys;
	kvm_pte_t attr;
	struct kvm_pgtable_mm_ops *mm_ops;
};

static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
{
	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
	bool nc = prot & KVM_PGTABLE_PROT_NC;
	kvm_pte_t attr;
	u32 mtype;

	if (!(prot & KVM_PGTABLE_PROT_R) || (device && nc) ||
	    (prot & (KVM_PGTABLE_PROT_PXN | KVM_PGTABLE_PROT_UXN)))
		return -EINVAL;

	if (device)
		mtype = MT_DEVICE_nGnRnE;
	else if (nc)
		mtype = MT_NORMAL_NC;
	else
		mtype = MT_NORMAL;

	attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);

	if (prot & KVM_PGTABLE_PROT_X) {
		if (prot & KVM_PGTABLE_PROT_W)
			return -EINVAL;

		if (device)
			return -EINVAL;
	} else {
		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
	}

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
	*ptep = attr;

	return 0;
}

enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
{
	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
	u32 ap;

	if (!kvm_pte_valid(pte))
		return prot;

	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
		prot |= KVM_PGTABLE_PROT_X;

	ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
	if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)
		prot |= KVM_PGTABLE_PROT_R;
	else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW)
		prot |= KVM_PGTABLE_PROT_RW;

	return prot;
}

static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				    kvm_pte_t *ptep, struct hyp_map_data *data)
{
	kvm_pte_t new, old = *ptep;
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	data->phys += granule;
	new = kvm_init_valid_leaf_pte(phys, data->attr, level);
	if (old == new)
		return true;
	if (!kvm_pte_valid(old))
		data->mm_ops->get_page(ptep);
	else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
		return false;

	smp_store_release(ptep, new);
	return true;
}

static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			  enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t *childp;
	struct hyp_map_data *data = arg;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
		return 0;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
	if (!childp)
		return -ENOMEM;

	kvm_set_table_pte(ptep, childp, mm_ops);
	mm_ops->get_page(ptep);
	return 0;
}

int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot)
{
	int ret;
	struct hyp_map_data map_data = {
		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
		.mm_ops	= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_map_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &map_data,
	};

	ret = hyp_set_prot_attr(prot, &map_data.attr);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	isb();
	return ret;
}

struct hyp_unmap_data {
	u64 unmapped;
	struct kvm_pgtable_mm_ops *mm_ops;
};

static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			    enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t pte = *ptep, *childp = NULL;
	u64 granule = kvm_granule_size(level);
	struct hyp_unmap_data *data = arg;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (!kvm_pte_valid(pte))
		return -EINVAL;

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte, mm_ops);

		if (mm_ops->page_count(childp) != 1)
			return 0;

		kvm_clear_pte(ptep);
		dsb(ishst);
		__tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
	} else {
		if (end - addr < granule)
			return -EINVAL;

		kvm_clear_pte(ptep);
		dsb(ishst);
		__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
		data->unmapped += granule;
	}

	dsb(ish);
	isb();
	mm_ops->put_page(ptep);

	if (childp)
		mm_ops->put_page(childp);

	return 0;
}

u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct hyp_unmap_data unmap_data = {
		.mm_ops	= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_unmap_walker,
		.arg	= &unmap_data,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	if (!pgt->mm_ops->page_count)
		return 0;

	kvm_pgtable_walk(pgt, addr, size, &walker);
	return unmap_data.unmapped;
}

int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
			 struct kvm_pgtable_mm_ops *mm_ops)
{
	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);

	pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits = va_bits;
	pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
	pgt->mm_ops = mm_ops;
	pgt->mmu = NULL;
	pgt->pte_ops = NULL;

	return 0;
}

static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			   enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct kvm_pgtable_mm_ops *mm_ops = arg;
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte))
		return 0;

	mm_ops->put_page(ptep);

	if (kvm_pte_table(pte, level))
		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));

	return 0;
}

void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
		.arg	= pgt->mm_ops,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgt->mm_ops->put_page(pgt->pgd);
	pgt->pgd = NULL;
}

struct stage2_map_data {
	u64 phys;
	kvm_pte_t attr;
	u64 annotation;

	kvm_pte_t *anchor;
	kvm_pte_t *childp;

	struct kvm_s2_mmu *mmu;
	void *memcache;

	struct kvm_pgtable_mm_ops *mm_ops;

	/* Force mappings to page granularity */
	bool force_pte;
};

u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
{
	u64 vtcr = VTCR_EL2_FLAGS;
	u8 lvls;

	vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
	vtcr |= VTCR_EL2_T0SZ(phys_shift);
	/*
	 * Use a minimum 2 level page table to prevent splitting
	 * host PMD huge pages at stage2.
	 */
	lvls = stage2_pgtable_levels(phys_shift);
	if (lvls < 2)
		lvls = 2;
	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);

	/*
	 * Enable Hardware Access Flag management, unconditionally
	 * on all CPUs. The feature is RES0 on CPUs without support
	 * and must be ignored by them.
	 */
	vtcr |= VTCR_EL2_HA;

	/* Set the VMID bits */
	vtcr |= (get_vmid_bits(mmfr1) == 16) ?
		VTCR_EL2_VS_16BIT :
		VTCR_EL2_VS_8BIT;

	return vtcr;
}

static bool stage2_has_fwb(struct kvm_pgtable *pgt)
{
	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return false;

	return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
}

#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))

static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
				kvm_pte_t *ptep)
{
	u64 exec_type = KVM_PTE_LEAF_ATTR_HI_S2_XN_XN;
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
	bool nc = prot & KVM_PGTABLE_PROT_NC;
	enum kvm_pgtable_prot exec_prot;
	kvm_pte_t attr;

	if (device)
		attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE);
	else if (nc)
		attr = KVM_S2_MEMATTR(pgt, NORMAL_NC);
	else
		attr = KVM_S2_MEMATTR(pgt, NORMAL);

	exec_prot = prot & (KVM_PGTABLE_PROT_X | KVM_PGTABLE_PROT_PXN | KVM_PGTABLE_PROT_UXN);
	switch (exec_prot) {
	case KVM_PGTABLE_PROT_X:
		goto set_ap;
	case KVM_PGTABLE_PROT_PXN:
		exec_type = KVM_PTE_LEAF_ATTR_HI_S2_XN_PXN;
		break;
	case KVM_PGTABLE_PROT_UXN:
		exec_type = KVM_PTE_LEAF_ATTR_HI_S2_XN_UXN;
		break;
	default:
		if (exec_prot)
			return -EINVAL;
	}
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, exec_type);

set_ap:
	if (prot & KVM_PGTABLE_PROT_R)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
	*ptep = attr;

	return 0;
}

enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
{
	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;

	if (!kvm_pte_valid(pte))
		return prot;

	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
		prot |= KVM_PGTABLE_PROT_R;
	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
		prot |= KVM_PGTABLE_PROT_W;
	switch (FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte)) {
	case 0:
		prot |= KVM_PGTABLE_PROT_X;
		break;
	case KVM_PTE_LEAF_ATTR_HI_S2_XN_PXN:
		prot |= KVM_PGTABLE_PROT_PXN;
		break;
	case KVM_PTE_LEAF_ATTR_HI_S2_XN_UXN:
		prot |= KVM_PGTABLE_PROT_UXN;
		break;
	case KVM_PTE_LEAF_ATTR_HI_S2_XN_XN:
		break;
	default:
		WARN_ON(1);
	}

	return prot;
}

static bool stage2_pte_needs_update(struct kvm_pgtable *pgt,
				    kvm_pte_t old, kvm_pte_t new)
{
	/* Following filter logic applies only to guest stage-2 entries. */
	if (pgt->flags & KVM_PGTABLE_S2_IDMAP)
		return true;

	if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
		return true;

	return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS));
}

static void stage2_clear_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
			     u32 level)
{
	if (!kvm_pte_valid(*ptep))
		return;

	kvm_clear_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
}

static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
			   u32 level, struct kvm_pgtable_mm_ops *mm_ops)
{
	/*
	 * Clear the existing PTE, and perform break-before-make with
	 * TLB maintenance if it was valid.
	 */
	stage2_clear_pte(ptep, mmu, addr, level);
	mm_ops->put_page(ptep);
}

static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
{
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;

	return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL);
}

static bool stage2_pte_executable(kvm_pte_t pte)
{
	kvm_pte_t xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte);

	return kvm_pte_valid(pte) && xn != KVM_PTE_LEAF_ATTR_HI_S2_XN_XN;
}

static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
					struct stage2_map_data *data)
{
	if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1)))
		return false;

	return kvm_block_mapping_supported(addr, end, data->phys, level);
}

static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	kvm_pte_t new, old = *ptep;
	u64 granule = kvm_granule_size(level), phys = data->phys;
	struct kvm_pgtable *pgt = data->mmu->pgt;
	struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
		return -E2BIG;

	if (kvm_phys_is_valid(phys))
		new = kvm_init_valid_leaf_pte(phys, data->attr, level);
	else
		new = data->annotation;

	/*
	 * Skip updating a guest PTE if we are trying to recreate the exact
	 * same mapping or change only the access permissions. Instead,
	 * the vCPU will exit one more time from the guest if still needed
	 * and then go through the path of relaxing permissions. This applies
	 * only to guest PTEs; Host PTEs are unconditionally updated. The
	 * host cannot livelock because the abort handler has done prior
	 * checks before calling here.
	 */
	if (!stage2_pte_needs_update(pgt, old, new))
		return -EAGAIN;

	if (pte_ops->pte_is_counted_cb(old, level))
		mm_ops->put_page(ptep);

	/*
	 * If we're only changing software bits, then we don't need to
	 * do anything else.
	 */
	if (!((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
		goto out_set_pte;

	stage2_clear_pte(ptep, data->mmu, addr, level);

	/* Perform CMOs before installation of the guest stage-2 PTE */
	if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
					       granule);
	if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);

out_set_pte:
	if (pte_ops->pte_is_counted_cb(new, level))
		mm_ops->get_page(ptep);

	smp_store_release(ptep, new);
	if (kvm_phys_is_valid(phys))
		data->phys += granule;
	return 0;
}

static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
				     kvm_pte_t *ptep,
				     struct stage2_map_data *data)
{
	if (data->anchor)
		return 0;

	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
		return 0;

	data->childp = kvm_pte_follow(*ptep, data->mm_ops);
	kvm_clear_pte(ptep);

	/*
	 * Invalidate the whole stage-2, as we may have numerous leaf
	 * entries below us which would otherwise need invalidating
	 * individually.
	 */
	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
	data->anchor = ptep;
	return 0;
}

static void stage2_map_prefault_idmap(struct kvm_pgtable_pte_ops *pte_ops,
				      u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep, kvm_pte_t block_pte)
{
	u64 pa, granule;
	int i;

	WARN_ON(pte_ops->pte_is_counted_cb(block_pte, level - 1));

	if (!kvm_pte_valid(block_pte))
		return;

	pa = ALIGN_DOWN(addr, kvm_granule_size(level - 1));
	granule = kvm_granule_size(level);
	for (i = 0; i < PTRS_PER_PTE; ++i, ++ptep, pa += granule) {
		kvm_pte_t pte = kvm_init_valid_leaf_pte(pa, block_pte, level);
		/* Skip ptes in the range being modified by the caller. */
		if ((pa < addr) || (pa >= end)) {
			/* We can write non-atomically: ptep isn't yet live. */
			*ptep = pte;
		}
	}
}

static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				struct stage2_map_data *data)
{
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
	struct kvm_pgtable *pgt = data->mmu->pgt;
	struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
	kvm_pte_t *childp, pte = *ptep;
	int ret;

	if (data->anchor) {
		if (pte_ops->pte_is_counted_cb(pte, level))
			mm_ops->put_page(ptep);

		return 0;
	}

	ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data);
	if (ret != -E2BIG)
		return ret;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	if (!data->memcache)
		return -ENOMEM;

	childp = mm_ops->zalloc_page(data->memcache);
	if (!childp)
		return -ENOMEM;

	if (pgt->flags & KVM_PGTABLE_S2_IDMAP) {
		stage2_map_prefault_idmap(pte_ops, addr, end, level + 1,
					  childp, pte);
	}

	/*
	 * If we've run into an existing block mapping then replace it with
	 * a table. Accesses beyond 'end' that fall within the new table
	 * will be mapped lazily.
	 */
	if (pte_ops->pte_is_counted_cb(pte, level)) {
		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
	} else {
		/*
		 * On non-refcounted PTEs we just clear them out without
		 * dropping the refcount.
		 */
		stage2_clear_pte(ptep, data->mmu, addr, level);
	}

	kvm_set_table_pte(ptep, childp, mm_ops);
	mm_ops->get_page(ptep);

	return 0;
}

static void stage2_coalesce_walk_table_post(u64 addr, u64 end, u32 level,
					    kvm_pte_t *ptep,
					    struct stage2_map_data *data)
{
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
	kvm_pte_t *childp = kvm_pte_follow(*ptep, mm_ops);

	/*
	 * Decrement the refcount only on the set ownership path to avoid a
	 * loop situation when the following happens:
	 * 1. We take a host stage2 fault and we create a small mapping which
	 * has default attributes (is not refcounted).
	 * 2. On the way back we execute the post handler and we zap the
	 * table that holds our mapping.
	 */
	if (kvm_phys_is_valid(data->phys) ||
	    !kvm_level_supports_block_mapping(level))
		return;

	/*
	 * Free a page that is not referenced anymore and drop the reference
	 * of the page table page.
	 */
	if (mm_ops->page_count(childp) == 1) {
		kvm_clear_pte(ptep);
		kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
		mm_ops->put_page(ptep);
		mm_ops->put_page(childp);
	}
}

static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
	kvm_pte_t *childp;
	int ret = 0;

	if (!data->anchor) {
		stage2_coalesce_walk_table_post(addr, end, level, ptep,
						data);
		return 0;
	}

	if (data->anchor == ptep) {
		childp = data->childp;
		data->anchor = NULL;
		data->childp = NULL;
		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
	} else {
		childp = kvm_pte_follow(*ptep, mm_ops);
	}

	mm_ops->put_page(childp);
	mm_ops->put_page(ptep);

	return ret;
}

/*
 * This is a little fiddly, as we use all three of the walk flags. The idea
 * is that the TABLE_PRE callback runs for table entries on the way down,
 * looking for table entries which we could conceivably replace with a
 * block entry for this mapping. If it finds one, then it sets the 'anchor'
 * field in 'struct stage2_map_data' to point at the table entry, before
 * clearing the entry to zero and descending into the now detached table.
 *
 * The behaviour of the LEAF callback then depends on whether or not the
 * anchor has been set. If not, then we're not using a block mapping higher
 * up the table and we perform the mapping at the existing leaves instead.
 * If, on the other hand, the anchor _is_ set, then we drop references to
 * all valid leaves so that the pages beneath the anchor can be freed.
 *
 * Finally, the TABLE_POST callback does nothing if the anchor has not
 * been set, but otherwise frees the page-table pages while walking back up
 * the page-table, installing the block entry when it revisits the anchor
 * pointer and clearing the anchor to NULL.
 */
static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			     enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct stage2_map_data *data = arg;

	switch (flag) {
	case KVM_PGTABLE_WALK_TABLE_PRE:
		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_LEAF:
		return stage2_map_walk_leaf(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_TABLE_POST:
		return stage2_map_walk_table_post(addr, end, level, ptep, data);
	}

	return -EINVAL;
}

int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   void *mc)
{
	int ret;
	struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
	struct stage2_map_data map_data = {
		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
		.mmu		= pgt->mmu,
		.memcache	= mc,
		.mm_ops		= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	if (pte_ops->force_pte_cb)
		map_data.force_pte = pte_ops->force_pte_cb(addr, addr + size, prot);

	if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys)))
		return -EINVAL;

	ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	return ret;
}

int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size,
				void *mc, kvm_pte_t annotation)
{
	int ret;
	struct stage2_map_data map_data = {
		.phys		= KVM_PHYS_INVALID,
		.mmu		= pgt->mmu,
		.memcache	= mc,
		.mm_ops		= pgt->mm_ops,
		.force_pte	= true,
		.annotation	= annotation,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	if (annotation & PTE_VALID)
		return -EINVAL;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	return ret;
}

static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_pgtable *pgt = arg;
	struct kvm_s2_mmu *mmu = pgt->mmu;
	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
	struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
	kvm_pte_t pte = *ptep, *childp = NULL;
	bool need_flush = false;

	if (!kvm_pte_valid(pte)) {
		if (pte_ops->pte_is_counted_cb(pte, level)) {
			kvm_clear_pte(ptep);
			mm_ops->put_page(ptep);
		}
		return 0;
	}

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte, mm_ops);

		if (mm_ops->page_count(childp) != 1)
			return 0;
	} else if (stage2_pte_cacheable(pgt, pte)) {
		need_flush = !stage2_has_fwb(pgt);
	}

	/*
	 * This is similar to the map() path in that we unmap the entire
	 * block entry and rely on the remaining portions being faulted
	 * back lazily.
	 */
	if (pte_ops->pte_is_counted_cb(pte, level))
		stage2_put_pte(ptep, mmu, addr, level, mm_ops);
	else
		stage2_clear_pte(ptep, mmu, addr, level);

	if (need_flush && mm_ops->dcache_clean_inval_poc)
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
					       kvm_granule_size(level));

	if (childp)
		mm_ops->put_page(childp);

	return 0;
}

int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_unmap_walker,
		.arg	= pgt,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static int stage2_reclaim_leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				      enum kvm_pgtable_walk_flags flag, void * const arg)
{
	stage2_coalesce_walk_table_post(addr, end, level, ptep, arg);

	return 0;
}

int kvm_pgtable_stage2_reclaim_leaves(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct stage2_map_data map_data = {
		.phys	= KVM_PHYS_INVALID,
		.mmu	= pgt->mmu,
		.mm_ops	= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_reclaim_leaf_walker,
		.arg	= &map_data,
		.flags	= KVM_PGTABLE_WALK_TABLE_POST,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

struct stage2_attr_data {
	kvm_pte_t attr_set;
	kvm_pte_t attr_clr;
	kvm_pte_t pte;
	u32 level;
	struct kvm_pgtable_mm_ops *mm_ops;
};

static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;
	struct stage2_attr_data *data = arg;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (!kvm_pte_valid(pte))
		return 0;

	data->level = level;
	data->pte = pte;
	pte &= ~data->attr_clr;
	pte |= data->attr_set;

	/*
	 * We may race with the CPU trying to set the access flag here,
	 * but worst-case the access flag update gets lost and will be
	 * set on the next access instead.
	 */
	if (data->pte != pte) {
		/*
		 * Invalidate instruction cache before updating the guest
		 * stage-2 PTE if we are going to add executable permission.
		 */
		if (mm_ops->icache_inval_pou &&
		    stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
						 kvm_granule_size(level));
		WRITE_ONCE(*ptep, pte);
	}

	return 0;
}

static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
				    u64 size, kvm_pte_t attr_set,
				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
				    u32 *level)
{
	int ret;
	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
	struct stage2_attr_data data = {
		.attr_set	= attr_set & attr_mask,
		.attr_clr	= attr_clr & attr_mask,
		.mm_ops		= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_attr_walker,
		.arg		= &data,
		.flags		= KVM_PGTABLE_WALK_LEAF,
	};

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	if (ret)
		return ret;

	if (orig_pte)
		*orig_pte = data.pte;

	if (level)
		*level = data.level;
	return 0;
}

int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	return stage2_update_leaf_attrs(pgt, addr, size, 0,
					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
					NULL, NULL);
}

kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
				 &pte, NULL);
	dsb(ishst);
	return pte;
}

kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
				 &pte, NULL);
	/*
	 * "But where's the TLBI?!", you scream.
	 * "Over in the core code", I sigh.
	 *
	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
	 */
	return pte;
}

bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
}

int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot)
{
	int ret;
	u32 level;
	kvm_pte_t set = 0, clr = 0;

	/* Reject anything other than the R/W/X permission bits */
	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_R)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	if (prot & KVM_PGTABLE_PROT_X)
		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
	if (!ret)
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
	return ret;
}

static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_pgtable *pgt = arg;
	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
	kvm_pte_t pte = *ptep;

	if (!stage2_pte_cacheable(pgt, pte))
		return 0;

	if (mm_ops->dcache_clean_inval_poc)
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
					       kvm_granule_size(level));
	return 0;
}

int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_flush_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= pgt,
	};

	if (stage2_has_fwb(pgt))
		return 0;

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			      struct kvm_pgtable_mm_ops *mm_ops,
			      enum kvm_pgtable_stage2_flags flags,
			      struct kvm_pgtable_pte_ops *pte_ops)
{
	size_t pgd_sz;
	u64 vtcr = mmu->arch->vtcr;
	u32 ia_bits = VTCR_EL2_IPA(vtcr);
	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
	pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits = ia_bits;
	pgt->start_level = start_level;
	pgt->mm_ops = mm_ops;
	pgt->mmu = mmu;
	pgt->flags = flags;
	pgt->pte_ops = pte_ops;

	/* Ensure zeroed PGD pages are visible to the hardware walker */
	dsb(ishst);
	return 0;
}

size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
{
	u32 ia_bits = VTCR_EL2_IPA(vtcr);
	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

	return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
}

static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	struct kvm_pgtable *pgt = arg;
	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
	struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
	kvm_pte_t pte = *ptep;

	if (!pte_ops->pte_is_counted_cb(pte, level))
		return 0;

	mm_ops->put_page(ptep);

	if (kvm_pte_table(pte, level))
		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));

	return 0;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	size_t pgd_sz;
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF |
			  KVM_PGTABLE_WALK_TABLE_POST,
		.arg	= pgt,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
	pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
	pgt->pgd = NULL;
}